1 '''
2 Created on 08/01/2010
3
4 @author: Luis
5 '''
6 from struct import unpack_from
7 from Utils import utils
8 from cStringIO import StringIO
9
11 '''
12 This class is in charge of VData extraction from a linear buffer.
13 VData objects are returned as a Python table (list of rows).
14 '''
15
17 '''
18 Constructor
19 '''
20 self.schema="{http://schemas.hdfgroup.org/hdf4/h4}"
21
22 self.tools=utils()
23
24
25
27 '''
28 This method is a small workaround to speed up unpacking. If we unpack the byte stream row by row, we have to process
29 a different data type for every field. If we rearrange the data space so that the fastest varying dimension is "column", we can unpack n elements of the
30 same data type at once.
31 '''
32 col_offset=0
33 row=[]
34 newbuffer=StringIO()
35 total_offset=sum(columns_offset)
36 for column in columns_offset:
37 for row in range(table_rows):
38 linear_buffer.seek((row*total_offset)+col_offset,0)
39 newbuffer.write(linear_buffer.read(int(column)))
40 col_offset=col_offset+column
41
42 return newbuffer
43
44
45
def getColumn(self, column, num_rows, column_offset, column_entries, linear_buffer):
    '''
    Extract one column from a column-ordered buffer and return it as a list
    of formatted cells, one per row.

    Parameters:
      column         -- struct-style format string for the column. Only its
                        first character (byte-order flag) and last character
                        (type code) are used; any count embedded in it is
                        ignored and recomputed from the arguments below.
      num_rows       -- number of rows stored for this column.
      column_offset  -- byte offset inside the buffer where the column starts.
      column_entries -- number of values each row holds for this column.
      linear_buffer  -- object exposing getvalue(), which must return the raw
                        bytes to unpack.

    Returns a list with num_rows items:
      * column_entries > 1: each item is a string with the row's values
        wrapped and separated by "|", e.g. "|1|2|3|".
      * otherwise: each item is str(value), or for the "d"/"f" type codes,
        whatever self.tools.fixFloatingPoint(value) returns.

    struct.error propagates from unpack_from if the buffer is too short for
    the requested format and offset.
    '''
    col_endianness = column[0]
    col_type = column[-1]

    # The whole column is unpacked with a single call: every value in it
    # shares one type, so the format is just <order><count><type>.
    full_column_format = str(col_endianness) + str(column_entries * num_rows) + col_type
    column_data = unpack_from(full_column_format, linear_buffer.getvalue(), column_offset)

    # The conversion depends only on the column type, so choose it once
    # instead of re-testing the type code for every value.
    if col_type in ("d", "f"):
        to_text = self.tools.fixFloatingPoint
    else:
        to_text = str

    if column_entries > 1:
        col = []
        for row in range(num_rows):
            first = row * column_entries
            entries = column_data[first:first + column_entries]
            col.append("|" + "|".join([to_text(entry) for entry in entries]) + "|")
        return col

    return [to_text(column_data[row]) for row in range(num_rows)]
88
89
91 '''
92 Extract a single VData object, the method returns the object as a Table.
93 The parameter 'node' has to be an ElementTree node of a "Table" XML tag.
94 This tag contains the necessary metadata to extract and reconstruct the VData object.
95 '''
96 table_rows= int(self.tools.getXMLattribute(node, "nRows"))
97 table_cols= int(self.tools.getXMLattribute(node, "nColumns"))
98 table_formatCol=[]
99
100 column_names=[]
101 columns_offset=[]
102 columns_endianness=[]
103 columns_entries=[]
104
105 row_offset=0
106 py_endianness="@"
107
108
109
110 data_node=node.find(self.schema + "tableData")
111
112 try:
113 VaryingDimension = data_node.attrib["fastestVaryingDimension"]
114 except:
115
116 VaryingDimension ="row"
117
118
119
120
121
122
123
124
125
126 for column in node.getiterator(self.schema + "Column"):
127
128 column_names.append(column.attrib["name"])
129 column_entries=column.attrib["nEntries"]
130 columns_entries.append(int(column_entries))
131
132 column_metadata= column.getchildren()
133 if self.tools.getXMLattribute(column_metadata[0],"dataType"):
134 mapped_type=column_metadata[0].attrib["dataType"]
135 else:
136 print "A column has no data type"
137 return
138 if self.tools.getXMLattribute(column_metadata[0],"byteOrder"):
139 byte_order=column_metadata[0].attrib["byteOrder"]
140 else:
141 byte_order="littleEndian"
142
143 py_format,data_offset,py_endianness=self.tools.getPythonFormat(mapped_type,byte_order)
144
145 columns_endianness.append(py_endianness)
146 columns_offset.append(int(column_entries)*data_offset)
147
148 row_offset=row_offset+(int(column_entries)*data_offset)
149 if py_format == "not supported":
150 print "Data type not supported at column: " + column.attrib["name"]
151 return
152
153 table_formatCol.append(py_endianness + column_entries + py_format)
154
155 if VaryingDimension=="row":
156 linear_buffer=self.dataSpaceToColumns(columns_offset,table_rows,linear_buffer)
157
158 colum_offset=0
159 table_switched=[]
160
161 if dump_format==False:
162
163
164 for column in range(table_cols):
165
166 col_data=self.getColumn(table_formatCol[column],table_rows,colum_offset,columns_entries[column],linear_buffer)
167 colum_offset=colum_offset + (columns_offset[column]*table_rows)
168 table_switched.append(col_data)
169
170 table_data=zip(*table_switched)
171 table_data.insert(0,column_names)
172
173
174
175 else:
176 table_data=linear_buffer
177
178 return table_data
179