Module VData
[hide private]
[frames | no frames]

Source Code for Module VData

  1  '''
 
  2  Created on 08/01/2010
 
  3  
 
  4  @author: Luis
 
  5  ''' 
  6  from struct import unpack_from 
  7  from Utils import utils 
  8  from cStringIO import StringIO 
  9  
 
class VData():
    '''
    This class is in charge of VData extraction from a linear buffer.
    VData objects are returned as a Python table (list of rows).
    '''

    def __init__(self,):
        '''
        Constructor.

        Stores the XML namespace used to qualify tag names and creates the
        helper object (``utils``) used for XML attribute access, data type
        translation and floating point formatting.
        '''
        # ElementTree needs fully qualified tag names (namespace included).
        self.schema = "{http://schemas.hdfgroup.org/hdf4/h4}"
        self.tools = utils()

    def dataSpaceToColumns(self, columns_offset, table_rows, linear_buffer):
        '''
        Rearrange a row-ordered byte buffer so that each column is contiguous.

        Unpacking row by row means handling a different data type for every
        field. Once the bytes are regrouped column by column, n elements of
        the same data type can be unpacked with a single call.

        columns_offset -- width in bytes of every column, in table order.
        table_rows     -- number of rows stored in the buffer.
        linear_buffer  -- seekable, readable file-like object holding the
                          table row after row; its position is moved.

        Returns a new StringIO holding the same bytes in column order. The
        returned buffer is left positioned at its end.
        '''
        col_offset = 0
        newbuffer = StringIO()
        total_offset = sum(columns_offset)  # width in bytes of one full row
        for column in columns_offset:
            width = int(column)
            for row in range(table_rows):
                # Jump to this column's field inside the current row.
                linear_buffer.seek((row * total_offset) + col_offset, 0)
                newbuffer.write(linear_buffer.read(width))
            col_offset = col_offset + column
        return newbuffer

    def getColumn(self, column, num_rows, column_offset, column_entries,
                  linear_buffer):
        '''
        Unpack one column from a column-ordered buffer and format it as text.

        column         -- struct-like format of one field, e.g. "<3i"; only
                          its first character (byte order) and its last
                          character (type code) are used.
        num_rows       -- number of rows in the table.
        column_offset  -- byte offset of the column inside the buffer.
        column_entries -- number of entries per field.
        linear_buffer  -- object exposing getvalue() with the column-ordered
                          bytes.

        Returns a list with one string per row. If the column has more than
        one entry the entries are concatenated using the | character, as in
        "|1|2|3|". Values of type "d" or "f" are formatted with
        self.tools.fixFloatingPoint (expected to return a string); any other
        value is formatted with str().
        '''
        col_type = column[-1]
        # A format may arrive without a byte-order prefix (the comments in
        # Extract note that char needs no endianness). In that case the
        # first character is part of the count and must not be copied into
        # the new format, otherwise it corrupts the repeat count.
        col_endianness = column[0] if column[0] in "@=<>!" else ""
        full_colum_format = (str(col_endianness)
                             + str(column_entries * num_rows)
                             + str(col_type))
        # unpack uses bytewise operations, no compression involved.
        column_data = unpack_from(full_colum_format,
                                  linear_buffer.getvalue(),
                                  column_offset)

        if col_type in ("d", "f"):
            to_text = self.tools.fixFloatingPoint
        else:
            to_text = str

        if column_entries > 1:
            col = []
            for row in range(num_rows):
                start = row * column_entries
                entries = [to_text(value)
                           for value in column_data[start:start + column_entries]]
                col.append("|" + "|".join(entries) + "|")
            return col

        return [to_text(column_data[row]) for row in range(num_rows)]

    def Extract(self, node, linear_buffer, dump_format):
        '''
        Extract a single VData object, the method returns the object as a Table.

        node          -- ElementTree node of a "Table" XML tag; it carries the
                         metadata needed to extract and rebuild the VData
                         object (nRows, nColumns, Column tags, tableData).
        linear_buffer -- file-like object holding the raw table bytes.
        dump_format   -- when equal to False the table is returned as ASCII;
                         otherwise the (possibly reordered) buffer is
                         returned untouched.

        Returns, in ASCII mode, a list whose first item is the list of column
        names followed by one tuple of strings per row. In any other mode the
        column-ordered buffer is returned. Returns None, after printing a
        message, when a column has no data type or an unsupported one.
        '''
        table_rows = int(self.tools.getXMLattribute(node, "nRows"))
        table_cols = int(self.tools.getXMLattribute(node, "nColumns"))

        table_formatCol = []
        column_names = []
        columns_offset = []
        columns_entries = []

        # The "tableData" tag has the information about where the actual
        # data is in the HDF file. If the attribute
        # "fastestVaryingDimension" is not found the reader assumes "row".
        data_node = node.find(self.schema + "tableData")
        VaryingDimension = "row"
        if data_node is not None:
            VaryingDimension = data_node.attrib.get("fastestVaryingDimension",
                                                    "row")

        # Traverse the "Column" tags to find out the format of each column.
        # getPythonFormat returns 3 values:
        #   a) python format: the Python data type translated from the
        #      original mapped type.
        #   b) data offset: number of bytes used by one entry; a column with
        #      more than one entry uses (number_of_entries) * (data_type_bytes).
        #   c) endianness: byte order; the type char does not need endianness.
        # Element.getiterator() is not available on every ElementTree
        # version, so prefer Element.iter() when it exists.
        iter_tags = getattr(node, "iter", None) or node.getiterator
        for column in iter_tags(self.schema + "Column"):
            column_names.append(column.attrib["name"])
            column_entries = column.attrib["nEntries"]
            columns_entries.append(int(column_entries))

            column_metadata = list(column)  # we are getting the "datum" tag
            if not column_metadata or not self.tools.getXMLattribute(
                    column_metadata[0], "dataType"):
                print("A column has no data type")
                return None
            datum = column_metadata[0]
            mapped_type = datum.attrib["dataType"]

            if self.tools.getXMLattribute(datum, "byteOrder"):
                byte_order = datum.attrib["byteOrder"]
            else:
                byte_order = "littleEndian"

            py_format, data_offset, py_endianness = self.tools.getPythonFormat(
                mapped_type, byte_order)
            # Check for unsupported types before doing arithmetic with the
            # offset that came back with them.
            if py_format == "not supported":
                print("Data type not supported at column: " + column.attrib["name"])
                return None

            columns_offset.append(int(column_entries) * data_offset)
            table_formatCol.append(py_endianness + column_entries + py_format)

        if VaryingDimension == "row":
            linear_buffer = self.dataSpaceToColumns(columns_offset, table_rows,
                                                    linear_buffer)

        # Deliberately "== False": only False (or 0) selects ASCII output,
        # None or any other value returns the raw buffer.
        if dump_format == False:
            colum_offset = 0
            table_switched = []
            for index in range(table_cols):
                col_data = self.getColumn(table_formatCol[index], table_rows,
                                          colum_offset, columns_entries[index],
                                          linear_buffer)
                colum_offset = colum_offset + (columns_offset[index] * table_rows)
                table_switched.append(col_data)

            # Transpose the list of columns into a list of rows; list() keeps
            # the result mutable where zip() returns an iterator.
            table_data = list(zip(*table_switched))
            table_data.insert(0, column_names)
            return table_data

        return linear_buffer
179