1 '''
2 Created on 08/01/2010
3
4 @author: Luis
5 '''
6
7 from Utils import utils
8 from cStringIO import StringIO
9 from ctypes import create_string_buffer
10 from os import path
11
12
13
'''
This class opens an HDF file as a Python file object and uses an XML tag from the map file to locate,
extract and reconstruct an object stored in it. The method linearizeDataSpace assembles all the pieces
of an object (chunks, blocks, byte streams) into one linear buffer.
'''
20 '''
21 Constructor
22 '''
23 self.utils=utils()
24 self.schema="{http://schemas.hdfgroup.org/hdf4/h4}"
25 self.external_handler=[]
26 try:
27 file_path=path.normpath(hdf_fileName)
28 self.file_handler=file(file_path,"rb")
29 except:
30 print "HDF file not found: " + hdf_fileName, file_path
31 exit(1)
32
33
35 '''
36 The function will put together all the chunks/blocks/cubes of a data structure into a linear buffer
37 the parameter "node" has to be an XML tag with the necessary information about how the object is stored in the HDF file.
38 '''
39
40 tmp_buffer_object= StringIO()
41
42
43 if type=="VData":
44
45 for chunk in node.getiterator(self.schema+"byteStream"):
46 self.file_handler.seek(int(chunk.attrib["offset"]),0)
47
48 tmp_buffer_object.write(self.file_handler.read(int(chunk.attrib["nBytes"])))
49 return tmp_buffer_object
50
51
52
53 elif type=="SDS":
54 dataDimensionSizes=node.find(self.schema + "dataDimensionSizes").text.split(" ")
55 DataNode=node.find(self.schema + "datum")
56 mapped_type=DataNode.attrib["dataType"]
57 try:
58 byte_order=DataNode.attrib["byteOrder"]
59 except:
60 byte_order="bigEndian"
61
62 py_format,item_size,py_endianness=self.utils.getPythonFormat(mapped_type,byte_order)
63
64 arrayData=node.find(self.schema + "arrayData")
65 spatialPart=arrayData.getchildren()
66 try:
67 compressionType=arrayData.attrib["compressionType"]
68 fastestVaryingDimensionIndex=arrayData.attrib["fastestVaryingDimensionIndex"]
69 except:
70 compressionType="None"
71 fastestVaryingDimensionIndex="1"
72
73
74 if spatialPart[0].tag==self.schema+"byteStream" or spatialPart[0].tag==self.schema+"byteStreamSet":
75
76
77
78 for stream in spatialPart:
79 if stream.tag==self.schema + "byteStreamSet":
80 unzipped_subChunks=""
81 for subChunks in stream.getchildren():
82 self.file_handler.seek(int(subChunks.attrib["offset"]),0)
83 unzipped_subChunks+=self.file_handler.read(int(subChunks.attrib["nBytes"]))
84 if compressionType!="None":
85 unzipped_bytes=self.utils.inflate64(unzipped_subChunks)
86 else:
87 unzipped_bytes=unzipped_subChunks
88
89 elif stream.tag==self.schema + "byteStream":
90 self.file_handler.seek(int(stream.attrib["offset"]),0)
91 if compressionType!="None":
92 unzipped_bytes=self.utils.inflate64(self.file_handler.read(int(stream.attrib["nBytes"])))
93 else:
94 unzipped_bytes=self.file_handler.read(int(stream.attrib["nBytes"]))
95 tmp_buffer_object.write(unzipped_bytes)
96 return tmp_buffer_object
97
98 elif spatialPart[0].tag==self.schema+"chunks":
99
100
101 chunkDimensionSizes=spatialPart[0].find(self.schema+"chunkDimensionSizes").text.split(" ")
102 try:
103 allocatedDimensionSizes=node.find(self.schema + "allocatedDimensionSizes").text.split(" ")
104 except:
105 allocatedDimensionSizes=dataDimensionSizes
106 chunkChkSize=1
107
108 for dim in chunkDimensionSizes:
109 chunkChkSize*=int(dim)
110
111 chunkChkSize*=item_size
112
113 bufferSize=1
114 for dim in allocatedDimensionSizes:
115 bufferSize*=int(dim)
116
117 nDim=len(allocatedDimensionSizes)
118 bufferSize*=item_size
119
120
121 tmp=create_string_buffer(bufferSize)
122 tmp_buffer_object.write(str(tmp))
123 tmp=None
124
125 for stream in spatialPart[0].getchildren():
126 unzipped_bytes=None
127 if stream.tag==self.schema + "chunkDimensionSizes":
128 continue
129 if stream.tag==self.schema + "byteStreamSet":
130 unzipped_subChunks=""
131 chunkPos= stream.attrib["chunkPositionInArray"].replace("[","")
132 chunkPos=chunkPos.replace("]","").split(",")
133 for subChunks in stream.getchildren():
134 self.file_handler.seek(int(subChunks.attrib["offset"]),0)
135 unzipped_subChunks+=self.file_handler.read(int(subChunks.attrib["nBytes"]))
136 if compressionType!="None":
137 unzipped_bytes=self.utils.inflate64(unzipped_subChunks)
138 if len(unzipped_bytes)!=chunkChkSize:
139 print "Error: uncompressed data size does not match the chunk size: ",len(unzipped_bytes), " ! ", chunkChkSize
140 return None
141 else:
142 unzipped_bytes=unzipped_subChunks
143
144 elif stream.tag==self.schema + "byteStream":
145
146 chunkPos= stream.attrib["chunkPositionInArray"].replace("[","")
147 chunkPos=chunkPos.replace("]","").split(",")
148 self.file_handler.seek(int(stream.attrib["offset"]),0)
149 if compressionType!="None":
150 unzipped_bytes=self.utils.inflate64(self.file_handler.read(int(stream.attrib["nBytes"])))
151 if len(unzipped_bytes)!=chunkChkSize:
152 print "Error: uncompressed data size does not match the chunk size: ",len(unzipped_bytes), " ! ", chunkChkSize
153 return None
154 else:
155 unzipped_bytes=self.file_handler.read(int(stream.attrib["nBytes"]))
156
157
158 if nDim==2:
159 if fastestVaryingDimensionIndex=="2":
160
161 base=int(chunkPos[1])
162 chunkOffset=0
163 for columns in range(base,base+int(chunkDimensionSizes[1])):
164 linearOffset=(columns*int(allocatedDimensionSizes[0]))+int(chunkPos[0])
165 tmp_buffer_object.seek(linearOffset*item_size)
166 col_elements=unzipped_bytes[chunkOffset:chunkOffset+int(chunkDimensionSizes[0])*item_size]
167 tmp_buffer_object.write(col_elements)
168
169
170 chunkOffset+=int(chunkDimensionSizes[0])*item_size
171 else:
172 base=int(chunkPos[0])
173 chunkOffset=0
174 for rows in range(base,base+int(chunkDimensionSizes[0])):
175 linearOffset=(rows*int(allocatedDimensionSizes[1]))+int(chunkPos[1])
176 tmp_buffer_object.seek(linearOffset*item_size)
177
178 row_elements=unzipped_bytes[chunkOffset:chunkOffset+int(chunkDimensionSizes[1])*item_size]
179 tmp_buffer_object.write(row_elements)
180
181
182
183 chunkOffset+=int(chunkDimensionSizes[1])*item_size
184
185 elif nDim>=3:
186 print "Chunked N dimensional SDS are not mapped yet"
187 return None
188
189
190 return tmp_buffer_object
191
192 elif spatialPart[0].tag==self.schema+"dataInExternalFile":
193 for stream in spatialPart[0].getchildren():
194 if stream.tag==self.schema + "byteStreamSet":
195 unzipped_subChunks=""
196 for subChunks in stream.getchildren():
197 self.file_handler.seek(int(subChunks.attrib["offset"]),0)
198 unzipped_subChunks+=self.file_handler.read(int(subChunks.attrib["nBytes"]))
199 if compressionType!="None":
200 unzipped_bytes=self.utils.inflate64(unzipped_subChunks)
201 else:
202 unzipped_bytes=unzipped_subChunks
203
204 elif stream.tag==self.schema + "byteStream":
205
206 self.file_handler.seek(int(stream.attrib["offset"]),0)
207 if compressionType!="None":
208 unzipped_bytes=self.utils.inflate64(self.file_handler.read(int(stream.attrib["nBytes"])))
209 else:
210 unzipped_bytes=self.file_handler.read(int(stream.attrib["nBytes"]))
211 if len(unzipped_bytes)>0:
212 tmp_buffer_object.write(unzipped_bytes)
213 return tmp_buffer_object
214
215 return tmp_buffer_object
216