1 '''
2 Created on 08/01/2010
3
4 XMLparser
5
6 @author: Luis
7 '''
8 from xml.etree import ElementTree as etree
9
10
11 from VData import VData
12 from SDS import SDS
13 from HDFhandler import HDFfile
14 from Utils import utils
15
16
17
18
19
20
21
22
'''
This module recursively parses an XML map file looking for supported XML tags. These tags contain metadata about an HDF
object in an HDF file. If a supported tag is found, the class uses an instance of "HDFfile" (module HDFhandler); this
class loads the HDF file and returns the object data in a normalized buffer.

This buffer and the XML tag are then passed to the handler class (VData for tables, SDS for arrays).
The handler classes are in charge of reconstructing the HDF objects and returning them as Python data structures.

'''
def __init__(self, hdf_path, map_file, operation, hdf_object, dump_format):
    '''
    Constructor:

    Parse the XML map file and initialize the state used while walking it.

    hdf_path:    String prepended to every HDF file name before it is opened.
    map_file:    Name of the XML map file; parsed here and later used as prefix of the dump file names.
    operation:   Stored in self.hdf_operation; the value "l" makes the walk only list the objects.
    hdf_object:  Stored in self.hdf_object.
    dump_format: Stored in self.dump_format; False selects CSV output, anything else the plain .dat output.

    self.tree: Root element of the parsed map file; its "name" attribute is the HDF file name.
    self.group_stack: Stack of group names later used to name the output files.
    self.external_files: Paths of the external files declared in the map, keyed by their id.

    '''
    # Parse first so the root element is available through a short local name.
    root = etree.parse(map_file).getroot()
    hdf_name = root.attrib["name"]

    # Values taken straight from the arguments.
    self.xml_file = map_file
    self.hdf_path = hdf_path
    self.hdf_operation = operation
    self.hdf_object = hdf_object
    self.dump_format = dump_format

    # Parsing state.
    self.schema = "{http://schemas.hdfgroup.org/hdf4/h4}"
    self.depth = 0
    self.tree = root
    self.hdf_file_name = hdf_name
    self.group_stack = []
    self.external_files = {}
    self.vdata_table = []
    self.SDS_table = []

    # Collaborators, created in the same order as before.
    self.hdf_handler = HDFfile(hdf_path + hdf_name)
    self.vdata = VData()
    self.SDS = SDS()
    self.utils = utils()
70
'''
Parses the XML map file with the ElementTree API by calling the recursive method "recursiveWalk" on the root element.
'''
75
76 self.group_stack.append("Root--")
77 self.recursiveWalk(self.tree,1)
78
79
def recursiveWalk(self, node, depth):
    '''
    Recursively visit the children of "node", listing or dumping every supported tag.

    node:  ElementTree element whose children are examined.
    depth: Nesting level of "node"; it is stored in self.depth and gives the number of "-" characters
           printed in front of each listed object.

    Tags are matched against self.schema plus the tag name:

    ExternalFile: "location"/"filename" is stored in self.external_files under the tag's "id".
    Group:        In list mode (self.hdf_operation == "l") the group name is printed. Otherwise '_G-<id>_' is
                  pushed on self.group_stack while the group's children are visited and popped afterwards, so
                  the stack holds exactly the groups enclosing the current element.
    Table:        The name is printed; unless in list mode, the data is read (from the external file referenced
                  by "dataInExternalFile", or from self.hdf_handler), extracted with self.vdata and written
                  with self.utils. A Table without a "tableData" child is reported and skipped.
    Array:        Same as Table, using "arrayData" and self.SDS.

    Output files are named <xml_file>_dump/<group stack><parent name> <object id>. The group stack is
    cumulative: an object under the groups ID_ABC and ID_DEF gets the group part _G-ID_ABC__G-ID_DEF_.

    The print calls use the single-argument parenthesised form, which is valid on Python 2 and Python 3.
    '''
    self.depth = depth
    list_only = self.hdf_operation == "l"
    external_tag = self.schema + "dataInExternalFile"

    # Iterating the element replaces Element.getchildren(), which was removed in Python 3.9.
    for child in list(node):
        tag = child.tag
        group_pushed = False

        if tag == self.schema + "ExternalFile":
            self.external_files[str(child.attrib["id"])] = (
                str(child.attrib["location"]) + "/" + str(child.attrib["filename"]))
            print(self.external_files)

        if tag == self.schema + "Group":
            if list_only:
                print("-" * depth + "Group: " + child.attrib["name"])
            else:
                # Pushed here and popped once the subtree is done (end of the loop body), so sibling
                # groups never inherit or lose a prefix.
                self.group_stack.append('_G-' + child.attrib["id"] + '_')
                group_pushed = True

        if tag == self.schema + "Table":
            print("-" * depth + "VData: " + child.attrib["name"])
            if not list_only:
                data_node = child.find(self.schema + "tableData")
                if not etree.iselement(data_node):
                    # Same guard as the Array branch; find() returns None when the tag is absent.
                    print("tableData not found")
                else:
                    first = data_node[0] if len(data_node) > 0 else None
                    if first is not None and first.tag == external_tag:
                        external = HDFfile(self.hdf_path + self.external_files[first.attrib["ref"]])
                        data_buffer = external.linearizeDataSpace(first, "VData")
                    else:
                        data_buffer = self.hdf_handler.linearizeDataSpace(data_node, "VData")

                    self.vdata_table = self.vdata.Extract(child, data_buffer, self.dump_format)
                    temp_file_name = (self.xml_file + "_dump/" + "".join(self.group_stack) +
                                      node.attrib["name"] + " " + child.attrib["id"])

                    # "== False" is kept so that values such as None still select the plain output.
                    if self.dump_format == False:
                        self.utils.createCSVfromTable(self.vdata_table, temp_file_name)
                    else:
                        self.utils.createPlainDatFile(self.vdata_table, temp_file_name)

        elif tag == self.schema + "Array":
            print("-" * depth + "Array: " + child.attrib["name"])
            if not list_only:
                data_node = child.find(self.schema + "arrayData")
                data_buffer = None
                if not etree.iselement(data_node):
                    print("arrayData not found")
                else:
                    first = data_node[0] if len(data_node) > 0 else None
                    if first is not None and first.tag == external_tag:
                        print("External data")
                        external = HDFfile(self.hdf_path + self.external_files[first.attrib["ref"]])
                        data_buffer = external.linearizeDataSpace(child, "SDS")
                    else:
                        data_buffer = self.hdf_handler.linearizeDataSpace(child, "SDS")

                if data_buffer is not None:
                    temp_file_name = (self.xml_file + "_dump/" + "".join(self.group_stack) +
                                      node.attrib["name"] + " " + child.attrib["id"])
                    if self.dump_format == False:
                        self.SDS_table = self.SDS.Extract(child, data_buffer)
                        self.utils.createCSVfromTable(self.SDS_table, temp_file_name)
                    else:
                        self.utils.createPlainDatFile(data_buffer.getvalue(), temp_file_name)

        if len(child) > 0:
            self.recursiveWalk(child, depth + 1)
            # The nested call overwrote self.depth; restore this level's value.
            self.depth = depth

        if group_pushed:
            self.group_stack.pop()
175