10 examples of 'how to extract data from xml file using python' in Python

Every line of 'how to extract data from xml file using python' code snippets is scanned for vulnerabilities by our powerful machine learning engine that combs millions of open source libraries, ensuring your Python code is secure.

All examples are scanned by Snyk Code

By copying the Snyk Code Snippets, you agree to this disclaimer.
def test_xml_extraction(self):
    """Check how many nodes ``MockParser`` returns for namespaced XPath queries.

    The fixture document declares a default namespace and the
    XMLSchema-instance namespace; the query mapping binds them to the
    prefixes ``a`` and ``b``.  Every element name is queried in its
    ``//name``, ``name`` and ``/name`` forms and the number of parsed
    results is compared with the expected count.
    """
    xml = '''
    <?xml version="1.0" encoding="UTF-8"?>
    <body xmlns="http://test.com/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <ul>
    <li>a</li>
    <xsi:li>b</xsi:li>
    </ul>
    </body>
    '''

    namespaces = {'a': 'http://test.com/', 'b': 'http://www.w3.org/2001/XMLSchema-instance'}

    # (xpath expression, expected number of results), checked in order.
    expected_matches = (
        ('//a:ul', 1),
        ('a:ul', 1),
        ('/a:ul', 0),

        ('//a:li', 1),
        ('a:li', 0),
        ('/a:li', 0),

        ('//b:li', 1),
        ('b:li', 0),
        ('/b:li', 0),
    )
    for query, count in expected_matches:
        extractor = MockParser(xpath=query, namespaces=namespaces)
        self.assertEqual(len(extractor.parse(xml)), count)
Important

Use secure code every time

Secure your code as it's written. Use Snyk Code to scan source code in minutes – no build needed – and fix issues immediately. Enable Snyk Code

def readXml(input_file):
    """Read *input_file* through a fresh ``XmlUnpacker``.

    The file is opened in binary mode and closed again before returning.

    Returns:
        Whatever ``XmlUnpacker().read`` returns for the open file object.
    """
    with open(input_file, "rb") as stream:
        return XmlUnpacker().read(stream)
def parseXML(xmlFile):
    """
    Parse the xml file and print one ``tag => text`` line per element.

    Elements whose text is missing or empty are printed with the literal
    text ``None``.  Elements are reported in the order ``iterparse`` yields
    them; no ``events`` argument is passed, so the parser's default applies.

    Malformed XML raises the parser's error when iteration reaches it;
    lines for elements completed before that point have already been
    printed.

    Args:
        xmlFile: path of the XML file to read.
    """
    # The context manager closes the file even if reading raises.
    with open(xmlFile) as f:
        xml = f.read()

    # A single incremental parse is enough; the tree built by a separate
    # ``etl.parse`` call was never used.
    for _action, elem in etl.iterparse(StringIO(xml)):
        text = elem.text or "None"
        # Function-call form of print works on both Python 2 and Python 3.
        print(elem.tag + " => " + text)
def parse(filename='example.html'):
    """Load *filename*, extract its cells, build a row from them and print it.

    The work is delegated to ``loadpage``, ``getcells`` and ``makerow`` in
    that order; the resulting row is written to standard output.

    Args:
        filename: page to load; defaults to ``'example.html'``.
    """
    xml = loadpage(filename)
    cells = getcells(xml)
    row = makerow(cells)
    # Function-call form of print: identical output on Python 2 for a single
    # argument, and valid syntax on Python 3.
    print(row)
def parse_xml(content, transport, base_url=None):
    """Parse *content* with a parser that drops comments and keeps entities unresolved.

    An ``ImportResolver`` built from *transport* is registered on the parser
    before parsing.

    Args:
        content: the XML document to parse.
        transport: passed to ``ImportResolver``.
        base_url: forwarded to ``fromstring`` as ``base_url``.

    Returns:
        The result of ``fromstring`` for *content*.

    Raises:
        XMLSyntaxError: when the underlying parser raises
            ``etree.XMLSyntaxError``; the message embeds the parser's own
            error text.
    """
    parser = etree.XMLParser(remove_comments=True, resolve_entities=False)
    parser.resolvers.add(ImportResolver(transport))
    try:
        return fromstring(content, parser=parser, base_url=base_url)
    except etree.XMLSyntaxError as exc:
        # ``exc.message`` is gone on Python 3 and would raise AttributeError
        # here, hiding the real parse failure.  ``msg`` is the SyntaxError
        # attribute carrying the same text.
        # NOTE(review): assumes ``etree`` is lxml.etree, whose XMLSyntaxError
        # derives from SyntaxError -- confirm against the file's imports.
        raise XMLSyntaxError("Invalid XML content received (%s)" % exc.msg)
def parseEtree(inFileName):
    """Parse *inFileName*, rebuild it as a binding object and echo it as XML.

    The root element is mapped to a binding class via ``get_root_tag``; when
    no class is found, the tag ``'Device'`` and ``DeviceObjectType`` are used.
    The binding object is converted back to an element tree, serialized
    (pretty-printed, with XML declaration, UTF-8) and written to standard
    output followed by a newline.

    Returns:
        A ``(binding object, root element)`` tuple.
    """
    document = parsexml_(inFileName)
    source_root = document.getroot()
    tag, binding_class = get_root_tag(source_root)
    if binding_class is None:
        # No class resolved for this root element: use the module default.
        tag, binding_class = 'Device', DeviceObjectType
    binding = binding_class.factory()
    binding.build(source_root)
    # Drop this function's reference to the parsed document.
    document = None
    element = binding.to_etree(None, name_=tag)
    serialized = etree_.tostring(
        element, pretty_print=True, xml_declaration=True, encoding="utf-8")
    # NOTE(review): with an explicit encoding the serializer presumably
    # returns bytes, which a Python 3 text stdout rejects -- confirm the
    # target interpreter.
    out = sys.stdout
    out.write(serialized)
    out.write('\n')
    return binding, element
def parseEtree(inFileName):
    """Parse *inFileName*, rebuild it as a binding object and echo it as XML.

    The root element is mapped to a binding class via ``get_root_tag``; when
    no class is found, the tag ``'Network_Route_Object'`` and
    ``NetRouteObjectType`` are used.  The binding object is converted back to
    an element tree, serialized (pretty-printed, with XML declaration, UTF-8)
    and written to standard output followed by a newline.

    Returns:
        A ``(binding object, root element)`` tuple.
    """
    document = parsexml_(inFileName)
    source_root = document.getroot()
    tag, binding_class = get_root_tag(source_root)
    if binding_class is None:
        # No class resolved for this root element: use the module default.
        tag, binding_class = 'Network_Route_Object', NetRouteObjectType
    binding = binding_class.factory()
    binding.build(source_root)
    # Drop this function's reference to the parsed document.
    document = None
    element = binding.to_etree(None, name_=tag)
    serialized = etree_.tostring(
        element, pretty_print=True, xml_declaration=True, encoding="utf-8")
    # NOTE(review): with an explicit encoding the serializer presumably
    # returns bytes, which a Python 3 text stdout rejects -- confirm the
    # target interpreter.
    out = sys.stdout
    out.write(serialized)
    out.write('\n')
    return binding, element
def parseEtree(inFileName):
    """Parse *inFileName*, rebuild it as a binding object and echo it as XML.

    The root element is mapped to a binding class via ``get_root_tag``; when
    no class is found, the tag ``'Windows_Network_Route_Entry'`` and
    ``WindowsNetworkRouteEntryObjectType`` are used.  The binding object is
    converted back to an element tree, serialized (pretty-printed, with XML
    declaration, UTF-8) and written to standard output followed by a newline.

    Returns:
        A ``(binding object, root element)`` tuple.
    """
    document = parsexml_(inFileName)
    source_root = document.getroot()
    tag, binding_class = get_root_tag(source_root)
    if binding_class is None:
        # No class resolved for this root element: use the module default.
        tag, binding_class = (
            'Windows_Network_Route_Entry', WindowsNetworkRouteEntryObjectType)
    binding = binding_class.factory()
    binding.build(source_root)
    # Drop this function's reference to the parsed document.
    document = None
    element = binding.to_etree(None, name_=tag)
    serialized = etree_.tostring(
        element, pretty_print=True, xml_declaration=True, encoding="utf-8")
    # NOTE(review): with an explicit encoding the serializer presumably
    # returns bytes, which a Python 3 text stdout rejects -- confirm the
    # target interpreter.
    out = sys.stdout
    out.write(serialized)
    out.write('\n')
    return binding, element
def parseEtree(inFileName):
    """Parse *inFileName*, rebuild it as a binding object and echo it as XML.

    The root element is mapped to a binding class via ``get_root_tag``; when
    no class is found, the tag ``'Unix_Network_Route_Entry'`` and
    ``UnixNetworkRouteEntryObjectType`` are used.  The binding object is
    converted back to an element tree, serialized (pretty-printed, with XML
    declaration, UTF-8) and written to standard output followed by a newline.

    Returns:
        A ``(binding object, root element)`` tuple.
    """
    document = parsexml_(inFileName)
    source_root = document.getroot()
    tag, binding_class = get_root_tag(source_root)
    if binding_class is None:
        # No class resolved for this root element: use the module default.
        tag, binding_class = (
            'Unix_Network_Route_Entry', UnixNetworkRouteEntryObjectType)
    binding = binding_class.factory()
    binding.build(source_root)
    # Drop this function's reference to the parsed document.
    document = None
    element = binding.to_etree(None, name_=tag)
    serialized = etree_.tostring(
        element, pretty_print=True, xml_declaration=True, encoding="utf-8")
    # NOTE(review): with an explicit encoding the serializer presumably
    # returns bytes, which a Python 3 text stdout rejects -- confirm the
    # target interpreter.
    out = sys.stdout
    out.write(serialized)
    out.write('\n')
    return binding, element
def parseEtree(inFileName):
    """Parse *inFileName*, rebuild it as a binding object and echo it as XML.

    The root element is mapped to a binding class via ``get_root_tag``; when
    no class is found, the tag ``'Port'`` and ``PortObjectType`` are used.
    The binding object is converted back to an element tree, serialized
    (pretty-printed, with XML declaration, UTF-8) and written to standard
    output followed by a newline.

    Returns:
        A ``(binding object, root element)`` tuple.
    """
    document = parsexml_(inFileName)
    source_root = document.getroot()
    tag, binding_class = get_root_tag(source_root)
    if binding_class is None:
        # No class resolved for this root element: use the module default.
        tag, binding_class = 'Port', PortObjectType
    binding = binding_class.factory()
    binding.build(source_root)
    # Drop this function's reference to the parsed document.
    document = None
    element = binding.to_etree(None, name_=tag)
    serialized = etree_.tostring(
        element, pretty_print=True, xml_declaration=True, encoding="utf-8")
    # NOTE(review): with an explicit encoding the serializer presumably
    # returns bytes, which a Python 3 text stdout rejects -- confirm the
    # target interpreter.
    out = sys.stdout
    out.write(serialized)
    out.write('\n')
    return binding, element

Related snippets