Python 解析XML
2020-07-11 本文已影响0人
墨凌风起
import xml.sax
class MovieHandleSAX(xml.sax.ContentHandler):
    """Print the fields of every <movie> element while a SAX parser runs.

    SAX is an event-driven API made of two parts: a parser and an event
    handler.  The parser reads the XML document and sends events to the
    handler, and the handler reacts to those events.

    ContentHandler methods used by SAX:

    characters(content)
        Called with the character data found
        * between the start of a line and the next tag,
        * between one tag and the next tag,
        * between a tag and the end of the line.
        The tag may be either a start tag or an end tag.
    startDocument()
        Called when the document starts.
    endDocument()
        Called when the parser reaches the end of the document.
    startElement(name, attrs)
        Called for every opening tag; ``name`` is the tag name and
        ``attrs`` holds the attributes of the tag.
    endElement(name)
        Called for every closing tag.

    The most recently read text of each reported field is kept in the
    attribute of the same name (``type``, ``format``, ``year``, ``rating``,
    ``stars``, ``description``).
    """

    # Child elements of <movie> that are reported, mapped to the label
    # printed in front of their text.
    _FIELD_LABELS = {
        "type": "Type:",
        "format": "Format:",
        "year": "Year:",
        "rating": "Rating:",
        "stars": "Stars:",
        "description": "Description:",
    }

    def __init__(self):
        # The base class sets up its own state (the document locator).
        super().__init__()
        # Name of the element currently open, "" when none is tracked.
        self.CurrentData = ""
        self.type = ""
        self.format = ""
        self.year = ""
        self.rating = ""
        self.stars = ""
        self.description = ""

    # Called when an element starts.
    def startElement(self, tag, attrubutes):
        """Remember the opened tag and print the header of a <movie>.

        ``tag`` is the element name; ``attrubutes`` is the mapping of the
        element's attributes supplied by the parser.
        """
        self.CurrentData = tag
        if tag in self._FIELD_LABELS:
            # Start from an empty buffer so characters() can append to it.
            setattr(self, tag, "")
        if tag == "movie":
            print("**********Movie**********")
            # A movie without a title is still listed, just without the
            # title line (same guard as the DOM variant uses).
            if "title" in attrubutes:
                title = attrubutes["title"]
                print("Title:", title)

    # Called when an element ends.
    def endElement(self, arg):
        """Print the collected text of the field that has just been closed."""
        label = self._FIELD_LABELS.get(self.CurrentData)
        if label is not None:
            # Print the value belonging to this field, not always self.type.
            print(label, getattr(self, self.CurrentData))
        # Text that follows the closing tag belongs to no field.
        self.CurrentData = ""

    # Called for character data inside an element.
    def characters(self, content):
        """Append ``content`` to the buffer of the currently open field."""
        field = self.CurrentData
        if field in self._FIELD_LABELS:
            # A parser may deliver one text node in several chunks, so the
            # chunks are concatenated instead of overwriting each other.
            setattr(self, field, getattr(self, field) + content)
from xml.dom.minidom import parse
import xml.dom.minidom
def MovieHandleDOM(path="movies.xml"):
    """Print the movie catalogue stored in an XML file using the DOM API.

    The whole document is loaded with ``xml.dom.minidom.parse``.  The
    ``shelf`` attribute of the root element is printed when present, then
    for every ``<movie>`` element its ``title`` attribute and the text of
    its ``type``, ``format``, ``rating`` and ``description`` children.

    Args:
        path: File name (or file object) of the XML document to read.
            Defaults to ``"movies.xml"``.

    Returns:
        None.  All output goes to standard output.
    """
    domTree = xml.dom.minidom.parse(path)
    collection = domTree.documentElement
    if collection.hasAttribute("shelf"):
        print("Root element:%s" % collection.getAttribute("shelf"))

    # (tag name, label printed in front of the text) in output order.
    fields = (
        ("type", "Type"),
        ("format", "Format"),
        ("rating", "Rating"),
        ("description", "Description"),
    )

    for movie in collection.getElementsByTagName("movie"):
        print("**********Movie**********")
        if movie.hasAttribute("title"):
            # getAttribute returns the value; hasAttribute is only a bool.
            print("Title: %s" % movie.getAttribute("title"))
        for tag, label in fields:
            matches = movie.getElementsByTagName(tag)
            # Skip fields that are missing or empty instead of failing
            # with IndexError on an incomplete record.
            if not matches or not matches[0].childNodes:
                continue
            print("%s: %s" % (label, matches[0].childNodes[0].data))
if __name__ == '__main__':
    # The constant below is a manual switch: left at 0 the DOM demo runs,
    # a truthy value selects the SAX demo instead.
    if not 0:
        MovieHandleDOM()
    else:
        # Create the parser object, turn namespace processing off, attach
        # the content handler and parse the sample file.
        sax_parser = xml.sax.make_parser()
        sax_parser.setFeature(xml.sax.handler.feature_namespaces, 0)
        movie_handler = MovieHandleSAX()
        sax_parser.setContentHandler(movie_handler)
        sax_parser.parse("movies.xml")
movies.xml(上面代码读取的示例数据文件)
<collection shelf="New Arrivals">
<movie title="Enemy Behind">
<type>War, Thriller</type>
<format>DVD</format>
<year>2003</year>
<rating>PG</rating>
<stars>10</stars>
<description>Talk about a US-Japan war</description>
</movie>
<movie title="Transformers">
<type>Anime, Science Fiction</type>
<format>DVD</format>
<year>1989</year>
<rating>R</rating>
<stars>8</stars>
<description>A scientific fiction</description>
</movie>
<movie title="Trigun">
<type>Anime, Action</type>
<format>DVD</format>
<episodes>4</episodes>
<rating>PG</rating>
<stars>10</stars>
<description>Vash the Stampede!</description>
</movie>
<movie title="Ishtar">
<type>Comedy</type>
<format>VHS</format>
<rating>PG</rating>
<stars>2</stars>
<description>Viewable boredom</description>
</movie>
</collection>