您的位置:首页 > 编程语言 > Python开发

python sax 解析XML文件

2016-09-05 18:05 525 查看
# -*- coding: utf-8 -*-
from xml.sax.handler import ContentHandler
from xml.sax import parse

class HeadlineHandler(ContentHandler):
    """SAX content handler that collects the text of every ``h1`` element.

    For each ``h1`` element in the parsed document, all character data
    found between its start and end tags (including text inside nested
    elements) is joined into one string and appended to the ``headlines``
    list supplied by the caller.
    """

    # True while the parser is between an opening and a closing ``h1`` tag.
    in_headline = False

    def __init__(self, headlines):
        """Remember the output list and start with an empty text buffer.

        Args:
            headlines: list that receives one string per ``h1`` element;
                it is mutated in place.
        """
        ContentHandler.__init__(self)
        self.headlines = headlines
        # Text chunks of the headline currently being read.
        self.data = []

    def startElement(self, name, attrs):
        """Start collecting text when an ``h1`` element opens."""
        if name == 'h1':
            self.in_headline = True

    def endElement(self, name):
        """Store the collected text when an ``h1`` element closes."""
        if name == 'h1':
            text = ''.join(self.data)
            self.data = []
            self.headlines.append(text)
            self.in_headline = False

    def characters(self, string):
        """Buffer character data while inside an ``h1`` element.

        The parser may deliver the text of one element in several chunks,
        so the pieces are buffered and only joined in ``endElement``.
        """
        if self.in_headline:
            self.data.append(string)

# Collect the text of every <h1> element in website.xml and list it.
headlines = []
parse('website.xml', HeadlineHandler(headlines))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('The following <h1> element were found:')
for h in headlines:
    print(h)
# -*- coding: utf-8 -*-
from xml.sax import parse
from xml.sax.handler import ContentHandler
from xml.sax.saxutils import escape, quoteattr

class Pagemaker(ContentHandler):
    """SAX content handler that writes one HTML file per ``page`` element.

    When a ``page`` element starts, a file named ``<name>.html`` (taken from
    the element's ``name`` attribute) is opened in the current directory and
    an HTML header using the ``title`` attribute is written. Everything
    nested inside the ``page`` element is copied to that file as markup, and
    the file is finished and closed when the ``page`` element ends.
    Content outside ``page`` elements is ignored.
    """

    # True while inside a ``page`` element; nested content is then copied
    # to ``self.out``.
    passthrough = False

    def startElement(self, name, attrs):
        """Open a new output file for ``page``, or copy a nested start tag.

        Raises:
            KeyError: if a ``page`` element lacks a ``name`` or ``title``
                attribute.
        """
        if name == 'page':
            self.passthrough = True
            self.out = open(attrs['name'] + '.html', 'w')
            self.out.write('<html><head>\n')
            # The parser delivers attribute values already decoded, so
            # markup characters must be escaped again before writing.
            self.out.write('<title>%s</title>\n' % escape(attrs['title']))
            self.out.write('</head><body>\n')
        elif self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                # quoteattr escapes the value and adds the surrounding quotes.
                self.out.write(' %s=%s' % (key, quoteattr(val)))
            self.out.write('>')

    def endElement(self, name):
        """Finish and close the file for ``page``, or copy a nested end tag."""
        if name == 'page':
            self.passthrough = False
            self.out.write('\n</body></html>\n')
            self.out.close()
        elif self.passthrough:
            self.out.write('</%s>' % name)

    def characters(self, chars):
        """Copy character data inside a ``page`` element to the output file."""
        if self.passthrough:
            # Text arrives decoded (e.g. ``&amp;`` as ``&``); re-escape it so
            # the generated HTML stays well-formed.
            self.out.write(escape(chars))

# Parse website.xml and feed the resulting SAX events to a Pagemaker handler.
parse('website.xml', Pagemaker())
parse函数负责读取文件并且生成事件

由于要生成事件,就需要调用相应的事件处理程序。这些事件处理程序作为内容处理器对象的方法来实现,因此需要继承 xml.sax.handler 中的 ContentHandler 类,因为它实现了所有需要的事件处理程序。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python sax xml