下面是一个使用 Python 标准库 xml.sax 解析 XML(RSS 订阅源)的例子:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Flatten an RSS feed into a labelled plain-text summary with a SAX handler."""
import io
import xml.sax
import xml.sax.handler

# (old, new) substitutions applied in this order by deleteCharacter().
# Spaces are stripped first, so by the time the "<br />" entry runs every
# such tag has already become "<br/>"; the entry is kept only for parity
# with the historical list.
_REPLACEMENTS = (
    (u" ", u""),
    (u"nbsp;", u""),
    (u"<br />", u"\n"),
    (u"<br/>", u"\n"),
    (u"<p>", u""),
    (u"</p>", u""),
)


def deleteCharacter(string):
    """Strip spaces and a few HTML fragments from *string*.

    Every space, ``nbsp;``, ``<p>`` and ``</p>`` is removed and ``<br/>``
    becomes a newline.  Only the literal ``nbsp;`` is removed, so the ``&``
    of an ``&nbsp;`` entity is left behind.

    Args:
        string: Text to clean.

    Returns:
        The cleaned text encoded as UTF-8 bytes.
    """
    cleaned = string
    for old, new in _REPLACEMENTS:
        cleaned = cleaned.replace(old, new)
    # Encode once, after all text-level edits: calling replace() with text
    # patterns on already-encoded bytes raises TypeError on Python 3.
    return cleaned.encode("utf-8")


class TestHander(xml.sax.handler.ContentHandler):
    """Collect title, link, description and pubDate elements as text lines.

    Attributes:
        mapping: Empty dict.  Nothing in this handler fills it; it is kept
            so code that reads the attribute keeps working.
        content: UTF-8 encoded bytes holding one ``label:text`` line per
            recognised element, in document order.
        buffer: Character data received since the most recent start tag.
    """

    # Element name -> label written in front of that element's text.
    _LABELS = {
        "title": u"标题:",
        "link": u"链接地址:",
        "description": u"内容:",
        "pubDate": u"发布时间:",
    }

    def __init__(self):
        # Explicit base call (not super()) so this also works where
        # ContentHandler is an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        self.mapping = {}
        self.content = b""
        self.buffer = u""

    def startElement(self, name, attributes):
        """Start of a tag: drop whatever text was collected before it.

        Because the buffer is reset on every start tag, an element that
        contains child elements keeps only the text after its last child's
        start tag.
        """
        self.buffer = u""

    def characters(self, data):
        """Character data: append *data* to the current element's text.

        A parser may report one run of text in several calls, so chunks are
        joined verbatim.  Inserting a separator here would split words and
        break up markup such as ``<p>`` that arrives piecewise.
        """
        self.buffer += data

    def endElement(self, name):
        """End of a tag: emit a labelled line for the recognised elements.

        Elements whose name is not in ``_LABELS`` are ignored.
        """
        label = self._LABELS.get(name)
        if label is None:
            return
        # One newline per field, added here, so every field (including an
        # empty one) ends its own line.
        self.content += deleteCharacter(label + self.buffer + u"\n")


def main():
    """Download the feed, summarise it and write the summary to disk."""
    parser = xml.sax.make_parser()
    hander = TestHander()
    parser.setContentHandler(hander)
    parser.parse("http://blog.sina.com.cn/rss/soundfragment.xml")

    # Write the result to a file.  `content` is UTF-8 bytes; it is decoded
    # and written through a text-mode handle, the same mode the original
    # file(path, "w") call used.  The with-block closes the file even if
    # the write fails.
    file_path = "c:\\wt.txt"
    with io.open(file_path, "w", encoding="utf-8") as outfile:
        outfile.write(hander.content.decode("utf-8"))


if __name__ == "__main__":
    main()
相关资源:Java解析XML的jar包+四种解析方法