用python xml.sax解析XML实体

| 我在 Python 中使用 xml.sax 解析 XML,但我的代码无法捕获实体。为什么在下面的例子中 skippedEntity() 和 resolveEntity() 都没有任何输出:
import os
import cStringIO
import xml.sax
from xml.sax.handler import ContentHandler,EntityResolver,DTDHandler

#Class to parse and run test XML files
class TestHandler(ContentHandler,EntityResolver,DTDHandler):
    # One object acting as content handler, entity resolver and DTD handler.
    # Every callback simply prints what it receives so the sequence of SAX
    # events produced by the parser can be observed.

    #SAX handler - Entity resolver
    # Prints the public and system identifiers it is asked to resolve.
    # NOTE(review): nothing is returned, so the result is None; the SAX
    # EntityResolver contract expects a system identifier or an InputSource
    # to be returned -- confirm before using this with external entities.
    def resolveEntity(self,publicID,systemID):
        print \"TestHandler.resolveEntity: %s  %s\" % (publicID,systemID)

    # Prints the name of an entity that the parser reports as skipped.
    def skippedEntity(self, name):
        print \"TestHandler.skippedEntity: %s\" % (name)

    # Prints the identifiers of an unparsed entity declaration.
    # NOTE(review): the SAX DTDHandler callback is documented as
    # unparsedEntityDecl(name, publicId, systemId, ndata); this signature
    # has no `name` parameter, so a real callback would presumably fail
    # with a TypeError -- confirm against the xml.sax.handler docs.
    def unparsedEntityDecl(self,publicID,systemID,ndata):
        print \"TestHandler.unparsedEntityDecl: %s  %s\" % (publicID,systemID)

    # Prints the element name together with its 'summary' and 'arg'
    # attributes; a missing attribute is shown as an empty string.
    def startElement(self,name,attrs):
        # name = string.lower(name)
        summary = \'\' + attrs.get(\'summary\',\'\')
        arg = \'\' + attrs.get(\'arg\',\'\')
        print \'TestHandler.startElement(), %s : %s (%s)\' % (name,summary,arg)


def run(xml_string):
    # Parse xml_string with a SAX parser whose content handler, DTD handler
    # and entity resolver are all the same TestHandler instance.
    # SAX parse errors are caught and printed instead of being propagated.
    try:
        parser = xml.sax.make_parser()
        # Wrap the string in a file-like object so parser.parse() can read it.
        stream = cStringIO.StringIO(xml_string)

        curHandler = TestHandler()
        parser.setContentHandler(curHandler)
        parser.setDTDHandler( curHandler )
        parser.setEntityResolver( curHandler )

        parser.parse(stream)
        # Not reached when parse() raises; the stream is then left unclosed.
        stream.close()
    except (xml.sax.SAXParseException), e:
        print \"*** PARSER error: %s\" % e;

def main():
    # Build a one-line test document and hand it to run().
    # The internal DTD subset declares the entity `num`; the entity `not`
    # referenced in the element content is not declared anywhere.
    try:
        XML = \"<!DOCTYPE page[ <!ENTITY num \'foo\'> ]><test summary=\'step: &num;\'>Entity: &not;</test>\"
        run(XML)
    # Last-resort handler: report anything run() did not catch itself.
    except Exception, e:
      print \'FATAL ERROR: %s\' % (str(e))

# Run the demonstration only when this file is executed as a script.
if __name__== \'__main__\':
    main()
运行时,我看到的是:
 TestHandler.startElement(), step: foo ()
 *** PARSER error: <unknown>:1:36: undefined entity
为什么我看不到 &num; 对应的 resolveEntity 输出,也看不到 &not; 对应的 skippedEntity 输出?
已邀请:
        我认为resolveEntity和skippedEntity仅用于外部DTD。我通过修改XML使它起作用。
# Test document whose DTD is the external subset external.dtd; &foo; and
# &bar; are declared there, while &not; is not declared in that DTD.
XML = \"\"\"<?xml version=\"1.0\" encoding=\"utf-8\" ?>
<!DOCTYPE test SYSTEM \"external.dtd\" >
<test summary=\'step: &foo; &bar;\'>Entity: &not;</test>
\"\"\"
external.dtd包含两个简单的实体声明。
<!ENTITY foo \"bar\">
<!ENTITY bar \"foo\">
另外,我去掉了 resolveEntity 方法。输出如下:
TestHandler.startElement(), test : step: bar foo ()
TestHandler.skippedEntity: not
希望这可以帮助。     
        下面是您程序的修改版本,希望它容易理解。它演示了
TestHandler
的所有方法都会被调用的情况。
import StringIO
import xml.sax
from xml.sax.handler import ContentHandler

# Inheriting from EntityResolver and DTDHandler is not necessary
class TestHandler(ContentHandler):
    # Single handler object that main() registers as content handler,
    # entity resolver and DTD handler; each callback prints what it receives.

    # This method is only called for external entities. Must return a value.
    # Prints both identifiers and returns systemID unchanged.
    def resolveEntity(self, publicID, systemID):
        print \"TestHandler.resolveEntity(): %s %s\" % (publicID, systemID)
        return systemID

    # Prints the name of an entity that the parser reports as skipped.
    def skippedEntity(self, name):
        print \"TestHandler.skippedEntity(): %s\" % (name)

    # Prints the public and system identifiers of an unparsed entity
    # declaration; `name` and `ndata` are accepted but not printed.
    def unparsedEntityDecl(self, name, publicID, systemID, ndata):
        print \"TestHandler.unparsedEntityDecl(): %s %s\" % (publicID, systemID)

    # Prints the element's 'summary' attribute (empty string when absent).
    def startElement(self, name, attrs):
        summary = attrs.get(\'summary\', \'\')
        print \'TestHandler.startElement():\', summary

def main(xml_string):
    # Parse xml_string with a SAX parser that uses one TestHandler instance
    # as content handler, entity resolver and DTD handler.
    # SAX parse errors are caught and printed instead of being propagated.
    try:
        parser = xml.sax.make_parser()
        curHandler = TestHandler()
        parser.setContentHandler(curHandler)
        parser.setEntityResolver(curHandler)
        parser.setDTDHandler(curHandler)

        # Wrap the string in a file-like object so parser.parse() can read it.
        stream = StringIO.StringIO(xml_string)
        parser.parse(stream)
        # Not reached when parse() raises.
        stream.close()
    except xml.sax.SAXParseException, e:
        print \"*** PARSER error: %s\" % e

# Test document whose DTD is the external subset test.dtd; &num; is declared
# there, while &not; is not declared in the test.dtd shown with this example.
XML = \"\"\"<!DOCTYPE test SYSTEM \"test.dtd\">
<test summary=\'step: &num;\'>Entity: &not;</test>
\"\"\"

# Run the demonstration on the document above.
main(XML)
test.dtd包含:
<!ENTITY num \"FOO\">
<!ENTITY pic SYSTEM \'bar.gif\' NDATA gif>
输出:
TestHandler.resolveEntity(): None test.dtd
TestHandler.unparsedEntityDecl(): None bar.gif
TestHandler.startElement(): step: FOO
TestHandler.skippedEntity(): not
补充:据我所知,只有在使用外部 DTD 时才会调用 skippedEntity(至少我想不出反例;如果文档能写得更清楚一些就好了)。Adam 在他的回答中说,resolveEntity 仅用于外部 DTD。但这并不完全正确:在处理对外部实体的引用时也会调用 resolveEntity,无论该外部实体是在内部还是外部 DTD 子集中声明的。例如:
<!DOCTYPE test [
<!ENTITY num SYSTEM \"bar.txt\">
]>
bar.txt 的内容可以是任意文本,例如 FOO。注意,在这种情况下不能在属性值中引用该实体,因为 XML 不允许在属性值中引用外部实体。

要回复问题请先登录注册