root/trunk/lib/elementtree/SimpleXMLTreeBuilder.py

Revision 853, 4.7 kB (checked in by markus, 2 years ago)
Line 
1#
2# ElementTree
3# $Id: SimpleXMLTreeBuilder.py 1862 2004-06-18 07:31:02Z Fredrik $
4#
5# A simple XML tree builder, based on Python's xmllib
6#
7# Note that due to bugs in xmllib, this builder does not fully support
8# namespaces (unqualified attributes are put in the default namespace,
9# instead of being left as is).  Run this module as a script to find
10# out if this affects your Python version.
11#
12# history:
13# 2001-10-20 fl   created
14# 2002-05-01 fl   added namespace support for xmllib
15# 2002-08-17 fl   added xmllib sanity test
16#
17# Copyright (c) 1999-2004 by Fredrik Lundh.  All rights reserved.
18#
19# fredrik@pythonware.com
20# http://www.pythonware.com
21#
22# --------------------------------------------------------------------
23# The ElementTree toolkit is
24#
25# Copyright (c) 1999-2004 by Fredrik Lundh
26#
27# By obtaining, using, and/or copying this software and/or its
28# associated documentation, you agree that you have read, understood,
29# and will comply with the following terms and conditions:
30#
31# Permission to use, copy, modify, and distribute this software and
32# its associated documentation for any purpose and without fee is
33# hereby granted, provided that the above copyright notice appears in
34# all copies, and that both that copyright notice and this permission
35# notice appear in supporting documentation, and that the name of
36# Secret Labs AB or the author not be used in advertising or publicity
37# pertaining to distribution of the software without specific, written
38# prior permission.
39#
40# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
41# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
42# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
43# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
44# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
45# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
46# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
47# OF THIS SOFTWARE.
48# --------------------------------------------------------------------
49
50##
51# Tools to build element trees from XML files, using <b>xmllib</b>.
52# This module can be used instead of the standard tree builder, for
53# Python versions where "expat" is not available (such as 1.5.2).
54# <p>
55# Note that due to bugs in <b>xmllib</b>, the namespace support is
56# not reliable (you can run the module as a script to find out exactly
57# how unreliable it is on your Python version).
58##
59
60import xmllib, string
61
62import ElementTree
63
64##
65# ElementTree builder for XML source data.
66#
67# @see elementtree.ElementTree
68
class TreeBuilder(xmllib.XMLParser):

    ##
    # Creates a new tree builder.
    #
    # @param html If true, the entities listed in the standard
    #     <b>htmlentitydefs</b> module are made available to this
    #     parser, in addition to the ones it inherits from xmllib.

    def __init__(self, html=0):
        self.__builder = ElementTree.TreeBuilder()
        if html:
            import htmlentitydefs
            # Nothing has been assigned to self.entitydefs yet, so the
            # lookup resolves to the dictionary inherited from the parser
            # class.  Updating that dictionary in place would leak the
            # HTML entities into every other parser (including ones
            # created with html=0), so extend a private copy instead.
            entitydefs = self.entitydefs.copy()
            entitydefs.update(htmlentitydefs.entitydefs)
            self.entitydefs = entitydefs
        xmllib.XMLParser.__init__(self)

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        xmllib.XMLParser.feed(self, data)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        # flush the parser first, then ask the element builder for
        # whatever it has assembled
        xmllib.XMLParser.close(self)
        return self.__builder.close()

    ##
    # (Internal) Forwards character data to the element builder.

    def handle_data(self, data):
        self.__builder.data(data)

    # CDATA sections are treated exactly like ordinary character data
    handle_cdata = handle_data

    ##
    # (Internal) Opens a new element in the element builder.  The tag
    # and all attribute names are passed through <b>fixname</b> first.

    def unknown_starttag(self, tag, attrs):
        attrib = {}
        for key, value in attrs.items():
            attrib[fixname(key)] = value
        self.__builder.start(fixname(tag), attrib)

    ##
    # (Internal) Closes the current element in the element builder,
    # using the same name conversion as for the start tag.

    def unknown_endtag(self, tag):
        self.__builder.end(fixname(tag))
109
110
111def fixname(name, split=string.split):
112    # xmllib in 2.0 and later provides limited (and slightly broken)
113    # support for XML namespaces.
114    if " " not in name:
115        return name
116    return "{%s}%s" % tuple(split(name, " ", 1))
117
118
119if __name__ == "__main__":
120    import sys
121    # sanity check: look for known namespace bugs in xmllib
122    p = TreeBuilder()
123    text = """\
124    <root xmlns='default'>
125       <tag attribute='value' />
126    </root>
127    """
128    p.feed(text)
129    tree = p.close()
130    status = []
131    # check for bugs in the xmllib implementation
132    tag = tree.find("{default}tag")
133    if tag is None:
134        status.append("namespaces not supported")
135    if tag is not None and tag.get("{default}attribute"):
136        status.append("default namespace applied to unqualified attribute")
137    # report bugs
138    if status:
139        print "xmllib doesn't work properly in this Python version:"
140        for bug in status:
141            print "-", bug
142    else:
143        print "congratulations; no problems found in xmllib"
Note: See TracBrowser for help on using the browser.