您的位置:首页 > 编程语言 > Python开发

python模块:xml.etree.ElementTree

2018-01-29 22:25 573 查看
1 """Lightweight XML support for Python.
2
3  XML is an inherently hierarchical data format, and the most natural way to
4  represent it is with a tree.  This module has two classes for this purpose:
5
6     1. ElementTree represents the whole XML document as a tree and
7
8     2. Element represents a single node in this tree.
9
10  Interactions with the whole document (reading and writing to/from files) are
11  usually done on the ElementTree level.  Interactions with a single XML element
12  and its sub-elements are done on the Element level.
13
14  Element is a flexible container object designed to store hierarchical data
15  structures in memory. It can be described as a cross between a list and a
16  dictionary.  Each Element has a number of properties associated with it:
17
18     'tag' - a string containing the element's name.
19
20     'attributes' - a Python dictionary storing the element's attributes.
21
22     'text' - a string containing the element's text content.
23
24     'tail' - an optional string containing text after the element's end tag.
25
26     And a number of child elements stored in a Python sequence.
27
28  To create an element instance, use the Element constructor,
29  or the SubElement factory function.
30
31  You can also use the ElementTree class to wrap an element structure
32  and convert it to and from XML.
33
34 """
35
36 #---------------------------------------------------------------------
37 # Licensed to PSF under a Contributor Agreement.
38 # See http://www.python.org/psf/license for licensing details.
39 #
40 # ElementTree
41 # Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
42 #
43 # fredrik@pythonware.com
44 # http://www.pythonware.com
45 # --------------------------------------------------------------------
46 # The ElementTree toolkit is
47 #
48 # Copyright (c) 1999-2008 by Fredrik Lundh
49 #
50 # By obtaining, using, and/or copying this software and/or its
51 # associated documentation, you agree that you have read, understood,
52 # and will comply with the following terms and conditions:
53 #
54 # Permission to use, copy, modify, and distribute this software and
55 # its associated documentation for any purpose and without fee is
56 # hereby granted, provided that the above copyright notice appears in
57 # all copies, and that both that copyright notice and this permission
58 # notice appear in supporting documentation, and that the name of
59 # Secret Labs AB or the author not be used in advertising or publicity
60 # pertaining to distribution of the software without specific, written
61 # prior permission.
62 #
63 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
64 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
65 # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
66 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
67 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
68 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
69 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
70 # OF THIS SOFTWARE.
71 # --------------------------------------------------------------------
72
# Public symbols exported by ``from <this module> import *``.
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLID",
    "XMLParser",
    "XMLPullParser",
    "register_namespace",
]

# Version string; part of the public API (it is listed in __all__ above).
VERSION = "1.3.0"
93
94 import sys
95 import re
96 import warnings
97 import io
98 import collections
99 import contextlib
100
101 from . import ElementPath
102
103
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the usual exception value, a ParseError carries two extra
    attributes:

        'code'     - the specific exception code
        'position' - the line and column of the error

    """
114
115 # --------------------------------------------------------------------
116
117
def iselement(element):
    """Return True if *element* looks like an Element.

    The check is purely duck-typed: any object exposing a ``tag``
    attribute qualifies.
    """
    try:
        element.tag
    except AttributeError:
        return False
    return True
121
122
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements.  That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name.  *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    # The element's name.
    tag = None

    # Dictionary of the element's attributes.
    attrib = None

    # Text before the first subelement.  This is either a string or the
    # value None.  If there is no text, this attribute may be either None
    # or the empty string, depending on the parser.
    text = None

    # Text after this element's end tag, but before the next sibling
    # element's start tag.  This is either a string or the value None.  If
    # there was no text, this attribute may be either None or an empty
    # string, depending on the parser.
    tail = None

    def __init__(self, tag, attrib={}, **extra):
        """Create an element named *tag*.

        *attrib* must be a dict (TypeError otherwise); it is copied, so
        neither the caller's dict nor the shared ``{}`` default is ever
        mutated.  *extra* keyword arguments are merged into the copy.
        """
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        attrib = attrib.copy()
        attrib.update(extra)
        self.tag = tag
        self.attrib = attrib
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        """
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        # Slice assignment copies the child references, not the children.
        elem[:] = self
        return elem

    def __len__(self):
        """Return the number of direct subelements."""
        return len(self._children)

    def __bool__(self):
        """Return True if the element has subelements (emits FutureWarning)."""
        warnings.warn(
            "The behavior of this method will change in future versions.  "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        """Return the subelement(s) at *index* (an int or a slice)."""
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the subelement(s) at *index*.

        No type check is performed on the assigned value(s).
        """
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the subelement(s) at *index* (an int or a slice)."""
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        Raises TypeError if *subelement* is not an Element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from an iterable.

        *elements* is any iterable with zero or more elements, including
        one-shot iterators and generators.

        Raises TypeError if any item is not an Element; in that case no
        item is appended.

        """
        # Materialize once: validating by iterating and then handing the
        # same object to list.extend() would append nothing when *elements*
        # is an iterator, because validation already exhausted it.
        new_children = list(elements)
        for element in new_children:
            self._assert_is_element(element)
        self._children.extend(new_children)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*.

        Raises TypeError if *subelement* is not an Element.
        """
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        """Raise TypeError unless *e* is an instance of the Python Element."""
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents.  To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        self._children.remove(subelement)

    def getchildren(self):
        """(Deprecated) Return all subelements.

        Elements are returned in document order.  The returned list is the
        element's internal child list, not a copy.

        """
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found.  Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently.  *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns a string containing the attribute value, or the default if
        attribute was not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently.  *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Names are returned in an arbitrary order, just like an ordinary
        Python dict.  Equivalent to attrib.keys()

        """
        return self.attrib.keys()

    def items(self):
        """Get element attributes as (name, value) pairs.

        The attributes are returned in arbitrary order.  Equivalent to
        attrib.items().

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements);
        "*" is treated the same as None.

        Return an iterator containing all the matching elements.

        """
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """(Pending deprecation) Return ``list(self.iter(tag))``."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.  Empty or None text and tail values
        are skipped.

        """
        tag = self.tag
        # An element whose tag is neither a string nor None contributes no
        # text at all.
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
441
def SubElement(parent, tag, attrib={}, **extra):
    """Create a child element and attach it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, and *extra*
    are additional attributes given as keyword arguments.  *attrib* itself
    is never modified; the attributes are merged into a copy.

    Returns the newly created element, which has already been appended to
    *parent*.

    """
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
459
460
def Comment(text=None):
    """Comment element factory.

    Build a special element whose tag is this very function, so that code
    walking the tree can recognise it with ``elem.tag is Comment``; the
    standard serializer writes such an element out as an XML comment.

    *text* is a string containing the comment string.

    """
    node = Element(Comment)
    node.text = text
    return node
473
474
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    Build a special element whose tag is this very function, so that code
    walking the tree can recognise it with
    ``elem.tag is ProcessingInstruction``.

    *target* is a string containing the processing instruction, *text* is a
    string containing the processing instruction contents, if any.  The
    element's text is *target*, followed by a space and *text* when *text*
    is non-empty.

    """
    node = Element(ProcessingInstruction)
    node.text = (target + " " + text) if text else target
    return node

# Short alias for ProcessingInstruction.
PI = ProcessingInstruction
492
493
class QName:
    """Qualified name wrapper.

    Wrap a QName attribute value so that namespaces are handled properly
    on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Comparison and hashing delegate to the wrapped text, so a QName compares
    equal to (and hashes like) the equivalent plain string.

    """

    def __init__(self, text_or_uri, tag=None):
        # A falsy tag (None or "") leaves the first argument untouched.
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    # Each comparison unwraps *other* when it is a QName and otherwise
    # compares the wrapped text with *other* directly.

    def __le__(self, other):
        return self.text <= (other.text if isinstance(other, QName) else other)

    def __lt__(self, other):
        return self.text < (other.text if isinstance(other, QName) else other)

    def __ge__(self, other):
        return self.text >= (other.text if isinstance(other, QName) else other)

    def __gt__(self, other):
        return self.text > (other.text if isinstance(other, QName) else other)

    def __eq__(self, other):
        return self.text == (other.text if isinstance(other, QName) else other)
538
539 # --------------------------------------------------------------------
540
541
class ElementTree:
    """An XML element hierarchy.

    This class also provides support for serialization to and from
    standard XML.

    *element* is an optional root element node,
    *file* is an optional file handle or file name of an XML file whose
    contents will be used to initialize the tree with.

    """
    def __init__(self, element=None, file=None):
        self._root = element # first node
        if file:
            # Parsing replaces whatever *element* was given as the root.
            self.parse(file)

    def getroot(self):
        """Return root element of this tree."""
        return self._root

    def _setroot(self, element):
        """Replace root element of this tree.

        This will discard the current contents of the tree and replace it
        with the given element.  Use with care!

        """
        self._root = element

    def parse(self, source, parser=None):
        """Load external XML document into element tree.

        *source* is a file name or file object, *parser* is an optional parser
        instance that defaults to XMLParser.

        ParseError is raised if the parser fails to parse the document.

        Returns the root element of the given source document.

        """
        close_source = False
        if not hasattr(source, "read"):
            # Only a file opened here is closed here; a caller-supplied
            # file object is left open.
            source = open(source, "rb")
            close_source = True
        try:
            if parser is None:
                # If no parser was specified, create a default XMLParser
                parser = XMLParser()
                if hasattr(parser, '_parse_whole'):
                    # The default XMLParser, when it comes from an accelerator,
                    # can define an internal _parse_whole API for efficiency.
                    # It can be used to parse the whole source without feeding
                    # it with chunks.
                    self._root = parser._parse_whole(source)
                    return self._root
            while True:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if close_source:
                source.close()

    def iter(self, tag=None):
        """Create and return tree iterator for the root element.

        The iterator loops over all elements in this tree, in document order.

        *tag* is a string with the tag name to iterate over
        (default is to return all elements).

        """
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        """(Pending deprecation) Return ``list(self.iter(tag))``."""
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
        )
        return list(self.iter(tag))

    @staticmethod
    def _relative_path(path):
        """Return *path* with a leading "/" rewritten to "./".

        A path starting with "/" is searched relative to the root element;
        a FutureWarning tells the caller which path is actually used.
        """
        if path[:1] == "/":
            path = "." + path
            # stacklevel=3: skip this helper and the public find* method so
            # the warning is attributed to the code that called find*().
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version.  If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        Same as getroot().find(path), which is Element.find()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        path = self._relative_path(path)
        return self._root.find(path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        Same as getroot().findtext(path),  which is Element.findtext()

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the text content of the first matching element, or *default*
        if no element was found.

        """
        path = self._relative_path(path)
        return self._root.findtext(path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().findall(path), which is Element.findall().

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return list containing all matching elements in document order.

        """
        path = self._relative_path(path)
        return self._root.findall(path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().iterfind(path), which is element.iterfind()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        path = self._relative_path(path)
        return self._root.iterfind(path, namespaces)

    def write(self, file_or_filename,
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None, *,
              short_empty_elements=True):
        """Write element tree to a file as XML.

        Arguments:
          *file_or_filename* -- file name or a file object opened for writing

          *encoding* -- the output encoding (default: US-ASCII, or UTF-8
                        when *method* is "c14n")

          *xml_declaration* -- bool indicating if an XML declaration should be
                               added to the output. If None, an XML declaration
                               is added if encoding IS NOT either of:
                               US-ASCII, UTF-8, or Unicode

          *default_namespace* -- sets the default XML namespace (for "xmlns")

          *method* -- either "xml" (default), "html", "text", or "c14n"

          *short_empty_elements* -- controls the formatting of elements
                                    that contain no content. If True (default)
                                    they are emitted as a single self-closed
                                    tag, otherwise they are emitted as a pair
                                    of start/end tags

        Raises ValueError if *method* is not a known serialization method.

        """
        if not method:
            method = "xml"
        elif method not in _serialize:
            raise ValueError("unknown method %r" % method)
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        enc_lower = encoding.lower()
        with _get_writer(file_or_filename, enc_lower) as write:
            # The declaration is only ever written for the "xml" method.
            if method == "xml" and (xml_declaration or
                    (xml_declaration is None and
                     enc_lower not in ("utf-8", "us-ascii", "unicode"))):
                declared_encoding = encoding
                if enc_lower == "unicode":
                    # Retrieve the default encoding for the xml declaration
                    import locale
                    declared_encoding = locale.getpreferredencoding()
                write("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))
            if method == "text":
                _serialize_text(write, self._root)
            else:
                qnames, namespaces = _namespaces(self._root, default_namespace)
                serialize = _serialize[method]
                serialize(write, self._root, qnames, namespaces,
                          short_empty_elements=short_empty_elements)

    def write_c14n(self, file):
        """Write the tree to *file* using the "c14n" method."""
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
781
782 # --------------------------------------------------------------------
783 # serialization support
784
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Yield a text ``write`` callable for *file_or_filename*.

    *file_or_filename* is either an object with a ``write`` attribute or
    something accepted by ``open()``.  *encoding* is the output encoding;
    the special value "unicode" means text is passed through without an
    explicit encoding.  Resources created here are released when the
    context exits; a file object supplied by the caller is not closed.
    """
    try:
        write = file_or_filename.write
    except AttributeError:
        # No write attribute: treat the argument as a file name.
        open_kwargs = {}
        if encoding != "unicode":
            open_kwargs = {"encoding": encoding,
                           "errors": "xmlcharrefreplace"}
        with open(file_or_filename, "w", **open_kwargs) as stream:
            yield stream.write
    else:
        # A file-like object; *encoding* decides whether it is treated as
        # a text writer or as a binary writer.
        if encoding == "unicode":
            # Text writer: hand its write method back untouched.
            yield write
            return

        # Binary writer: put a TextIOWrapper in front of it.
        with contextlib.ExitStack() as cleanup:
            target = file_or_filename
            if isinstance(target, io.BufferedIOBase):
                binary = target
            elif isinstance(target, io.RawIOBase):
                binary = io.BufferedWriter(target)
                # Detach on exit so the caller's raw file stays open when
                # the BufferedWriter is destroyed.
                cleanup.callback(binary.detach)
            else:
                # Not part of the IOBase hierarchy, it merely has a write
                # method: graft that method onto a bare BufferedIOBase.
                binary = io.BufferedIOBase()
                binary.writable = lambda: True
                binary.write = write
                try:
                    # TextIOWrapper consults these to decide whether a BOM
                    # (for UTF-16, etc) should be added.
                    binary.seekable = target.seekable
                    binary.tell = target.tell
                except AttributeError:
                    pass
            text_stream = io.TextIOWrapper(binary,
                                           encoding=encoding,
                                           errors="xmlcharrefreplace",
                                           newline="\n")
            # Detach on exit so the underlying file stays open when the
            # TextIOWrapper is destroyed.
            cleanup.callback(text_stream.detach)
            yield text_stream.write
836
def _namespaces(elem, default_namespace=None):
    """Identify the namespaces used in the tree rooted at *elem*.

    Returns a ``(qnames, namespaces)`` tuple.  *qnames* maps each tag,
    attribute name and QName value found in the tree to its serialized
    ``prefix:local`` form (or the bare name); *namespaces* maps namespace
    URIs to the prefixes chosen for them.

    Raises ValueError if *default_namespace* is set and a non-qualified
    name is encountered.  Names that cannot be serialized are reported
    through _raise_serialization_error().
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].rsplit("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    # the "xml" prefix is never recorded in namespaces
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag  # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, str):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
897
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Serialize *elem* and its subtree as XML through the *write* callable.

    *qnames* maps tags/attribute names to their serialized form and
    *namespaces* maps URIs to prefixes; namespace declarations are written
    on this element only, since children are serialized with
    ``namespaces=None``.  When *short_empty_elements* is true, elements
    with no text and no children are written as ``<tag />``.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % text)
    elif tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag of its own: emit only the text and the children
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_xml(write, e, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + tag)
            items = list(elem.items())
            if items or namespaces:
                if namespaces:
                    for uri, prefix in sorted(namespaces.items(),
                                              key=lambda x: x[1]):  # sort on prefix
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix,
                            _escape_attrib(uri)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib(v)
                    write(" %s=\"%s\"" % (qnames[k], v))
            if text or len(elem) or not short_empty_elements:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for e in elem:
                    _serialize_xml(write, e, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + tag + ">")
            else:
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail))
947
# Lower-case tag names for which the HTML serializer writes no closing tag.
# Built directly as a set: the old try/except NameError fallback around
# set() could never trigger, because set is always a builtin here.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param"}
955
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Serialize *elem* and its subtree as HTML through the *write* callable.

    The text of ``script`` and ``style`` elements is written unescaped,
    and tags listed in HTML_EMPTY get no closing tag.  Namespace
    declarations are written on this element only, since children are
    serialized with ``namespaces=None``.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag of its own: emit only the text and the children
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None)
        else:
            write("<" + tag)
            items = list(elem.items())
            if items or namespaces:
                if namespaces:
                    for uri, prefix in sorted(namespaces.items(),
                                              key=lambda x: x[1]):  # sort on prefix
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix,
                            _escape_attrib(uri)
                            ))
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            ltag = tag.lower()
            if text:
                if ltag == "script" or ltag == "style":
                    write(text)
                else:
                    write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None)
            if ltag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
1005
def _serialize_text(write, elem):
    """Write the text content of *elem*, followed by its tail if any.

    Every string yielded by ``elem.itertext()`` is passed to *write*
    unescaped.
    """
    for part in elem.itertext():
        write(part)
    if elem.tail:
        write(elem.tail)
1011
# Dispatch table mapping an output method name to its serializer function.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
1019
1020
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global, and any existing mapping for either the
    given prefix or the namespace URI will be removed.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    # "ns<digits>" prefixes are generated automatically during serialization
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # iterate over a snapshot, since entries are deleted while scanning
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
1039
# Global registry mapping namespace URIs to their preferred prefixes.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting
register_namespace._namespace_map = _namespace_map
1054
def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* cannot be serialized."""
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
        )
1059
def _escape_cdata(text):
    """Return *text* with ``&``, ``<`` and ``>`` replaced by XML entities.

    Values that do not behave like strings are reported through
    _raise_serialization_error().
    """
    # escape character data
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        # "&" must be handled first so the entities added below are not
        # escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1075
def _escape_attrib(text):
    """Return *text* escaped for use inside a double-quoted XML attribute.

    ``&``, ``<``, ``>`` and ``"`` become entities, CR and CR LF are
    normalized to LF, and LF and TAB are then written as character
    references.  Values that do not behave like strings are reported
    through _raise_serialization_error().
    """
    # escape attribute value
    try:
        # "&" must be handled first so the entities added below are not
        # escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        # The following business with carriage returns is to satisfy
        # Section 2.11 of the XML specification, stating that
        # CR or CR LN should be replaced with just LN
        # http://www.w3.org/TR/REC-xml/#sec-line-ends
        if "\r\n" in text:
            text = text.replace("\r\n", "\n")
        if "\r" in text:
            text = text.replace("\r", "\n")
        #The following four lines are issue 17582
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        if "\t" in text:
            text = text.replace("\t", "&#09;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1103
def _escape_attrib_html(text):
    """Return *text* escaped for use inside a double-quoted HTML attribute.

    Only ``&``, ``>`` and ``"`` are replaced; ``<`` is left as is.  Values
    that do not behave like strings are reported through
    _raise_serialization_error().
    """
    # escape attribute value
    try:
        # "&" must be handled first so the entities added below are not
        # escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1116
1117 # --------------------------------------------------------------------
1118
def tostring(element, encoding=None, method=None, *,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  If encoding is "unicode", a string
    is returned. Otherwise a bytestring is returned.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output which can
    be one of "xml" (default), "html", "text" or "c14n".

    Returns an (optionally) encoded string containing the XML data.

    """
    # a text buffer for "unicode" output, a bytes buffer for everything else
    stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ElementTree(element).write(stream, encoding, method=method,
                               short_empty_elements=short_empty_elements)
    return stream.getvalue()
1137
class _ListDataStream(io.BufferedIOBase):
    """An auxiliary stream accumulating into a list reference."""

    def __init__(self, lst):
        # the caller keeps its own reference to lst and reads the chunks there
        self.lst = lst

    def writable(self):
        return True

    def seekable(self):
        return True

    def write(self, b):
        # each written chunk is stored as a separate list item
        self.lst.append(b)

    def tell(self):
        # position is reported as the number of chunks written so far
        return len(self.lst)
1154
def tostringlist(element, encoding=None, method=None, *,
                 short_empty_elements=True):
    """Generate the serialized form of *element* as a list of fragments.

    Takes the same arguments as tostring(), but returns the list of chunks
    collected while writing instead of a single joined value.
    """
    lst = []
    stream = _ListDataStream(lst)
    ElementTree(element).write(stream, encoding, method=method,
                               short_empty_elements=short_empty_elements)
    return lst
1162
1163
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    This function should be used for debugging only.

    *elem* is either an ElementTree, or a single Element.  The exact output
    format is implementation dependent.  In this version, it's written as an
    ordinary XML file.

    """
    # debugging
    if not isinstance(elem, ElementTree):
        elem = ElementTree(elem)
    elem.write(sys.stdout, encoding="unicode")
    # make sure the output ends with a newline
    tail = elem.getroot().tail
    if not tail or tail[-1] != "\n":
        sys.stdout.write("\n")
1181
1182 # --------------------------------------------------------------------
1183 # parsing
1184
1185
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Return an ElementTree instance.

    """
    tree = ElementTree()
    tree.parse(source, parser)
    return tree
1198
1199
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    This function also reports what's going on to the user based on the
    *events* it is initialized with.  The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information).  If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  Its ``root``
    attribute is None until the whole source has been consumed, and is
    then set to the value returned when the parser is closed.

    """
    # collections.Iterator was only a deprecated alias and no longer exists
    # on Python 3.10+; import the ABC from its real home.  The import is
    # local so this function does not depend on the module's import block.
    from collections.abc import Iterator

    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    def iterator():
        try:
            while True:
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            # only close files this function opened itself
            if close_source:
                source.close()

    class IterParseIterator(Iterator):
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    # The generator body has not started running yet, so it is safe to bind
    # close_source and (possibly) rebind source after creating it.
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True

    return it
1246
1247
class XMLPullParser:
    """Parser that queues parsing events for the caller to pull.

    Data is pushed in with feed(); the resulting events are retrieved with
    read_events().  *events* is a sequence of event names to report,
    defaulting to ``("end",)``.
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        self._parser = _parser or XMLParser(target=TreeBuilder())
        # wire up the parser for event reporting
        if events is None:
            events = ("end",)
        self._parser._setevents(self._events_queue, events)

    def feed(self, data):
        """Feed encoded data to parser."""
        if self._parser is None:
            raise ValueError("feed() called after end of stream")
        if data:
            try:
                self._parser.feed(data)
            except SyntaxError as exc:
                # queue the error so it is raised from read_events(), after
                # the events that were queued before it
                self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        root = self._parser.close()
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element. Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.
        """
        events = self._events_queue
        while events:
            event = events.popleft()
            if isinstance(event, Exception):
                raise event
            else:
                yield event
1299
1300
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    return parser.close()
1316
1317
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements.

    """
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    tree = parser.close()
    ids = {}
    for elem in tree.iter():
        elem_id = elem.get("id")
        # elements with a missing or empty id are skipped
        if elem_id:
            ids[elem_id] = elem
    return tree, ids
1338
# Parse XML document from string constant.  Alias for XML().
fromstring = XML
1341
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence of string fragments, *parser* is
    an optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    for text in sequence:
        parser.feed(text)
    return parser.close()
1356
1357 # --------------------------------------------------------------------
1358
1359
class TreeBuilder:
    """Generic element structure builder.

    This builder converts a sequence of start, data, and end method
    calls to a well-formed element structure.

    You can use this class to build an element structure using a custom XML
    parser, or a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    to create new Element instances, as necessary.

    """
    def __init__(self, element_factory=None):
        self._data = []  # data collector
        self._elem = []  # element stack
        self._last = None  # last element
        self._tail = None  # true if we're after an end tag
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Attach the collected character data to the last element: as its
        # tail when that element has already been closed, otherwise as its
        # text.  Data collected before any element exists is dropped.
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        if self._elem:
            # nest the new element under the currently open one
            self._elem[-1].append(elem)
        self._elem.append(elem)
        self._tail = 0
        return elem

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        self._last = self._elem.pop()
        assert self._last.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   self._last.tag, tag)
        self._tail = 1
        return self._last
1432
1433
1434 # also see ElementTree and TreeBuilder
class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *html* are predefined HTML entities (deprecated and not supported),
    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    """

    def __init__(self, html=0, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator, so qualified names arrive as
        # "uri}local" and _fixname() only has to prepend "{"
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {}  # name memo cache
        # main callbacks
        parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            parser.EndElementHandler = self._end
        if hasattr(target, 'data'):
            parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass  # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor.
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                def handler(prefix, uri, event=event_name, append=append):
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                def handler(prefix, event=event_name, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # re-raise an expat error as ParseError, keeping code and position
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name
        return name

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            for i in range(0, len(attr_list), 2):
                attrib[fixname(attr_list[i])] = attr_list[i+1]
        return self.target.start(tag, attrib)

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            try:
                data_handler(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self.parser.ErrorLineNumber,
                    self.parser.ErrorColumnNumber)
                    )
                err.code = 11  # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self.parser.ErrorLineNumber
                err.offset = self.parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = []  # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                kind = self._doctype[1]
                if kind == "PUBLIC" and n == 4:
                    name, kind, pubid, system = self._doctype
                    if pubid:
                        pubid = pubid[1:-1]  # strip the surrounding quotes
                elif kind == "SYSTEM" and n == 3:
                    name, kind, system = self._doctype
                    pubid = None
                else:
                    return
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # doctype() was overridden in a subclass:
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    def doctype(self, name, pubid, system):
        """(Deprecated)  Handle doctype declaration

        *name* is the Doctype name, *pubid* is the public identifier,
        and *system* is the system identifier.

        """
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse("", 1)  # end of data
        except self._error as v:
            self._raiseerror(v)
        try:
            close_handler = self.target.close
        except AttributeError:
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target
1644
1645
# Import the C accelerators
try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" uses by external
    # code (see tests)
    _Element_Py = Element

    # Element, SubElement, ParseError, TreeBuilder, XMLParser
    from _elementtree import *
except ImportError:
    pass


xml.etree.ElementTree
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: