| Home | Trees | Indices | Help |
|
|---|
|
|
1 #
2 # ElementTree
3 # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
4 #
5 # light-weight XML support for Python 2.3 and later.
6 #
7 # history (since 1.2.6):
8 # 2005-11-12 fl added tostringlist/fromstringlist helpers
9 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox
10 # 2006-07-05 fl removed support for 2.1 and earlier
11 # 2007-06-21 fl added deprecation/future warnings
12 # 2007-08-25 fl added doctype hook, added parser version attribute etc
13 # 2007-08-26 fl added new serializer code (better namespace handling, etc)
14 # 2007-08-27 fl warn for broken /tag searches on tree level
15 # 2007-09-02 fl added html/text methods to serializer (experimental)
16 # 2007-09-05 fl added method argument to tostring/tostringlist
17 # 2007-09-06 fl improved error handling
18 # 2007-09-13 fl added itertext, iterfind; assorted cleanups
19 # 2007-12-15 fl added C14N hooks, copy method (experimental)
20 #
21 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
22 #
23 # fredrik@pythonware.com
24 # http://www.pythonware.com
25 #
26 # --------------------------------------------------------------------
27 # The ElementTree toolkit is
28 #
29 # Copyright (c) 1999-2008 by Fredrik Lundh
30 #
31 # By obtaining, using, and/or copying this software and/or its
32 # associated documentation, you agree that you have read, understood,
33 # and will comply with the following terms and conditions:
34 #
35 # Permission to use, copy, modify, and distribute this software and
36 # its associated documentation for any purpose and without fee is
37 # hereby granted, provided that the above copyright notice appears in
38 # all copies, and that both that copyright notice and this permission
39 # notice appear in supporting documentation, and that the name of
40 # Secret Labs AB or the author not be used in advertising or publicity
41 # pertaining to distribution of the software without specific, written
42 # prior permission.
43 #
44 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
45 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
46 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
47 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
48 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
49 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
50 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
51 # OF THIS SOFTWARE.
52 # --------------------------------------------------------------------
53
54 # Licensed to PSF under a Contributor Agreement.
55 # See http://www.python.org/psf/license for licensing details.
56
57 __all__ = [
58 # public symbols
59 "Comment",
60 "dump",
61 "Element", "ElementTree",
62 "fromstring", "fromstringlist",
63 "iselement", "iterparse",
64 "parse", "ParseError",
65 "PI", "ProcessingInstruction",
66 "QName",
67 "SubElement",
68 "tostring", "tostringlist",
69 "TreeBuilder",
70 "VERSION",
71 "XML",
72 "XMLParser", "XMLTreeBuilder",
73 ]
74
75 VERSION = "1.3.0"
76
77 ##
78 # The <b>Element</b> type is a flexible container object, designed to
79 # store hierarchical data structures in memory. The type can be
80 # described as a cross between a list and a dictionary.
81 # <p>
82 # Each element has a number of properties associated with it:
83 # <ul>
84 # <li>a <i>tag</i>. This is a string identifying what kind of data
85 # this element represents (the element type, in other words).</li>
86 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
87 # <li>a <i>text</i> string.</li>
88 # <li>an optional <i>tail</i> string.</li>
89 # <li>a number of <i>child elements</i>, stored in a Python sequence</li>
90 # </ul>
91 #
92 # To create an element instance, use the {@link #Element} constructor
93 # or the {@link #SubElement} factory function.
94 # <p>
95 # The {@link #ElementTree} class can be used to wrap an element
96 # structure, and convert it from and to XML.
97 ##
98
99 import sys
100 import re
101 import warnings
102
103
105 # emulate pre-1.2 find/findtext/findall behaviour
112 elem = self.find(element, tag)
113 if elem is None:
114 return default
115 return elem.text or ""
117 if tag[:3] == ".//":
118 for elem in element.iter(tag[3:]):
119 yield elem
120 for elem in element:
121 if elem.tag == tag:
122 yield elem
125
126 try:
127 from . import ElementPath
128 except ImportError:
129 ElementPath = _SimpleElementPath()
130
131 ##
132 # Parser error. This is a subclass of <b>SyntaxError</b>.
133 # <p>
134 # In addition to the exception value, an exception instance contains a
135 # specific exception code in the <b>code</b> attribute, and the line and
136 # column of the error in the <b>position</b> attribute.
137
140
141 # --------------------------------------------------------------------
142
143 ##
144 # Checks if an object appears to be a valid element object.
145 #
146 # @param element An element instance.
147 # @return A true value if this is an element object.
148 # @defreturn flag
149
151 # FIXME: not sure about this; might be a better idea to look
152 # for tag/attrib/text attributes
153 return isinstance(element, Element) or hasattr(element, "tag")
154
155 ##
156 # Element class. This class defines the Element interface, and
157 # provides a reference implementation of this interface.
158 # <p>
159 # The element name, attribute names, and attribute values can be
160 # either ASCII strings (ordinary Python strings containing only 7-bit
161 # ASCII characters) or Unicode strings.
162 #
163 # @param tag The element name.
164 # @param attrib An optional dictionary, containing element attributes.
165 # @param **extra Additional attributes, given as keyword arguments.
166 # @see Element
167 # @see SubElement
168 # @see Comment
169 # @see ProcessingInstruction
170
172 # <tag attrib>text<child/>...</tag>tail
173
174 ##
175 # (Attribute) Element tag.
176
177 tag = None
178
179 ##
180 # (Attribute) Element attribute dictionary. Where possible, use
181 # {@link #Element.get},
182 # {@link #Element.set},
183 # {@link #Element.keys}, and
184 # {@link #Element.items} to access
185 # element attributes.
186
187 attrib = None
188
189 ##
190 # (Attribute) Text before first subelement. This is either a
191 # string or the value None. Note that if there was no text, this
192 # attribute may be either None or an empty string, depending on
193 # the parser.
194
195 text = None
196
197 ##
198 # (Attribute) Text after this element's end tag, but before the
199 # next sibling element's start tag. This is either a string or
200 # the value None. Note that if there was no text, this attribute
201 # may be either None or an empty string, depending on the parser.
202
203 tail = None # text after end tag, if any
204
205 # constructor
206
208 attrib = attrib.copy()
209 attrib.update(extra)
210 self.tag = tag
211 self.attrib = attrib
212 self._children = []
213
215 return "<Element %s at 0x%x>" % (repr(self.tag), id(self))
216
217 ##
218 # Creates a new element object of the same type as this element.
219 #
220 # @param tag Element tag.
221 # @param attrib Element attributes, given as a dictionary.
222 # @return A new element instance.
223
226
227 ##
228 # (Experimental) Copies the current element. This creates a
229 # shallow copy; subelements will be shared with the original tree.
230 #
231 # @return A new element instance.
232
234 elem = self.makeelement(self.tag, self.attrib)
235 elem.text = self.text
236 elem.tail = self.tail
237 elem[:] = self
238 return elem
239
240 ##
241 # Returns the number of subelements. Note that this only counts
242 # full elements; to check if there's any content in an element, you
243 # have to check both the length and the <b>text</b> attribute.
244 #
245 # @return The number of subelements.
246
249
251 warnings.warn(
252 "The behavior of this method will change in future versions. "
253 "Use specific 'len(elem)' or 'elem is not None' test instead.",
254 FutureWarning, stacklevel=2
255 )
256 return len(self._children) != 0 # emulate old behaviour, for now
257
258 ##
259 # Returns the given subelement, by index.
260 #
261 # @param index What subelement to return.
262 # @return The given subelement.
263 # @exception IndexError If the given element does not exist.
264
266 return self._children[index]
267
268 ##
269 # Replaces the given subelement, by index.
270 #
271 # @param index What subelement to replace.
272 # @param element The new element value.
273 # @exception IndexError If the given element does not exist.
274
276 # if isinstance(index, slice):
277 # for elt in element:
278 # assert iselement(elt)
279 # else:
280 # assert iselement(element)
281 self._children[index] = element
282
283 ##
284 # Deletes the given subelement, by index.
285 #
286 # @param index What subelement to delete.
287 # @exception IndexError If the given element does not exist.
288
290 del self._children[index]
291
292 ##
293 # Adds a subelement to the end of this element. In document order,
294 # the new element will appear after the last existing subelement (or
295 # directly after the text, if it's the first subelement), but before
296 # the end tag for this element.
297 #
298 # @param element The element to add.
299
303
304 ##
305 # Appends subelements from a sequence.
306 #
307 # @param elements A sequence object with zero or more elements.
308 # @since 1.3
309
314
315 ##
316 # Inserts a subelement at the given position in this element.
317 #
318 # @param index Where to insert the new subelement.
319
323
324 ##
325 # Removes a matching subelement. Unlike the <b>find</b> methods,
326 # this method compares elements based on identity, not on tag
327 # value or contents. To remove subelements by other means, the
328 # easiest way is often to use a list comprehension to select what
329 # elements to keep, and use slice assignment to update the parent
330 # element.
331 #
332 # @param element What element to remove.
333 # @exception ValueError If a matching element could not be found.
334
338
339 ##
340 # (Deprecated) Returns all subelements. The elements are returned
341 # in document order.
342 #
343 # @return A list of subelements.
344 # @defreturn list of Element instances
345
347 warnings.warn(
348 "This method will be removed in future versions. "
349 "Use 'list(elem)' or iteration over elem instead.",
350 DeprecationWarning, stacklevel=2
351 )
352 return self._children
353
354 ##
355 # Finds the first matching subelement, by tag name or path.
356 #
357 # @param path What element to look for.
358 # @keyparam namespaces Optional namespace prefix map.
359 # @return The first matching element, or None if no element was found.
360 # @defreturn Element or None
361
364
365 ##
366 # Finds text for the first matching subelement, by tag name or path.
367 #
368 # @param path What element to look for.
369 # @param default What to return if the element was not found.
370 # @keyparam namespaces Optional namespace prefix map.
371 # @return The text content of the first matching element, or the
372 # default value if no element was found. Note that if the element
373 # is found, but has no text content, this method returns an
374 # empty string.
375 # @defreturn string
376
379
380 ##
381 # Finds all matching subelements, by tag name or path.
382 #
383 # @param path What element to look for.
384 # @keyparam namespaces Optional namespace prefix map.
385 # @return A list or other sequence containing all matching elements,
386 # in document order.
387 # @defreturn list of Element instances
388
391
392 ##
393 # Finds all matching subelements, by tag name or path.
394 #
395 # @param path What element to look for.
396 # @keyparam namespaces Optional namespace prefix map.
397 # @return An iterator or sequence containing all matching elements,
398 # in document order.
399 # @defreturn a generated sequence of Element instances
400
403
404 ##
405 # Resets an element. This function removes all subelements, clears
406 # all attributes, and sets the <b>text</b> and <b>tail</b> attributes
407 # to None.
408
413
414 ##
415 # Gets an element attribute. Equivalent to <b>attrib.get</b>, but
416 # some implementations may handle this a bit more efficiently.
417 #
418 # @param key What attribute to look for.
419 # @param default What to return if the attribute was not found.
420 # @return The attribute value, or the default value, if the
421 # attribute was not found.
422 # @defreturn string or None
423
426
427 ##
428 # Sets an element attribute. Equivalent to <b>attrib[key] = value</b>,
429 # but some implementations may handle this a bit more efficiently.
430 #
431 # @param key What attribute to set.
432 # @param value The attribute value.
433
436
437 ##
438 # Gets a list of attribute names. The names are returned in an
439 # arbitrary order (just like for an ordinary Python dictionary).
440 # Equivalent to <b>attrib.keys()</b>.
441 #
442 # @return A list of element attribute names.
443 # @defreturn list of strings
444
447
448 ##
449 # Gets element attributes, as a sequence. The attributes are
450 # returned in an arbitrary order. Equivalent to <b>attrib.items()</b>.
451 #
452 # @return A list of (name, value) tuples for all attributes.
453 # @defreturn list of (string, string) tuples
454
457
458 ##
459 # Creates a tree iterator. The iterator loops over this element
460 # and all subelements, in document order, and returns all elements
461 # with a matching tag.
462 # <p>
463 # If the tree structure is modified during iteration, new or removed
464 # elements may or may not be included. To get a stable set, use the
465 # list() function on the iterator, and loop over the resulting list.
466 #
467 # @param tag What tags to look for (default is to return all elements).
468 # @return An iterator containing all the matching elements.
469 # @defreturn iterator
470
472 if tag == "*":
473 tag = None
474 if tag is None or self.tag == tag:
475 yield self
476 for e in self._children:
477 for e in e.iter(tag):
478 yield e
479
480 # compatibility
482 # Change for a DeprecationWarning in 1.4
483 warnings.warn(
484 "This method will be removed in future versions. "
485 "Use 'elem.iter()' or 'list(elem.iter())' instead.",
486 PendingDeprecationWarning, stacklevel=2
487 )
488 return list(self.iter(tag))
489
490 ##
491 # Creates a text iterator. The iterator loops over this element
492 # and all subelements, in document order, and returns all inner
493 # text.
494 #
495 # @return An iterator containing all inner text.
496 # @defreturn iterator
497
509
510 # compatibility
511 _Element = _ElementInterface = Element
512
513 ##
514 # Subelement factory. This function creates an element instance, and
515 # appends it to an existing element.
516 # <p>
517 # The element name, attribute names, and attribute values can be
518 # either 8-bit strings containing only ASCII characters or Unicode strings.
519 #
520 # @param parent The parent element.
521 # @param tag The subelement name.
522 # @param attrib An optional dictionary, containing element attributes.
523 # @param **extra Additional attributes, given as keyword arguments.
524 # @return An element instance.
525 # @defreturn Element
526
528 attrib = attrib.copy()
529 attrib.update(extra)
530 element = parent.makeelement(tag, attrib)
531 parent.append(element)
532 return element
533
534 ##
535 # Comment element factory. This factory function creates a special
536 # element that will be serialized as an XML comment by the standard
537 # serializer.
538 # <p>
539 # The comment string can be either an 8-bit string containing only
540 # ASCII characters or a Unicode string.
541 #
542 # @param text A string containing the comment string.
543 # @return An element instance, representing a comment.
544 # @defreturn Element
545
550
551 ##
552 # PI element factory. This factory function creates a special element
553 # that will be serialized as an XML processing instruction by the standard
554 # serializer.
555 #
556 # @param target A string containing the PI target.
557 # @param text A string containing the PI contents, if any.
558 # @return An element instance, representing a PI.
559 # @defreturn Element
560
567
568 PI = ProcessingInstruction
569
570 ##
571 # QName wrapper. This can be used to wrap a QName attribute value, in
572 # order to get proper namespace handling on output.
573 #
574 # @param text A string containing the QName value, in the form {uri}local,
575 # or, if the tag argument is given, the URI part of a QName.
576 # @param tag Optional tag. If given, the first argument is interpreted as
577 # a URI, and this argument is interpreted as a local name.
578 # @return An opaque object, representing the QName.
579