Package xml :: Package etree :: Module ElementTree
[hide private]
[frames | no frames]

Source Code for Module xml.etree.ElementTree

   1  # 
   2  # ElementTree 
   3  # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ 
   4  # 
   5  # light-weight XML support for Python 2.3 and later. 
   6  # 
   7  # history (since 1.2.6): 
   8  # 2005-11-12 fl   added tostringlist/fromstringlist helpers 
   9  # 2006-07-05 fl   merged in selected changes from the 1.3 sandbox 
  10  # 2006-07-05 fl   removed support for 2.1 and earlier 
  11  # 2007-06-21 fl   added deprecation/future warnings 
  12  # 2007-08-25 fl   added doctype hook, added parser version attribute etc 
  13  # 2007-08-26 fl   added new serializer code (better namespace handling, etc) 
  14  # 2007-08-27 fl   warn for broken /tag searches on tree level 
  15  # 2007-09-02 fl   added html/text methods to serializer (experimental) 
  16  # 2007-09-05 fl   added method argument to tostring/tostringlist 
  17  # 2007-09-06 fl   improved error handling 
  18  # 2007-09-13 fl   added itertext, iterfind; assorted cleanups 
  19  # 2007-12-15 fl   added C14N hooks, copy method (experimental) 
  20  # 
  21  # Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved. 
  22  # 
  23  # fredrik@pythonware.com 
  24  # http://www.pythonware.com 
  25  # 
  26  # -------------------------------------------------------------------- 
  27  # The ElementTree toolkit is 
  28  # 
  29  # Copyright (c) 1999-2008 by Fredrik Lundh 
  30  # 
  31  # By obtaining, using, and/or copying this software and/or its 
  32  # associated documentation, you agree that you have read, understood, 
  33  # and will comply with the following terms and conditions: 
  34  # 
  35  # Permission to use, copy, modify, and distribute this software and 
  36  # its associated documentation for any purpose and without fee is 
  37  # hereby granted, provided that the above copyright notice appears in 
  38  # all copies, and that both that copyright notice and this permission 
  39  # notice appear in supporting documentation, and that the name of 
  40  # Secret Labs AB or the author not be used in advertising or publicity 
  41  # pertaining to distribution of the software without specific, written 
  42  # prior permission. 
  43  # 
  44  # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 
  45  # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 
  46  # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 
  47  # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 
  48  # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 
  49  # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 
  50  # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
  51  # OF THIS SOFTWARE. 
  52  # -------------------------------------------------------------------- 
  53   
  54  # Licensed to PSF under a Contributor Agreement. 
  55  # See http://www.python.org/psf/license for licensing details. 
  56   
  57  __all__ = [ 
  58      # public symbols 
  59      "Comment", 
  60      "dump", 
  61      "Element", "ElementTree", 
  62      "fromstring", "fromstringlist", 
  63      "iselement", "iterparse", 
  64      "parse", "ParseError", 
  65      "PI", "ProcessingInstruction", 
  66      "QName", 
  67      "SubElement", 
  68      "tostring", "tostringlist", 
  69      "TreeBuilder", 
  70      "VERSION", 
  71      "XML", 
  72      "XMLParser", "XMLTreeBuilder", 
  73      ] 
  74   
  75  VERSION = "1.3.0" 
  76   
  77  ## 
  78  # The <b>Element</b> type is a flexible container object, designed to 
  79  # store hierarchical data structures in memory. The type can be 
  80  # described as a cross between a list and a dictionary. 
  81  # <p> 
  82  # Each element has a number of properties associated with it: 
  83  # <ul> 
  84  # <li>a <i>tag</i>. This is a string identifying what kind of data 
  85  # this element represents (the element type, in other words).</li> 
  86  # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li> 
  87  # <li>a <i>text</i> string.</li> 
  88  # <li>an optional <i>tail</i> string.</li> 
  89  # <li>a number of <i>child elements</i>, stored in a Python sequence</li> 
  90  # </ul> 
  91  # 
  92  # To create an element instance, use the {@link #Element} constructor 
  93  # or the {@link #SubElement} factory function. 
  94  # <p> 
  95  # The {@link #ElementTree} class can be used to wrap an element 
  96  # structure, and convert it from and to XML. 
  97  ## 
  98   
  99  import sys 
 100  import re 
 101  import warnings 
 102   
 103   
104 -class _SimpleElementPath(object):
105 # emulate pre-1.2 find/findtext/findall behaviour
106 - def find(self, element, tag, namespaces=None):
107 for elem in element: 108 if elem.tag == tag: 109 return elem 110 return None
111 - def findtext(self, element, tag, default=None, namespaces=None):
112 elem = self.find(element, tag) 113 if elem is None: 114 return default 115 return elem.text or ""
116 - def iterfind(self, element, tag, namespaces=None):
117 if tag[:3] == ".//": 118 for elem in element.iter(tag[3:]): 119 yield elem 120 for elem in element: 121 if elem.tag == tag: 122 yield elem
123 - def findall(self, element, tag, namespaces=None):
124 return list(self.iterfind(element, tag, namespaces))
125 126 try: 127 from . import ElementPath 128 except ImportError: 129 ElementPath = _SimpleElementPath() 130 131 ## 132 # Parser error. This is a subclass of <b>SyntaxError</b>. 133 # <p> 134 # In addition to the exception value, an exception instance contains a 135 # specific exception code in the <b>code</b> attribute, and the line and 136 # column of the error in the <b>position</b> attribute. 137
class ParseError(SyntaxError):
    """Parser error; a plain subclass of SyntaxError.

    The class adds no attributes or methods of its own.  According to
    the accompanying module comment, instances are given a ``code``
    and a ``position`` attribute by the code that raises them.
    """
140 141 # -------------------------------------------------------------------- 142 143 ## 144 # Checks if an object appears to be a valid element object. 145 # 146 # @param element An element instance. 147 # @return A true value if this is an element object. 148 # @defreturn flag 149
def iselement(element):
    # Duck-typed test: Element instances qualify outright, and any
    # other object qualifies by exposing a "tag" attribute.
    # FIXME (carried over from the original): it might be a better idea
    # to look for tag/attrib/text attributes instead.
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
154 155 ## 156 # Element class. This class defines the Element interface, and 157 # provides a reference implementation of this interface. 158 # <p> 159 # The element name, attribute names, and attribute values can be 160 # either ASCII strings (ordinary Python strings containing only 7-bit 161 # ASCII characters) or Unicode strings. 162 # 163 # @param tag The element name. 164 # @param attrib An optional dictionary, containing element attributes. 165 # @param **extra Additional attributes, given as keyword arguments. 166 # @see Element 167 # @see SubElement 168 # @see Comment 169 # @see ProcessingInstruction 170
class Element(object):
    # Serialized layout: <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) Element tag.

    tag = None

    ##
    # (Attribute) Element attribute dictionary.  Where possible, go
    # through {@link #Element.get}, {@link #Element.set},
    # {@link #Element.keys} and {@link #Element.items} rather than
    # touching the dictionary directly.

    attrib = None

    ##
    # (Attribute) Text before the first subelement.  Either a string
    # or None; when there was no text the value may be None or an
    # empty string, depending on the parser.

    text = None

    ##
    # (Attribute) Text after this element's end tag but before the
    # next sibling's start tag.  Either a string or None; when there
    # was no text the value may be None or an empty string, depending
    # on the parser.

    tail = None  # text after end tag, if any

    # constructor

    def __init__(self, tag, attrib={}, **extra):
        # The dictionary passed in (or the shared default) is never
        # stored or modified: a copy is merged with the keyword extras.
        merged = attrib.copy()
        merged.update(extra)
        self.tag = tag
        self.attrib = merged
        self._children = []

    def __repr__(self):
        tag_repr = repr(self.tag)
        return "<Element %s at 0x%x>" % (tag_repr, id(self))

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        factory = self.__class__
        return factory(tag, attrib)

    ##
    # (Experimental) Copies the current element.  The copy is shallow:
    # it gets its own attribute dictionary and child list, but the
    # subelements themselves are shared with the original tree.
    #
    # @return A new element instance.

    def copy(self):
        clone = self.makeelement(self.tag, self.attrib)
        clone.text = self.text
        clone.tail = self.tail
        # slice assignment fills the clone's child list from this element
        clone[:] = self
        return clone

    ##
    # Returns the number of subelements.  Only full elements are
    # counted; to find out whether an element has any content at all,
    # check both the length and the <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        return len(self._children)

    def __nonzero__(self):
        # Emits a FutureWarning on every truth test, then answers the
        # way older versions did: true only if there are subelements.
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        child_count = len(self._children)
        return child_count != 0  # emulate old behaviour, for now

    ##
    # Returns the given subelement, by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        return self._children[index]

    ##
    # Replaces the given subelement, by index.  The index may also be
    # a slice, in which case 'element' supplies the replacement items.
    # No type checking is performed on the new value(s).
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        self._children[index] = element

    ##
    # Deletes the given subelement, by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        del self._children[index]

    ##
    # Adds a subelement to the end of this element.  In document
    # order the new element comes after the last existing subelement
    # (or directly after the text, if it is the first subelement) and
    # before this element's end tag.  The argument is not type checked.
    #
    # @param element The element to add.

    def append(self, element):
        self._children.append(element)

    ##
    # Appends subelements from a sequence.  The items are not type
    # checked.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        self._children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    # The argument is not type checked.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.

    def insert(self, index, element):
        self._children.insert(index, element)

    ##
    # Removes a matching subelement.  Unlike the <b>find</b> methods,
    # this method compares elements based on identity, not on tag
    # value or contents.  To remove subelements by other means, the
    # easiest way is often to use a list comprehension to select what
    # elements to keep, and use slice assignment to update the parent
    # element.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        self._children.remove(element)

    ##
    # (Deprecated) Returns all subelements, in document order.  A
    # DeprecationWarning is issued on every call.  The list handed
    # back is the element's own child list, not a copy.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    # Delegates to the module-level ElementPath implementation.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return ElementPath.find(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    # Delegates to the module-level ElementPath implementation.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return ElementPath.findtext(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Delegates to the module-level ElementPath implementation.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return ElementPath.findall(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Delegates to the module-level ElementPath implementation.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return ElementPath.iterfind(self, path, namespaces)

    ##
    # Resets an element.  The attribute dictionary is emptied in
    # place, the child list is replaced by a fresh empty list, and the
    # <b>text</b> and <b>tail</b> attributes are set to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
    # some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        return self.attrib.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
    # but some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        self.attrib[key] = value

    ##
    # Gets the attribute names.  The names come back in an arbitrary
    # order (just like for an ordinary Python dictionary).
    # Equivalent to <b>attrib.keys()</b>.
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        return self.attrib.keys()

    ##
    # Gets element attributes, as a sequence.  The attributes are
    # returned in an arbitrary order.  Equivalent to <b>attrib.items()</b>.
    #
    # @return A list of (name, value) tuples for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        return self.attrib.items()

    ##
    # Creates a tree iterator.  The iterator walks this element and
    # all subelements, in document order, and yields every element
    # with a matching tag.  The tag "*" is treated like None.
    # <p>
    # If the tree structure is modified during iteration, new or removed
    # elements may or may not be included.  To get a stable set, use the
    # list() function on the iterator, and loop over the resulting list.
    #
    # @param tag What tags to look for (default is to return all elements).
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        wanted = tag
        if wanted == "*":
            wanted = None
        if wanted is None or self.tag == wanted:
            yield self
        # depth-first: each child reports itself and its own descendants
        for child in self._children:
            for descendant in child.iter(wanted):
                yield descendant

    # compatibility
    def getiterator(self, tag=None):
        # Same matches as iter(), but collected into a list, and a
        # PendingDeprecationWarning is issued on every call.
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        matches = self.iter(tag)
        return list(matches)

    ##
    # Creates a text iterator.  The iterator walks this element and
    # all subelements, in document order, and yields all inner text:
    # this element's text first, then for each child its inner text
    # followed by its tail.  Elements whose tag is neither a string
    # nor None yield nothing at all.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        own_tag = self.tag
        if not (isinstance(own_tag, basestring) or own_tag is None):
            return
        if self.text:
            yield self.text
        for child in self:
            for chunk in child.itertext():
                yield chunk
            if child.tail:
                yield child.tail
509 510 # compatibility 511 _Element = _ElementInterface = Element 512 513 ## 514 # Subelement factory. This function creates an element instance, and 515 # appends it to an existing element. 516 # <p> 517 # The element name, attribute names, and attribute values can be 518 # either 8-bit ASCII strings or Unicode strings. 519 # 520 # @param parent The parent element. 521 # @param tag The subelement name. 522 # @param attrib An optional dictionary, containing element attributes. 523 # @param **extra Additional attributes, given as keyword arguments. 524 # @return An element instance. 525 # @defreturn Element 526
def SubElement(parent, tag, attrib={}, **extra):
    # Merge the keyword extras into a private copy of 'attrib' (the
    # caller's dictionary and the shared default stay untouched), have
    # the parent build the new element, then hang it on the parent.
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
533 534 ## 535 # Comment element factory. This factory function creates a special 536 # element that will be serialized as an XML comment by the standard 537 # serializer. 538 # <p> 539 # The comment string can be either an 8-bit ASCII string or a Unicode 540 # string. 541 # 542 # @param text A string containing the comment string. 543 # @return An element instance, representing a comment. 544 # @defreturn Element 545
def Comment(text=None):
    # The factory function itself serves as the tag of the new
    # element; the comment string is stored as the element's text.
    node = Element(Comment)
    node.text = text
    return node
550 551 ## 552 # PI element factory. This factory function creates a special element 553 # that will be serialized as an XML processing instruction by the standard 554 # serializer. 555 # 556 # @param target A string containing the PI target. 557 # @param text A string containing the PI contents, if any. 558 # @return An element instance, representing a PI. 559 # @defreturn Element 560
def ProcessingInstruction(target, text=None):
    # The factory function itself serves as the tag of the new element.
    # Its text is the target alone, or "target text" when a non-empty
    # 'text' is supplied.
    node = Element(ProcessingInstruction)
    node.text = target
    if text:
        node.text = node.text + " " + text
    return node
567 568 PI = ProcessingInstruction 569 570 ## 571 # QName wrapper. This can be used to wrap a QName attribute value, in 572 # order to get proper namespace handling on output. 573 # 574 # @param text A string containing the QName value, in the form {uri}local, 575 # or, if the tag argument is given, the URI part of a QName. 576 # @param tag Optional tag. If given, the first argument is interpreted as 577 # a URI, and this argument is interpreted as a local name. 578 # @return An opaque object, representing the QName. 579
580 -class QName(object):
581 - def __init__(self, text_or_uri, tag=None):
582 if tag: 583 text_or_uri = "{%s}%s" % (text_or_uri, tag) 584 self.text = text_or_uri
585 - def __str__(self):
586 return self.text
587 - def __hash__(self):
588 return hash(self.text)
589 - def __cmp__(self, oth