Mercurial > bitten > bitten-test
view bitten/util/xmlio.py @ 287:6abd43d0cd8a
The build slave now stores snapshot archives and the corresponding work directories in project folders of the main work folder, to keep build configurations from different projects that share the same name separate. This also requires transmitting the project name (simply the name of the environment directory) with the build initiation.
author | cmlenz |
---|---|
date | Sat, 15 Oct 2005 13:39:23 +0000 |
parents | 17e4b8d01db6 |
children | 13b290f5f1ee |
line wrap: on
line source
# -*- coding: iso8859-1 -*-
#
# Copyright (C) 2005 Christopher Lenz <cmlenz@gmx.de>
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://bitten.cmlenz.net/wiki/License.

"""Utility code for easy input and output of XML.

The current implementation uses `xml.dom.minidom` under the hood for parsing.
"""

import os
import sys

# Prefer the fastest string buffer that the running interpreter provides.
try:
    from cStringIO import StringIO
except ImportError:
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

# `UserDict.DictMixin` only exists on Python 2; `MutableMapping` provides the
# same "fill in the rest of the dict API" service on newer interpreters.
try:
    from UserDict import DictMixin
except ImportError:
    from collections.abc import MutableMapping as DictMixin

# Types that `parse()` treats as "the document itself" rather than as a
# file-like object.
try:
    _STRING_TYPES = (str, unicode)
except NameError:
    _STRING_TYPES = (str, bytes)

__all__ = ['Element', 'parse']


def _to_str(value):
    """Return `value` as a native `str`.

    Values that already are `str` are returned unchanged; anything else
    (in practice the `unicode` strings produced by minidom on Python 2) is
    converted by calling its `encode()` method.
    """
    if isinstance(value, str):
        return value
    return value.encode()


def _escape_text(text):
    """Escape special characters in the provided text so that it can be safely
    included in XML text nodes.

    The value is converted with `str()` first, so non-string objects such as
    numbers are accepted.
    """
    # The ampersand must be replaced first, otherwise the ampersands of the
    # entities inserted by the later replacements would be escaped again.
    return str(text).replace('&', '&amp;').replace('<', '&lt;') \
                    .replace('>', '&gt;')


def _escape_attr(attr):
    """Escape special characters in the provided text so that it can be safely
    included in XML attribute values.

    In addition to the characters handled by `_escape_text`, double quotes
    are escaped because attribute values are always written in double quotes.
    """
    return _escape_text(attr).replace('"', '&quot;')


class Fragment(object):
    """A collection of XML elements."""
    __slots__ = ['children']

    def __init__(self):
        """Create an XML fragment."""
        self.children = []

    def __getitem__(self, nodes):
        """Add nodes to the fragment.

        `nodes` may be a single node or a list/tuple of nodes. The fragment
        itself is returned so that calls can be chained.
        """
        if not isinstance(nodes, (list, tuple)):
            nodes = [nodes]
        for node in nodes:
            self.append(node)
        return self

    def __str__(self):
        """Return a string representation of the XML fragment."""
        buf = StringIO()
        self.write(buf)
        return buf.getvalue()

    def append(self, node):
        """Append an element or fragment as child.

        Elements are stored as they are, the children of a fragment are
        merged into this fragment, and any other value is stored as text
        using `str()`. `None` and the empty string are ignored.
        """
        if isinstance(node, Element):
            self.children.append(node)
        elif isinstance(node, Fragment):
            self.children += node.children
        elif node is not None and node != '':
            self.children.append(str(node))

    def write(self, out, newlines=False):
        """Serializes the element and writes the XML to the given output
        stream.

        `out` is any object with a `write()` method. The `newlines` flag is
        passed on to the `write()` method of child elements.
        """
        for child in self.children:
            if isinstance(child, (Element, ParsedElement)):
                child.write(out, newlines=newlines)
            elif child.startswith('<'):
                # Text that looks like markup is emitted as CDATA. A literal
                # ']]>' would terminate the section early, so it is split
                # across two adjacent CDATA sections.
                out.write('<![CDATA[' +
                          child.replace(']]>', ']]]]><![CDATA[>') + ']]>')
            else:
                out.write(_escape_text(child))


class Element(Fragment):
    """Simple XML output generator based on the builder pattern.

    Construct XML elements by passing the tag name to the constructor:

    >>> print(Element('foo'))
    <foo/>

    Attributes can be specified using keyword arguments. The values of the
    arguments will be converted to strings and any special XML characters
    escaped:

    >>> print(Element('foo', bar=42))
    <foo bar="42"/>
    >>> print(Element('foo', bar='1 < 2'))
    <foo bar="1 &lt; 2"/>
    >>> print(Element('foo', bar='"baz"'))
    <foo bar="&quot;baz&quot;"/>

    The order in which attributes are rendered is undefined.

    Elements can be nested using item access notation:

    >>> print(Element('foo')[Element('bar'), Element('baz')])
    <foo><bar/><baz/></foo>

    Text nodes can be nested in an element by using strings instead of elements
    in item access. Any special characters in the strings are escaped
    automatically:

    >>> print(Element('foo')['Hello world'])
    <foo>Hello world</foo>
    >>> print(Element('foo')[42])
    <foo>42</foo>
    >>> print(Element('foo')['1 < 2'])
    <foo>1 &lt; 2</foo>

    This technique also allows mixed content:

    >>> print(Element('foo')['Hello ', Element('b')['world']])
    <foo>Hello <b>world</b></foo>

    Finally, text starting with an opening angle bracket is treated specially:
    under the assumption that the text actually contains XML itself, the whole
    thing is wrapped in a CDATA block instead of escaping all special characters
    individually:

    >>> print(Element('foo')['<bar a="3" b="4"><baz/></bar>'])
    <foo><![CDATA[<bar a="3" b="4"><baz/></bar>]]></foo>
    """
    __slots__ = ['name', 'attr']

    def __init__(self, name_, **attr):
        """Create an XML element using the specified tag name.

        The tag name must be supplied as the first positional argument. All
        keyword arguments following it are handled as attributes of the
        element. Attributes whose value is `None` are dropped.
        """
        Fragment.__init__(self)
        self.name = name_
        self.attr = dict([(name, value) for name, value in attr.items()
                          if value is not None])

    def write(self, out, newlines=False):
        """Serializes the element and writes the XML to the given output
        stream.

        If `newlines` is true, `os.linesep` is written after the element.
        """
        out.write('<')
        out.write(self.name)
        for name, value in self.attr.items():
            out.write(' %s="%s"' % (name, _escape_attr(value)))
        if self.children:
            out.write('>')
            Fragment.write(self, out, newlines)
            out.write('</' + self.name + '>')
        else:
            # Elements without content use the short empty-element form.
            out.write('/>')
        if newlines:
            out.write(os.linesep)


class ParseError(Exception):
    """Exception thrown when there's an error parsing an XML document."""


def parse(text_or_file):
    """Parse an XML document provided as string or file-like object.

    Returns an instance of `ParsedElement` that can be used to traverse the
    parsed document. Raises `ParseError` if the underlying expat parser
    reports an error.
    """
    # NOTE(review): the document is handed to minidom/expat as-is; confirm
    # that callers only pass trusted input, or that the installed expat
    # guards against entity expansion attacks.
    from xml.dom import minidom
    from xml.parsers import expat
    try:
        if isinstance(text_or_file, _STRING_TYPES):
            dom = minidom.parseString(text_or_file)
        else:
            dom = minidom.parse(text_or_file)
    except expat.error:
        # sys.exc_info() is used to get at the exception because neither the
        # "except X, e" nor the "except X as e" spelling is accepted by all
        # Python versions.
        raise ParseError(sys.exc_info()[1])
    return ParsedElement(dom.documentElement)


class ParsedElement(object):
    """Representation of an XML element that was parsed from a string or file.

    This class should not be used directly. Rather, XML text parsed using
    `xmlio.parse()` will return an instance of this class.

    >>> xml = parse('<root/>')
    >>> print(xml.name)
    root

    Parsed elements can be serialized to a string using the `write()` method:

    >>> import sys
    >>> parse('<root></root>').write(sys.stdout)
    <root/>

    For convenience, this is also done when coercing the object to a string
    using the builtin `str()` function, which is used when `print`ing an
    object:

    >>> print(parse('<root></root>'))
    <root/>

    (Note that serializing the element will produce a normalized representation
    that may not exactly match the input string.)

    Attributes are accessed via the `attr` member:

    >>> print(parse('<root foo="bar"/>').attr['foo'])
    bar

    Attributes can also be updated, added or removed:

    >>> xml = parse('<root foo="bar"/>')
    >>> xml.attr['foo'] = 'baz'
    >>> print(xml)
    <root foo="baz"/>

    >>> del xml.attr['foo']
    >>> print(xml)
    <root/>

    >>> xml.attr['foo'] = 'bar'
    >>> print(xml)
    <root foo="bar"/>
    """
    __slots__ = ['_node', 'attr']

    class _Attrs(DictMixin):
        """Simple wrapper around the element attributes to provide a
        dictionary interface."""

        def __init__(self, node):
            self._node = node

        def __getitem__(self, name):
            attr = self._node.getAttributeNode(name)
            if attr is None:
                raise KeyError(name)
            return _to_str(attr.value)

        def __setitem__(self, name, value):
            self._node.setAttribute(name, value)

        def __delitem__(self, name):
            self._node.removeAttribute(name)

        # __iter__ and __len__ are spelled out because the MutableMapping
        # base used on newer interpreters requires them.
        def __iter__(self):
            return iter(self.keys())

        def __len__(self):
            return len(self.keys())

        def keys(self):
            return [_to_str(name) for name in self._node.attributes.keys()]

    def __init__(self, node):
        """Wrap the given DOM element node."""
        self._node = node
        self.attr = ParsedElement._Attrs(node)

    name = property(fget=lambda self: self._node.localName)
    namespace = property(fget=lambda self: self._node.namespaceURI)

    def children(self, name=None):
        """Iterate over the child elements of this element.

        If the parameter `name` is provided, only include elements with a
        matching name. Otherwise, include all elements.
        """
        # NOTE(review): the filter compares against the DOM `tagName`, while
        # the `name` property exposes `localName`; these may differ for
        # elements that carry a namespace prefix.
        for child in self._node.childNodes:
            if child.nodeType != 1:  # 1 is the DOM code for element nodes
                continue
            if name in (None, child.tagName):
                yield ParsedElement(child)

    def __iter__(self):
        """Iterate over all child elements."""
        return self.children()

    def gettext(self):
        """Return the text content of this element.

        This concatenates the values of all nodes that are immediate
        children of this element; children without a value contribute
        nothing.
        """
        return ''.join([c.nodeValue or '' for c in self._node.childNodes])

    def write(self, out, newlines=False):
        """Serializes the element and writes the XML to the given output
        stream.

        If `newlines` is true, the DOM serializer is asked to terminate
        lines with a line feed.
        """
        if newlines:
            newl = '\n'
        else:
            newl = ''
        self._node.writexml(out, newl=newl)

    def __str__(self):
        """Return a string representation of the XML element."""
        buf = StringIO()
        self.write(buf)
        return buf.getvalue()


if __name__ == '__main__':
    import doctest
    doctest.testmod()