# HG changeset patch # User cmlenz # Date 1177530069 0 # Node ID 75425671b437c5afb421580ba2b12f0d3b6463af # Parent 5f5b227b04beace898dd0c097c279b3ced6f0674 Apply patch by Alec Thomas for processing XML declarations (#111). Thanks! diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,8 @@ * Fix incorrect reference to translation function in the I18N filter. * The `ET()` function now correctly handles attributes with a namespace. + * XML declarations are now processed internally, as well as written to the + output when XML serialization is used (ticket #111). Version 0.4 diff --git a/genshi/core.py b/genshi/core.py --- a/genshi/core.py +++ b/genshi/core.py @@ -56,6 +56,7 @@ START = StreamEventKind('START') #: a start tag END = StreamEventKind('END') #: an end tag TEXT = StreamEventKind('TEXT') #: literal text + XML_DECL = StreamEventKind('XML_DECL') #: XML declaration DOCTYPE = StreamEventKind('DOCTYPE') #: doctype declaration START_NS = StreamEventKind('START_NS') #: start namespace mapping END_NS = StreamEventKind('END_NS') #: end namespace mapping @@ -208,6 +209,7 @@ START = Stream.START END = Stream.END TEXT = Stream.TEXT +XML_DECL = Stream.XML_DECL DOCTYPE = Stream.DOCTYPE START_NS = Stream.START_NS END_NS = Stream.END_NS diff --git a/genshi/input.py b/genshi/input.py --- a/genshi/input.py +++ b/genshi/input.py @@ -26,7 +26,7 @@ from StringIO import StringIO from genshi.core import Attrs, QName, Stream, stripentities -from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \ +from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, END_NS, \ START_CDATA, END_CDATA, PI, COMMENT __all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML'] @@ -123,6 +123,7 @@ parser.StartCdataSectionHandler = self._handle_start_cdata parser.EndCdataSectionHandler = self._handle_end_cdata parser.ProcessingInstructionHandler = self._handle_pi + parser.XmlDeclHandler = self._handle_xml_decl parser.CommentHandler = self._handle_comment # Tell Expat that we'll handle non-XML entities ourselves @@ -216,6 +217,9 @@ def _handle_data(self, text): self._enqueue(TEXT, text) + def _handle_xml_decl(self, version, encoding, standalone): + self._enqueue(XML_DECL, (version, encoding, standalone)) + def _handle_doctype(self, name, sysid, pubid, has_internal_subset): self._enqueue(DOCTYPE, (name, pubid, sysid)) diff --git a/genshi/output.py b/genshi/output.py --- a/genshi/output.py +++ b/genshi/output.py @@ -23,7 +23,7 @@ import re from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind -from genshi.core import DOCTYPE, START, END, START_NS, END_NS, TEXT, \ +from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE __all__ = ['DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer', @@ -87,7 +87,7 @@ self.filters.append(NamespaceFlattener(prefixes=namespace_prefixes)) def __call__(self, stream): - have_doctype = False + have_decl = have_doctype = False in_cdata = False stream = chain(self.preamble, stream) @@ -115,6 +115,18 @@ elif kind is COMMENT: yield Markup('' % data) + elif kind is XML_DECL and not have_decl: + version, encoding, standalone = data + buf = ['\n') + yield Markup(u''.join(buf)) + have_decl = True + elif kind is DOCTYPE and not have_doctype: name, pubid, sysid = data buf = [' \xf6 """.encode('iso-8859-1'), encoding='iso-8859-1') - self.assertEqual(u"""