# HG changeset patch # User cmlenz # Date 1153048054 0 # Node ID 4938c310d90447d1e37dcdea8fcf673bcbf55ce5 # Parent 0a1843b2c0963ae8a530cca531332ce9a96e62c2 Improve handling of DOCTYPE declarations. diff --git a/markup/output.py b/markup/output.py --- a/markup/output.py +++ b/markup/output.py @@ -19,6 +19,7 @@ frozenset except NameError: from sets import ImmutableSet as frozenset +from itertools import chain from markup.core import escape, Markup, Namespace, QName from markup.core import DOCTYPE, START, END, START_NS, END_NS, TEXT @@ -39,6 +40,22 @@ raise NotImplementedError +class DocType(object): + """Defines a number of commonly used DOCTYPE declarations as constants.""" + + HTML_STRICT = ('html', '-//W3C//DTD HTML 4.01//EN', + 'http://www.w3.org/TR/html4/strict.dtd') + HTML_TRANSITIONAL = ('html', '-//W3C//DTD HTML 4.01 Transitional//EN', + 'http://www.w3.org/TR/html4/loose.dtd') + HTML = HTML_STRICT + + XHTML_STRICT = ('html', '-//W3C//DTD XHTML 1.0 Strict//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd') + XHTML_TRANSITIONAL = ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd') + XHTML = XHTML_STRICT + + class XMLSerializer(Serializer): """Produces XML text from an event stream. @@ -47,17 +64,38 @@ >>> print ''.join(XMLSerializer().serialize(elem.generate()))
""" + def __init__(self, doctype=None): + """Initialize the XML serializer. + + @param doctype: a `(name, pubid, sysid)` tuple that represents the + DOCTYPE declaration that should be included at the top of the + generated output + """ + self.preamble = [] + if doctype: + self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) def serialize(self, stream): + have_doctype = False ns_attrib = [] ns_mapping = {} - stream = _PushbackIterator(stream) + stream = _PushbackIterator(chain(self.preamble, stream)) for kind, data, pos in stream: if kind is DOCTYPE: - # FIXME: what if there's no system or public ID in the input? - yield Markup('\n' % data) + if not have_doctype: + name, pubid, sysid = data + buf = ['\n') + yield Markup(''.join(buf), *filter(None, data)) + have_doctype = True elif kind is START_NS: prefix, uri = data @@ -132,14 +170,37 @@ 'defer', 'disabled', 'ismap', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap']) + def __init__(self, doctype=None): + """Initialize the HTML serializer. + + @param doctype: a `(name, pubid, sysid)` tuple that represents the + DOCTYPE declaration that should be included at the top of the + generated output + """ + self.preamble = [] + if doctype: + self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) + def serialize(self, stream): + have_doctype = False ns_mapping = {} - stream = _PushbackIterator(stream) + stream = _PushbackIterator(chain(self.preamble, stream)) for kind, data, pos in stream: if kind is DOCTYPE: - yield Markup('\n' % data) + if not have_doctype: + name, pubid, sysid = data + buf = ['\n') + yield Markup(''.join(buf), *filter(None, data)) + have_doctype = True elif kind is START_NS: prefix, uri = data diff --git a/markup/tests/output.py b/markup/tests/output.py --- a/markup/tests/output.py +++ b/markup/tests/output.py @@ -15,11 +15,64 @@ import unittest import sys -from markup import output +from markup.core import Stream +from markup.output import DocType, XMLSerializer + + +class XMLSerializerTestCase(unittest.TestCase): + + def test_doctype_in_stream(self): + stream = Stream([(Stream.DOCTYPE, DocType.HTML_STRICT, ('?', -1, -1))]) + output = stream.render(XMLSerializer) + self.assertEqual('\n', + output) + + def test_doctype_in_stream_no_sysid(self): + stream = Stream([(Stream.DOCTYPE, + ('html', '-//W3C//DTD HTML 4.01//EN', None), + ('?', -1, -1))]) + output = stream.render(XMLSerializer) + self.assertEqual('\n', + output) + + def test_doctype_in_stream_no_pubid(self): + stream = Stream([(Stream.DOCTYPE, + ('html', None, 'http://www.w3.org/TR/html4/strict.dtd'), + ('?', -1, -1))]) + output = stream.render(XMLSerializer) + self.assertEqual('\n', + output) + + def test_doctype_in_stream_no_pubid_or_sysid(self): + stream = Stream([(Stream.DOCTYPE, ('html', None, None), + ('?', -1, -1))]) + output = stream.render(XMLSerializer) + self.assertEqual('\n', output) + + def test_serializer_doctype(self): + stream = Stream([]) + output = stream.render(XMLSerializer, doctype=DocType.HTML_STRICT) + self.assertEqual('\n', + output) + + def test_doctype_one_and_only(self): + stream = Stream([(Stream.DOCTYPE, ('html', None, None), ('?', -1, -1))]) + output = stream.render(XMLSerializer, doctype=DocType.HTML_STRICT) + self.assertEqual('\n', + output) + def suite(): suite = unittest.TestSuite() - suite.addTest(doctest.DocTestSuite(output)) + suite.addTest(unittest.makeSuite(XMLSerializerTestCase, 'test')) + suite.addTest(doctest.DocTestSuite(XMLSerializer.__module__)) return suite if __name__ == '__main__':