cmlenz@1: # -*- coding: utf-8 -*-
cmlenz@1: #
cmlenz@854: # Copyright (C) 2006-2009 Edgewall Software
cmlenz@1: # All rights reserved.
cmlenz@1: #
cmlenz@1: # This software is licensed as described in the file COPYING, which
cmlenz@1: # you should have received as part of this distribution. The terms
cmlenz@230: # are also available at http://genshi.edgewall.org/wiki/License.
cmlenz@1: #
cmlenz@1: # This software consists of voluntary contributions made by many
cmlenz@1: # individuals. For the exact contribution history, see the revision
cmlenz@230: # history and logs, available at http://genshi.edgewall.org/log/.
cmlenz@1:
cmlenz@26: import doctest
cmlenz@134: from StringIO import StringIO
cmlenz@134: import sys
cmlenz@1: import unittest
cmlenz@1:
cmlenz@378: from genshi.core import Attrs, Stream
cmlenz@230: from genshi.input import XMLParser, HTMLParser, ParseError
cmlenz@134:
cmlenz@134:
cmlenz@134: class XMLParserTestCase(unittest.TestCase):
# Test case for XMLParser: each test wraps a source string in StringIO,
# iterates the parser, and checks individual (kind, data, pos) events.
# NOTE(review): this listing carries a per-line "author@rev:" annotation and
# no indentation, and several `text` literals appear to have lost their
# markup (for example, an empty string is expected to yield a START event for
# 'elem'). Confirm the literals against the repository copy before relying
# on them.
cmlenz@134:
# The event at index 1 must be TEXT 'foo bar' with position (None, 1, 6).
cmlenz@134: def test_text_node_pos_single_line(self):
cmlenz@134: text = 'foo bar'
cmlenz@134: events = list(XMLParser(StringIO(text)))
cmlenz@134: kind, data, pos = events[1]
cmlenz@134: self.assertEqual(Stream.TEXT, kind)
cmlenz@854: self.assertEqual('foo bar', data)
cmlenz@750: self.assertEqual((None, 1, 6), pos)
cmlenz@134:
# Text spanning two lines: the TEXT event keeps the newline and is expected
# at position (None, 1, -1).
cmlenz@134: def test_text_node_pos_multi_line(self):
cmlenz@134: text = '''foo
cmlenz@134: bar'''
cmlenz@134: events = list(XMLParser(StringIO(text)))
cmlenz@134: kind, data, pos = events[1]
cmlenz@134: self.assertEqual(Stream.TEXT, kind)
cmlenz@854: self.assertEqual('foo\nbar', data)
cmlenz@750: self.assertEqual((None, 1, -1), pos)
cmlenz@134:
# The first event must be START for tag 'elem' whose attributes come back in
# the order title, id, class.
# NOTE(review): the literal shown is empty; presumably the element markup was
# stripped from this listing -- confirm.
cmlenz@160: def test_element_attribute_order(self):
cmlenz@160: text = ''
cmlenz@160: events = list(XMLParser(StringIO(text)))
cmlenz@160: kind, data, pos = events[0]
cmlenz@160: self.assertEqual(Stream.START, kind)
cmlenz@160: tag, attrib = data
cmlenz@854: self.assertEqual('elem', tag)
cmlenz@854: self.assertEqual(('title', 'baz'), attrib[0])
cmlenz@854: self.assertEqual(('id', 'foo'), attrib[1])
cmlenz@854: self.assertEqual(('class', 'bar'), attrib[2])
cmlenz@160:
# A unicode source string: the TEXT event at index 1 must be u'\u2013'.
cmlenz@207: def test_unicode_input(self):
cmlenz@207: text = u'
\u2013
'
cmlenz@207: events = list(XMLParser(StringIO(text)))
cmlenz@207: kind, data, pos = events[1]
cmlenz@207: self.assertEqual(Stream.TEXT, kind)
cmlenz@207: self.assertEqual(u'\u2013', data)
cmlenz@207:
# The source is encoded as iso-8859-1 and the parser is told so through the
# `encoding` argument; the TEXT event must decode to u'\xf6'.
cmlenz@316: def test_latin1_encoded(self):
cmlenz@316: text = u'\xf6
'.encode('iso-8859-1')
cmlenz@316: events = list(XMLParser(StringIO(text), encoding='iso-8859-1'))
cmlenz@316: kind, data, pos = events[1]
cmlenz@316: self.assertEqual(Stream.TEXT, kind)
cmlenz@316: self.assertEqual(u'\xf6', data)
cmlenz@316:
# Same iso-8859-1 encoded source, but no `encoding` argument is passed and the
# TEXT event is read from index 2 instead of 1.
# NOTE(review): presumably the original literal begins with an XML declaration
# naming the encoding -- TODO confirm, it is not visible here.
cmlenz@316: def test_latin1_encoded_xmldecl(self):
cmlenz@316: text = u"""
cmlenz@316: \xf6
cmlenz@316: """.encode('iso-8859-1')
cmlenz@316: events = list(XMLParser(StringIO(text)))
cmlenz@460: kind, data, pos = events[2]
cmlenz@316: self.assertEqual(Stream.TEXT, kind)
cmlenz@316: self.assertEqual(u'\xf6', data)
cmlenz@316:
# The event at index 2 must be TEXT u'\xa0'.
# NOTE(review): the literal shown is blank; presumably it declared a DTD and
# used an HTML entity -- confirm.
cmlenz@209: def test_html_entity_with_dtd(self):
cmlenz@209: text = """
cmlenz@209:
cmlenz@209: """
cmlenz@209: events = list(XMLParser(StringIO(text)))
cmlenz@209: kind, data, pos = events[2]
cmlenz@209: self.assertEqual(Stream.TEXT, kind)
cmlenz@209: self.assertEqual(u'\xa0', data)
cmlenz@209:
# The event at index 1 must be TEXT u'\xa0'.
# NOTE(review): the literal shown holds only whitespace; presumably an entity
# reference was rendered away in this listing -- confirm.
cmlenz@209: def test_html_entity_without_dtd(self):
cmlenz@209: text = ' '
cmlenz@209: events = list(XMLParser(StringIO(text)))
cmlenz@209: kind, data, pos = events[1]
cmlenz@209: self.assertEqual(Stream.TEXT, kind)
cmlenz@209: self.assertEqual(u'\xa0', data)
cmlenz@209:
# The START event's 'title' attribute must equal u'\xa0', and the next event
# must be END.
cmlenz@293: def test_html_entity_in_attribute(self):
cmlenz@293: text = ''
cmlenz@293: events = list(XMLParser(StringIO(text)))
cmlenz@293: kind, data, pos = events[0]
cmlenz@293: self.assertEqual(Stream.START, kind)
cmlenz@293: self.assertEqual(u'\xa0', data[1].get('title'))
cmlenz@293: kind, data, pos = events[1]
cmlenz@293: self.assertEqual(Stream.END, kind)
cmlenz@293:
# Consuming the parser (via list) on a source containing &junk; must raise
# ParseError. The parser is not wrapped in list() up front so that the
# exception is raised inside assertRaises.
cmlenz@209: def test_undefined_entity_with_dtd(self):
cmlenz@209: text = """
cmlenz@209: &junk;
cmlenz@209: """
cmlenz@209: events = XMLParser(StringIO(text))
cmlenz@209: self.assertRaises(ParseError, list, events)
cmlenz@209:
# Same expectation as above for a source that is just '&junk;' as shown.
cmlenz@209: def test_undefined_entity_without_dtd(self):
cmlenz@209: text = '&junk;'
cmlenz@209: events = XMLParser(StringIO(text))
cmlenz@209: self.assertRaises(ParseError, list, events)
cmlenz@209:
cmlenz@134:
cmlenz@134: class HTMLParserTestCase(unittest.TestCase):
# Test case for HTMLParser: each test wraps a source string in StringIO,
# iterates the parser, and checks individual (kind, data, pos) events.
# The three test_xmldecl* methods in this class construct XMLParser instead.
# NOTE(review): this listing carries a per-line "author@rev:" annotation and
# no indentation, and several `text` literals appear to have lost their
# markup; confirm the literals against the repository copy.
cmlenz@134:
# The event at index 1 must be TEXT 'foo bar' with position (None, 1, 6).
cmlenz@134: def test_text_node_pos_single_line(self):
cmlenz@134: text = 'foo bar'
cmlenz@134: events = list(HTMLParser(StringIO(text)))
cmlenz@134: kind, data, pos = events[1]
cmlenz@134: self.assertEqual(Stream.TEXT, kind)
cmlenz@854: self.assertEqual('foo bar', data)
cmlenz@750: self.assertEqual((None, 1, 6), pos)
cmlenz@134:
# Text spanning two lines: the TEXT event keeps the newline; the expected
# position here is (None, 1, 6).
cmlenz@134: def test_text_node_pos_multi_line(self):
cmlenz@134: text = '''foo
cmlenz@134: bar'''
cmlenz@134: events = list(HTMLParser(StringIO(text)))
cmlenz@134: kind, data, pos = events[1]
cmlenz@134: self.assertEqual(Stream.TEXT, kind)
cmlenz@854: self.assertEqual('foo\nbar', data)
cmlenz@750: self.assertEqual((None, 1, 6), pos)
cmlenz@1:
# The source is encoded as iso-8859-1 and the parser is given that encoding;
# the TEXT event must decode to u'\xf6'.
cmlenz@312: def test_input_encoding_text(self):
cmlenz@311: text = u'\xf6
'.encode('iso-8859-1')
cmlenz@311: events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
cmlenz@311: kind, data, pos = events[1]
cmlenz@311: self.assertEqual(Stream.TEXT, kind)
cmlenz@311: self.assertEqual(u'\xf6', data)
cmlenz@311:
# Same encoding setup; the START event's 'title' attribute must be u'\xf6'.
# NOTE(review): the literal shown is empty -- presumably markup was stripped.
cmlenz@312: def test_input_encoding_attribute(self):
cmlenz@312: text = u''.encode('iso-8859-1')
cmlenz@312: events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
cmlenz@312: kind, (tag, attrib), pos = events[0]
cmlenz@312: self.assertEqual(Stream.START, kind)
cmlenz@312: self.assertEqual(u'\xf6', attrib.get('title'))
cmlenz@312:
# A unicode source string: the TEXT event at index 1 must be u'\u2013'.
cmlenz@207: def test_unicode_input(self):
cmlenz@207: text = u'\u2013
'
cmlenz@207: events = list(HTMLParser(StringIO(text)))
cmlenz@207: kind, data, pos = events[1]
cmlenz@207: self.assertEqual(Stream.TEXT, kind)
cmlenz@207: self.assertEqual(u'\xa0', data[1].get('title')) if False else self.assertEqual(u'\u2013', data)
cmlenz@207:
# The START event's 'title' attribute must equal u'\xa0', and the next event
# must be END.
cmlenz@293: def test_html_entity_in_attribute(self):
cmlenz@293: text = ''
cmlenz@293: events = list(HTMLParser(StringIO(text)))
cmlenz@293: kind, data, pos = events[0]
cmlenz@293: self.assertEqual(Stream.START, kind)
cmlenz@293: self.assertEqual(u'\xa0', data[1].get('title'))
cmlenz@293: kind, data, pos = events[1]
cmlenz@293: self.assertEqual(Stream.END, kind)
cmlenz@293:
# The event at index 1 must be TEXT u'\xa0'.
cmlenz@293: def test_html_entity_in_text(self):
cmlenz@293: text = '
'
cmlenz@293: events = list(HTMLParser(StringIO(text)))
cmlenz@293: kind, data, pos = events[1]
cmlenz@293: self.assertEqual(Stream.TEXT, kind)
cmlenz@293: self.assertEqual(u'\xa0', data)
cmlenz@293:
# The first event must be PI with target 'php' and data 'echo "Foobar"'.
cmlenz@376: def test_processing_instruction(self):
cmlenz@376: text = ''
cmlenz@376: events = list(HTMLParser(StringIO(text)))
cmlenz@376: kind, (target, data), pos = events[0]
cmlenz@376: self.assertEqual(Stream.PI, kind)
cmlenz@854: self.assertEqual('php', target)
cmlenz@854: self.assertEqual('echo "Foobar"', data)
cmlenz@376:
# A processing instruction without data: target 'foo', data ''.
hodgestar@999: def test_processing_instruction_no_data_1(self):
hodgestar@999: text = u''
hodgestar@999: events = list(HTMLParser(StringIO(text)))
hodgestar@999: kind, (target, data), pos = events[0]
hodgestar@999: self.assertEqual(Stream.PI, kind)
hodgestar@999: self.assertEqual('foo', target)
hodgestar@999: self.assertEqual('', data)
hodgestar@999:
# Event 0 must be PI with target 'experiment' and empty data; event 2 must
# unpack to target '/experiment' with empty data (its kind is not asserted).
hodgestar@999: def test_processing_instruction_no_data_2(self):
hodgestar@999: text = u'.../experiment>'
hodgestar@999: events = list(HTMLParser(StringIO(text)))
hodgestar@999: kind, (target, data), pos = events[0]
hodgestar@999: self.assertEqual(Stream.PI, kind)
hodgestar@999: self.assertEqual('experiment', target)
hodgestar@999: self.assertEqual('', data)
hodgestar@999: kind, (target, data), pos = events[2]
hodgestar@999: self.assertEqual('/experiment', target)
hodgestar@999: self.assertEqual('', data)
hodgestar@999:
# Uses XMLParser: the first event must be XML_DECL with version '1.0',
# encoding None and standalone -1.
cmlenz@460: def test_xmldecl(self):
cmlenz@460: text = ''
cmlenz@460: events = list(XMLParser(StringIO(text)))
cmlenz@460: kind, (version, encoding, standalone), pos = events[0]
cmlenz@460: self.assertEqual(Stream.XML_DECL, kind)
cmlenz@854: self.assertEqual('1.0', version)
cmlenz@460: self.assertEqual(None, encoding)
cmlenz@460: self.assertEqual(-1, standalone)
cmlenz@460:
# Uses XMLParser: as above, but the declaration must report encoding 'utf-8'.
cmlenz@460: def test_xmldecl_encoding(self):
cmlenz@460: text = ''
cmlenz@460: events = list(XMLParser(StringIO(text)))
cmlenz@460: kind, (version, encoding, standalone), pos = events[0]
cmlenz@460: self.assertEqual(Stream.XML_DECL, kind)
cmlenz@854: self.assertEqual('1.0', version)
cmlenz@854: self.assertEqual('utf-8', encoding)
cmlenz@460: self.assertEqual(-1, standalone)
cmlenz@460:
# Uses XMLParser: as above, but standalone must be 1 and encoding None.
cmlenz@460: def test_xmldecl_standalone(self):
cmlenz@460: text = ''
cmlenz@460: events = list(XMLParser(StringIO(text)))
cmlenz@460: kind, (version, encoding, standalone), pos = events[0]
cmlenz@460: self.assertEqual(Stream.XML_DECL, kind)
cmlenz@854: self.assertEqual('1.0', version)
cmlenz@460: self.assertEqual(None, encoding)
cmlenz@460: self.assertEqual(1, standalone)
cmlenz@460:
# The PI data must be 'echo "Foobar" ?', i.e. a question mark remains at the
# end of the data.
cmlenz@376: def test_processing_instruction_trailing_qmark(self):
cmlenz@376: text = ''
cmlenz@376: events = list(HTMLParser(StringIO(text)))
cmlenz@376: kind, (target, data), pos = events[0]
cmlenz@376: self.assertEqual(Stream.PI, kind)
cmlenz@854: self.assertEqual('php', target)
cmlenz@854: self.assertEqual('echo "Foobar" ?', data)
cmlenz@376:
# Expects exactly five events, properly nested:
# START span, START b, TEXT 'Foobar', END b, END span.
cmlenz@378: def test_out_of_order_tags1(self):
cmlenz@378: text = 'Foobar'
cmlenz@378: events = list(HTMLParser(StringIO(text)))
cmlenz@378: self.assertEqual(5, len(events))
cmlenz@378: self.assertEqual((Stream.START, ('span', ())), events[0][:2])
cmlenz@378: self.assertEqual((Stream.START, ('b', ())), events[1][:2])
cmlenz@378: self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2])
cmlenz@378: self.assertEqual((Stream.END, 'b'), events[3][:2])
cmlenz@378: self.assertEqual((Stream.END, 'span'), events[4][:2])
cmlenz@378:
# Expects exactly seven events, properly nested: START span (with attribute
# class='baz'), START b, START i, TEXT 'Foobar', END i, END b, END span.
cmlenz@378: def test_out_of_order_tags2(self):
cmlenz@378: text = 'Foobar'
cmlenz@378: events = list(HTMLParser(StringIO(text)))
cmlenz@378: self.assertEqual(7, len(events))
cmlenz@378: self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))),
cmlenz@378: events[0][:2])
cmlenz@378: self.assertEqual((Stream.START, ('b', ())), events[1][:2])
cmlenz@378: self.assertEqual((Stream.START, ('i', ())), events[2][:2])
cmlenz@378: self.assertEqual((Stream.TEXT, 'Foobar'), events[3][:2])
cmlenz@378: self.assertEqual((Stream.END, 'i'), events[4][:2])
cmlenz@378: self.assertEqual((Stream.END, 'b'), events[5][:2])
cmlenz@378: self.assertEqual((Stream.END, 'span'), events[6][:2])
cmlenz@378:
# Same five-event expectation as test_out_of_order_tags1.
# NOTE(review): the input literal shown is identical to that test's;
# presumably the original markup differed -- confirm.
cmlenz@383: def test_out_of_order_tags3(self):
cmlenz@378: text = 'Foobar'
cmlenz@378: events = list(HTMLParser(StringIO(text)))
cmlenz@378: self.assertEqual(5, len(events))
cmlenz@378: self.assertEqual((Stream.START, ('span', ())), events[0][:2])
cmlenz@378: self.assertEqual((Stream.START, ('b', ())), events[1][:2])
cmlenz@378: self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2])
cmlenz@378: self.assertEqual((Stream.END, 'b'), events[3][:2])
cmlenz@378: self.assertEqual((Stream.END, 'span'), events[4][:2])
cmlenz@378:
# Expects three events: START span, TEXT "'" (a single apostrophe), END span.
# NOTE(review): the triple-quoted literal shown is unterminated; presumably it
# held a span containing a hexadecimal character reference -- confirm.
cmlenz@423: def test_hex_charref(self):
cmlenz@423: text = '''
cmlenz@423: events = list(HTMLParser(StringIO(text)))
cmlenz@423: self.assertEqual(3, len(events))
cmlenz@423: self.assertEqual((Stream.START, ('span', ())), events[0][:2])
cmlenz@423: self.assertEqual((Stream.TEXT, "'"), events[1][:2])
cmlenz@423: self.assertEqual((Stream.END, 'span'), events[2][:2])
cmlenz@423:
cmlenz@1:
# Build and return the test suite for this module: the doctests of the module
# that defines XMLParser, followed by every method whose name starts with
# 'test' in XMLParserTestCase and HTMLParserTestCase.
cmlenz@1: def suite():
cmlenz@1: suite = unittest.TestSuite()
cmlenz@141: suite.addTest(doctest.DocTestSuite(XMLParser.__module__))
cmlenz@134: suite.addTest(unittest.makeSuite(XMLParserTestCase, 'test'))
cmlenz@134: suite.addTest(unittest.makeSuite(HTMLParserTestCase, 'test'))
cmlenz@1: return suite
cmlenz@1:
# When executed as a script, hand control to unittest and run the suite built
# by suite() above (selected by name through defaultTest).
cmlenz@1: if __name__ == '__main__':
cmlenz@1: unittest.main(defaultTest='suite')