# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2009 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.

# NOTE(review): this file was recovered from an annotated (blame) listing in
# which the markup inside the input string literals had been stripped. The
# literals below were rebuilt so that they satisfy the assertions of each
# test; element names and the DOCTYPE identifier that the assertions do not
# pin down should be confirmed against the repository history.

import doctest
from StringIO import StringIO
import sys
import unittest

from genshi.core import Attrs, Stream
from genshi.input import XMLParser, HTMLParser, ParseError


class XMLParserTestCase(unittest.TestCase):
    """Tests for the event stream produced by `XMLParser`."""

    # Text inside a single-line element: checks kind, data and position.
    def test_text_node_pos_single_line(self):
        text = '<elem>foo bar</elem>'
        events = list(XMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual('foo bar', data)
        self.assertEqual((None, 1, 6), pos)

    # Text that contains a line break: checks kind, data and position.
    def test_text_node_pos_multi_line(self):
        text = '''<elem>foo
bar</elem>'''
        events = list(XMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual('foo\nbar', data)
        self.assertEqual((None, 1, -1), pos)

    # Attributes must be reported in the order they appear in the source.
    def test_element_attribute_order(self):
        text = '<elem title="baz" id="foo" class="bar" />'
        events = list(XMLParser(StringIO(text)))
        kind, data, pos = events[0]
        self.assertEqual(Stream.START, kind)
        tag, attrib = data
        self.assertEqual('elem', tag)
        self.assertEqual(('title', 'baz'), attrib[0])
        self.assertEqual(('id', 'foo'), attrib[1])
        self.assertEqual(('class', 'bar'), attrib[2])

    # A unicode source string is parsed without an explicit encoding.
    def test_unicode_input(self):
        text = u'<div>\u2013</div>'
        events = list(XMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'\u2013', data)

    # Byte input decoded via the `encoding` argument.
    def test_latin1_encoded(self):
        text = u'<div>\xf6</div>'.encode('iso-8859-1')
        events = list(XMLParser(StringIO(text), encoding='iso-8859-1'))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'\xf6', data)

    # Byte input whose encoding is announced only by the XML declaration.
    def test_latin1_encoded_xmldecl(self):
        text = u"""<?xml version="1.0" encoding="iso-8859-1" ?>
        <div>\xf6</div>
        """.encode('iso-8859-1')
        events = list(XMLParser(StringIO(text)))
        # events[0] is the XML declaration, events[1] the start tag.
        kind, data, pos = events[2]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'\xf6', data)

    # HTML named entity in a document that carries a DOCTYPE.
    def test_html_entity_with_dtd(self):
        text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
        <html>&nbsp;</html>
        """
        events = list(XMLParser(StringIO(text)))
        # events[0] is the DOCTYPE, events[1] the start tag.
        kind, data, pos = events[2]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'\xa0', data)

    # HTML named entity in a document without any DOCTYPE.
    def test_html_entity_without_dtd(self):
        text = '<html>&nbsp;</html>'
        events = list(XMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'\xa0', data)

    # HTML named entity inside an attribute value.
    def test_html_entity_in_attribute(self):
        text = '<p title="&nbsp;"/>'
        events = list(XMLParser(StringIO(text)))
        kind, data, pos = events[0]
        self.assertEqual(Stream.START, kind)
        self.assertEqual(u'\xa0', data[1].get('title'))
        kind, data, pos = events[1]
        self.assertEqual(Stream.END, kind)

    # An unknown entity must raise ParseError (document with DOCTYPE).
    def test_undefined_entity_with_dtd(self):
        text = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
        <html>&junk;</html>
        """
        # The parser is lazy, so the error only surfaces on iteration.
        events = XMLParser(StringIO(text))
        self.assertRaises(ParseError, list, events)

    # An unknown entity must raise ParseError (document without DOCTYPE).
    def test_undefined_entity_without_dtd(self):
        text = '<html>&junk;</html>'
        events = XMLParser(StringIO(text))
        self.assertRaises(ParseError, list, events)


class HTMLParserTestCase(unittest.TestCase):
    """Tests for the event stream produced by `HTMLParser`.

    The three ``test_xmldecl*`` methods exercise `XMLParser`; they are kept
    in this class because that is where the original file defines them.
    """

    # Text inside a single-line element: checks kind, data and position.
    def test_text_node_pos_single_line(self):
        text = '<elem>foo bar</elem>'
        events = list(HTMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual('foo bar', data)
        self.assertEqual((None, 1, 6), pos)

    # Text that contains a line break: checks kind, data and position.
    def test_text_node_pos_multi_line(self):
        text = '''<elem>foo
bar</elem>'''
        events = list(HTMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual('foo\nbar', data)
        self.assertEqual((None, 1, 6), pos)

    # Byte input: text content decoded via the `encoding` argument.
    def test_input_encoding_text(self):
        text = u'<div>\xf6</div>'.encode('iso-8859-1')
        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'\xf6', data)

    # Byte input: attribute value decoded via the `encoding` argument.
    def test_input_encoding_attribute(self):
        text = u'<div title="\xf6"></div>'.encode('iso-8859-1')
        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
        kind, (tag, attrib), pos = events[0]
        self.assertEqual(Stream.START, kind)
        self.assertEqual(u'\xf6', attrib.get('title'))

    # A unicode source string is parsed without an explicit encoding.
    def test_unicode_input(self):
        text = u'<div>\u2013</div>'
        events = list(HTMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'\u2013', data)

    # HTML named entity inside an attribute value.
    def test_html_entity_in_attribute(self):
        text = '<p title="&nbsp;"></p>'
        events = list(HTMLParser(StringIO(text)))
        kind, data, pos = events[0]
        self.assertEqual(Stream.START, kind)
        self.assertEqual(u'\xa0', data[1].get('title'))
        kind, data, pos = events[1]
        self.assertEqual(Stream.END, kind)

    # HTML named entity inside element text.
    def test_html_entity_in_text(self):
        text = '<p>&nbsp;</p>'
        events = list(HTMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'\xa0', data)

    # Processing instruction with a target and data.
    def test_processing_instruction(self):
        text = '<?php echo "Foobar" ?>'
        events = list(HTMLParser(StringIO(text)))
        kind, (target, data), pos = events[0]
        self.assertEqual(Stream.PI, kind)
        self.assertEqual('php', target)
        self.assertEqual('echo "Foobar"', data)

    # Processing instruction with a target but no data.
    def test_processing_instruction_no_data_1(self):
        text = u'<?foo ?>'
        events = list(HTMLParser(StringIO(text)))
        kind, (target, data), pos = events[0]
        self.assertEqual(Stream.PI, kind)
        self.assertEqual('foo', target)
        self.assertEqual('', data)

    # Data-less processing instructions used like open/close markers.
    def test_processing_instruction_no_data_2(self):
        text = u'<?experiment>...<?/experiment>'
        events = list(HTMLParser(StringIO(text)))
        kind, (target, data), pos = events[0]
        self.assertEqual(Stream.PI, kind)
        self.assertEqual('experiment', target)
        self.assertEqual('', data)
        # events[1] is the '...' text between the two instructions.
        kind, (target, data), pos = events[2]
        self.assertEqual('/experiment', target)
        self.assertEqual('', data)

    # XML declaration carrying only a version.
    def test_xmldecl(self):
        text = '<?xml version="1.0" ?><root />'
        events = list(XMLParser(StringIO(text)))
        kind, (version, encoding, standalone), pos = events[0]
        self.assertEqual(Stream.XML_DECL, kind)
        self.assertEqual('1.0', version)
        self.assertEqual(None, encoding)
        self.assertEqual(-1, standalone)

    # XML declaration carrying an encoding.
    def test_xmldecl_encoding(self):
        text = '<?xml version="1.0" encoding="utf-8" ?><root />'
        events = list(XMLParser(StringIO(text)))
        kind, (version, encoding, standalone), pos = events[0]
        self.assertEqual(Stream.XML_DECL, kind)
        self.assertEqual('1.0', version)
        self.assertEqual('utf-8', encoding)
        self.assertEqual(-1, standalone)

    # XML declaration carrying standalone="yes".
    def test_xmldecl_standalone(self):
        text = '<?xml version="1.0" standalone="yes" ?><root />'
        events = list(XMLParser(StringIO(text)))
        kind, (version, encoding, standalone), pos = events[0]
        self.assertEqual(Stream.XML_DECL, kind)
        self.assertEqual('1.0', version)
        self.assertEqual(None, encoding)
        self.assertEqual(1, standalone)

    # Only the closing '?' is stripped; an extra one stays in the data.
    def test_processing_instruction_trailing_qmark(self):
        text = '<?php echo "Foobar" ??>'
        events = list(HTMLParser(StringIO(text)))
        kind, (target, data), pos = events[0]
        self.assertEqual(Stream.PI, kind)
        self.assertEqual('php', target)
        self.assertEqual('echo "Foobar" ?', data)

    # Mis-nested end tags are emitted in properly nested order.
    def test_out_of_order_tags1(self):
        text = '<span><b>Foobar</span></b>'
        events = list(HTMLParser(StringIO(text)))
        self.assertEqual(5, len(events))
        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
        self.assertEqual((Stream.START, ('b', ())), events[1][:2])
        self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2])
        self.assertEqual((Stream.END, 'b'), events[3][:2])
        self.assertEqual((Stream.END, 'span'), events[4][:2])

    # Same as above with three levels of nesting and an attribute.
    def test_out_of_order_tags2(self):
        text = '<span class="baz"><b><i>Foobar</span></b></i>'
        events = list(HTMLParser(StringIO(text)))
        self.assertEqual(7, len(events))
        self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))),
                         events[0][:2])
        self.assertEqual((Stream.START, ('b', ())), events[1][:2])
        self.assertEqual((Stream.START, ('i', ())), events[2][:2])
        self.assertEqual((Stream.TEXT, 'Foobar'), events[3][:2])
        self.assertEqual((Stream.END, 'i'), events[4][:2])
        self.assertEqual((Stream.END, 'b'), events[5][:2])
        self.assertEqual((Stream.END, 'span'), events[6][:2])

    # A stray end tag is dropped and the open elements are still closed.
    def test_out_of_order_tags3(self):
        text = '<span><b>Foobar</i>'
        events = list(HTMLParser(StringIO(text)))
        self.assertEqual(5, len(events))
        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
        self.assertEqual((Stream.START, ('b', ())), events[1][:2])
        self.assertEqual((Stream.TEXT, 'Foobar'), events[2][:2])
        self.assertEqual((Stream.END, 'b'), events[3][:2])
        self.assertEqual((Stream.END, 'span'), events[4][:2])

    # Hexadecimal character reference is decoded into the character.
    def test_hex_charref(self):
        text = '<span>&#x27;</span>'
        events = list(HTMLParser(StringIO(text)))
        self.assertEqual(3, len(events))
        self.assertEqual((Stream.START, ('span', ())), events[0][:2])
        self.assertEqual((Stream.TEXT, "'"), events[1][:2])
        self.assertEqual((Stream.END, 'span'), events[2][:2])


def suite():
    """Return a suite with the parser module's doctests and both test cases."""
    # Local name differs from the function name to avoid shadowing it.
    tests = unittest.TestSuite()
    tests.addTest(doctest.DocTestSuite(XMLParser.__module__))
    tests.addTest(unittest.makeSuite(XMLParserTestCase, 'test'))
    tests.addTest(unittest.makeSuite(HTMLParserTestCase, 'test'))
    return tests


if __name__ == '__main__':
    unittest.main(defaultTest='suite')