# HG changeset patch # User zzzeek # Date 1163034706 0 # Node ID 6cc2eabf73b8af136872a3171430bcc02afcab14 # Parent 2fdf34945dfd499ddab90ec09a9db7014e602a2f - using QName/Attrs now, is faster if match templates are being used, only slightly slower if not - got XMLSerializeFilter working. Filters also are stateful for now since they are used multiple times for the same module during generation diff --git a/genshi/codegen/adapters.py b/genshi/codegen/adapters.py --- a/genshi/codegen/adapters.py +++ b/genshi/codegen/adapters.py @@ -46,37 +46,12 @@ def __unicode__(self): return self.render(encoding=None) -class InlineEvent(object): - __eventtypes__ = {} - def __new__(cls, event): - return object.__new__(InlineEvent.__eventtypes__.get(event[0], InlineEvent), event) - def __init__(self, event): - self.event = event - def to_genshi(self): - return self.event[0:3] - -class InlineStartEvent(InlineEvent): - def to_genshi(self): - return (self.event[0], (InlineQName(self.event), Attrs(self.event[1][2])), self.event[2]) -InlineEvent.__eventtypes__[template.START] = InlineStartEvent - -class InlineQName(unicode): - """creates a QName-like object from a START event""" - def __new__(cls, event): - if event[1][0] is not None: - self = unicode.__new__(cls, u'{%s}%s' % (event[1][0], event[1][1])) - else: - self = unicode.__new__(cls, u'%s' % (event[1][1])) - self.namespace = event[1][0] - self.localname = event[1][1] - return self - class InlinePath(Path): """overrides Path.test to adapt incoming events from inlined to Genshi.""" def test(self, ignore_context=False): t = super(InlinePath, self).test(ignore_context=ignore_context) def _test(event, namespaces, variables, updateonly=False): - return t(InlineEvent(event).to_genshi(), namespaces, variables, updateonly=updateonly) + return t(event[0:3], namespaces, variables, updateonly=updateonly) return _test def select(self, stream, namespaces=None, variables=None): if namespaces is None: @@ -104,9 +79,8 @@ elif isinstance(result, tuple): yield event elif result: - # in genshi.path.Path, this could be an Attrs or a 3-tupled event. - # here, we only want Attrs to come out. - yield result + # scalar result, return an event + yield template.TEXT, unicode(result), (None, -1, -1), unicode(result) return InlineStream(_generate()) def __repr__(self): diff --git a/genshi/codegen/generator.py b/genshi/codegen/generator.py --- a/genshi/codegen/generator.py +++ b/genshi/codegen/generator.py @@ -20,6 +20,8 @@ from genshi.codegen import serialize, adapters, output, interp from compiler import ast, parse, visitor import sets, re +from genshi.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \ + END_CDATA, PI, COMMENT, XML_NAMESPACE _directive_printers = {} @@ -112,7 +114,7 @@ class Generator(object): """given a Template, generates Python modules (as strings or code objects) optimized to a particular Serializer.""" - def __init__(self, template, method='html', serializer=None, strip_whitespace=False, compress_empty=False, filters=None): + def __init__(self, template, method='xml', serializer=None, strip_whitespace=False, compress_empty=False, filters=None): self.template = template self.serializer = serializer or ({ 'xml': serialize.XMLSerializeFilter, @@ -227,16 +229,16 @@ for d in directives: for evt in self.produce_directive(d, event, substream): yield evt - elif kind is template.START: + elif kind is START: for evt in self.produce_start_event(event): yield evt - elif kind is template.END: + elif kind is END: for evt in self.produce_end_event(event): yield evt elif kind is template.EXPR: for evt in self.produce_expr_event(event): yield evt - elif kind is template.TEXT: + elif kind is TEXT: for evt in self.produce_text_event(event): yield evt elif kind is template.START_NS: @@ -245,9 +247,12 @@ elif kind is template.END_NS: for evt in self.produce_end_ns_event(event): yield evt + elif kind is DOCTYPE: + for evt in self.produce_doctype_event(event): + yield evt def produce_preamble(self): for line in [ - "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, Stream", + "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, Stream, QName, Attrs", "from genshi.template import Context, Template", "from genshi.path import Path", "from genshi.codegen import interp, adapters", @@ -265,9 +270,8 @@ yield (PYTHON_LINE, "") def produce_start_event(self, event): qn = QName(event[1][0]) - yield (PYTHON_LINE, "yield (START, (%s, %s, %s), %s, %s)" % ( - repr(qn.namespace), - repr(qn.localname), + yield (PYTHON_LINE, "yield (START, (QName(%s), Attrs(%s)), %s, %s)" % ( + repr(qn), repr(event[1][1]), repr(event[2]), repr(event[3])) @@ -294,6 +298,12 @@ repr(event[2]), repr(unicode(event[3])) )) + def produce_doctype_event(self, event): + yield (PYTHON_LINE, "yield (DOCTYPE, (%s), %s, %s)" % ( + repr(unicode(event[1])), + repr(event[2]), + repr(unicode(event[3])) + )) def produce_start_ns_event(self, event): yield (PYTHON_LINE, "yield (START_NS, (%s), %s, %s)" % ( @@ -303,7 +313,7 @@ )) yield (PYTHON_LINE, "_namespaces[%s] = %s" % (repr(event[1][0]), repr(unicode(event[1][1])))) def produce_end_ns_event(self, event): - yield (PYTHON_LINE, "del _namespaces[%s]" % (repr(event[1]))) + #yield (PYTHON_LINE, "del _namespaces[%s]" % (repr(event[1]))) yield (PYTHON_LINE, "yield (START_NS, (%s), %s, %s)" % ( repr(unicode(event[1])), repr(event[2]), diff --git a/genshi/codegen/interp.py b/genshi/codegen/interp.py --- a/genshi/codegen/interp.py +++ b/genshi/codegen/interp.py @@ -10,7 +10,6 @@ import sets from itertools import chain - # we re-implement our own _match function, based on MarkupTemplate._match. def _match(stream, ctxt, match_templates=None): """match method from MarkupTemplate, modified to handle inlined stream of events. @@ -41,8 +40,6 @@ event[0] is not END): yield event continue - # no need for a sub-list of directives (nor _apply_directives function) since inlined code - # expands all nesting explicitly (TODO: is this really true ?) for idx, (test, path, template, namespaces) in \ enumerate(match_templates): @@ -52,9 +49,7 @@ content = chain([event], _match(_strip(stream), ctxt), tail) - # TODO: not sure if extra list of filters is needed - #for filter_ in self.filters[3:]: - # content = filter_(content, ctxt) + content = list(content) for test in [mt[0] for mt in match_templates]: @@ -63,31 +58,18 @@ def select(path): return adapters.InlinePath(path).select(Stream(content), namespaces, ctxt) - # similarly, no need for _eval (eval is inlined) as well as _flatten (inlined code already "flattened") for event in _match(template(select), ctxt, match_templates[:idx] + match_templates[idx + 1:]): yield event break else: yield event -# TODO: this adds too much overhead -def _ensure(stream): - """Ensure that every item on the stream is actually an inline event.""" - for event in stream: - if type(event) is not tuple: - if hasattr(event, 'totuple'): - event = event.totuple() - else: - event = TEXT, unicode(event), (None, -1, -1), unicode(event) - yield event - def evaluate(result, pos): if result is not None: if isinstance(result, basestring): yield TEXT, result, pos, result elif hasattr(result, '__iter__'): - substream = _ensure(result) - for event in substream: + for event in result: yield event else: yield TEXT, unicode(result), pos, result diff --git a/genshi/codegen/output.py b/genshi/codegen/output.py --- a/genshi/codegen/output.py +++ b/genshi/codegen/output.py @@ -84,8 +84,8 @@ yield TEXT, Markup(text), pos, unicode(text) if kind is START: - namespace, localname, attrib = data - tag = (namespace, localname) + qname, attrib = data + tag = (qname.namespace, qname.localname) if not preserve and (tag in preserve_elems or adapters.get_attrib(attrib, space) == 'preserve'): preserve = True diff --git a/genshi/codegen/serialize.py b/genshi/codegen/serialize.py --- a/genshi/codegen/serialize.py +++ b/genshi/codegen/serialize.py @@ -18,6 +18,8 @@ While this module is a severe transgression of DRY, reusing the output-specific logic from the genshi.output module would require de-optimizing the base genshi.output implementations. + +The Filters are also stateful and must be created per-generator. """ from itertools import chain @@ -52,10 +54,94 @@ self.preamble = [] if doctype: self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) - # TODO: fold empty tags ? + self.ns_attrib = [] + self.ns_mapping = {XML_NAMESPACE.uri: 'xml'} + self.have_doctype = False + self.in_cdata = False def __call__(self, stream): - raise "TODO" + stream = chain(self.preamble, stream) + for kind, data, pos in stream: + if kind is START: + tag, attrib = data + tagname = tag.localname + namespace = tag.namespace + if namespace: + if namespace in self.ns_mapping: + prefix = self.ns_mapping[namespace] + if prefix: + tagname = '%s:%s' % (prefix, tagname) + else: + self.ns_attrib.append((QName('xmlns'), namespace)) + buf = ['<', tagname] + + for attr, value in attrib + self.ns_attrib: + attrname = attr.localname + if attr.namespace: + prefix = ns_mapping.get(attr.namespace) + if prefix: + attrname = '%s:%s' % (prefix, attrname) + buf += [' ', attrname, '="', escape(value), '"'] + self.ns_attrib = [] + + buf += ['>'] + yield kind, data, pos, u''.join(buf) + + elif kind is END: + tag = data + tagname = tag.localname + if tag.namespace: + prefix = self.ns_mapping.get(tag.namespace) + if prefix: + tagname = '%s:%s' % (prefix, tag.localname) + yield kind, data, pos, u'%s>' % tagname + + elif kind is TEXT: + if self.in_cdata: + yield kind, data, pos, data + else: + yield kind, data, pos, escape(data, quotes=False) + + elif kind is COMMENT: + yield kind, data, pos, u'' % data + + elif kind is DOCTYPE and not self.have_doctype: + name, pubid, sysid = data + buf = ['\n'] + yield kind, data, pos, unicode(Markup(''.join(buf), *filter(None, data))) + self.have_doctype = True + + elif kind is START_NS: + prefix, uri = data + if uri not in self.ns_mapping: + self.ns_mapping[uri] = prefix + if not prefix: + self.ns_attrib.append((QName('xmlns'), uri)) + else: + self.ns_attrib.append((QName('xmlns:%s' % prefix), uri)) + yield kind, data, pos, None + + elif kind is START_CDATA: + yield kind, data, pos, u'' + self.in_cdata = False + + elif kind is PI: + yield kind, data, pos, u'%s %s?>' % data + else: + # all other events pass-thru + yield kind, data, pos, None + class XHTMLSerializeFilter(XMLSerializeFilter): """Delivers the given stream with additional XHTML text added to outgoing events. @@ -108,30 +194,32 @@ for kind, data, pos in stream: if kind is START: tag, attrib = data - if not tag.namespace or tag in namespace: - tagname = tag.localname - buf = ['<', tagname] + tagname = tag.localname + buf = ['<', tagname] - for attr, value in attrib: - attrname = attr.localname - if not attr.namespace or attr in namespace: - if attrname in boolean_attrs: - if value: - buf += [' ', attrname] - else: - buf += [' ', attrname, '="', escape(value), '"'] + for attr, value in attrib: + attrname = attr.localname + if not attr.namespace or attr in namespace: + if attrname in boolean_attrs: + if value: + buf += [' ', attrname] + else: + buf += [' ', attrname, '="', escape(value), '"'] - buf += ['>'] + buf += ['>'] + if tag.namespace and tag not in namespace: + yield kind, data, pos, u'' + else: yield kind, data, pos, u''.join(buf) - if tagname in noescape_elems: noescape = True elif kind is END: if not data.namespace or data in namespace: yield kind, data, pos, u'%s>' % data.localname - + else: + yield kind, data, pos, u'' noescape = False elif kind is TEXT: diff --git a/genshi/codegen/tests/template.py b/genshi/codegen/tests/template.py --- a/genshi/codegen/tests/template.py +++ b/genshi/codegen/tests/template.py @@ -13,7 +13,7 @@ class MarkupTemplateAdapter(object): def __init__(self, text): self.generator = Generator(RealMarkupTemplate(text), strip_whitespace=True, compress_empty=True) - #print u''.join(self.generator._generate_code_events()) + print u''.join(self.generator._generate_code_events()) def generate(self, *args, **kwargs): return self.generator.generate(*args, **kwargs) diff --git a/genshi/codegen/tests/test_generator.py b/genshi/codegen/tests/test_generator.py --- a/genshi/codegen/tests/test_generator.py +++ b/genshi/codegen/tests/test_generator.py @@ -12,9 +12,8 @@ # history and logs, available at http://genshi.edgewall.org/log/. from genshi.template import MarkupTemplate, Template, Context -from genshi.output import HTMLSerializer +from genshi.output import XMLSerializer from genshi.codegen import generator, interp -from genshi.codegen.serialize import HTMLSerializeFilter import time, sys text = """
- -