# HG changeset patch # User zzzeek # Date 1162760043 0 # Node ID d81532240de74d14e16dad959ea592852a9bda44 # Parent f013a84050aca523f88fc2c2e1fe5197cfe42fa0 inlined generation some more, with the adapters module converting events back to Genshi-style when needed. added PostWhitespaceFilter as proof of concept (greatly slows down execution) diff --git a/genshi/codegen/adapters.py b/genshi/codegen/adapters.py new file mode 100644 --- /dev/null +++ b/genshi/codegen/adapters.py @@ -0,0 +1,69 @@ +"""a set of Inline adapters, which convert from inlined structures to Genshi core structures""" + +from genshi.path import Path + +def get_attrib(attrib, name, default=None): + """return an 'attribute' name from a list of tuples, similar to genshi.core.Attrib""" + for attr, value in attrib: + if attr == name: + return value + return default + +class InlineStream(object): + """works similarly to genshi.core.Stream""" + def __init__(self, generator, context): + self.code = generator.code + self.filters = generator.filters + self.context = context + self.stream = self.code.go(self.context) + def __iter__(self): + return list(self.stream) + + def __or__(self, function): + return InlineStream(function(self)) + + def filter(self, *filters): + return reduce(operator.or_, (self,) + filters) + + def render(self, method='xml', encoding='utf-8', **kwargs): + generator = self.serialize(method=method, **kwargs) + output = u''.join(list(generator)) + if encoding is not None: + errors = 'replace' + if method != 'text': + errors = 'xmlcharrefreplace' + return output.encode(encoding, errors) + return output + + def select(self, path, namespaces=None, variables=None): + return InlinedPath(path).select(self, namespaces, variables) + + def serialize(self, method='xml', **kwargs): + stream = self.stream + for filter_ in self.filters: + stream = filter_(stream) + for evt in stream: + yield evt[3] + + def __str__(self): + return self.render() + + def __unicode__(self): + return self.render(encoding=None) + +class InlineQName(object): + """creates a QName-like object from a START event""" + def __init__(self, event): + self.namespace = event[1][0] + self.localname = event[1][1] + +class InlinedPath(Path): + """overrides Path.test to adapt incoming events from inlined to Genshi.""" + def test(self, ignore_context=False): + t = super(InlinedPath, self).test(ignore_context=ignore_context) + def _test(event, namespaces, variables, updateonly=False): + if event[0] is START: + return t((event[0], (InlineQName(event), event[1][1]), event[2]), namespaces, variables, updateonly=updateonly) + else: + return t(event[0:3], namespaces, variables, updateonly=updateonly) + return _test diff --git a/genshi/codegen/generator.py b/genshi/codegen/generator.py --- a/genshi/codegen/generator.py +++ b/genshi/codegen/generator.py @@ -13,14 +13,17 @@ from genshi import template -from genshi.template import Template +from genshi.template import Template, Context +from genshi.path import Path +from genshi.core import QName from genshi.codegen.printer import PythonPrinter, PYTHON_LINE, PYTHON_COMMENT, PYTHON_BLOCK +from genshi.codegen import serialize, adapters, output from compiler import ast, parse, visitor import sets, re _directive_printers = {} -def ident_from_assign(assign): +def _ident_from_assign(assign): # a little trick to get the variable name from the already # compiled assignment expression x = {} @@ -40,15 +43,15 @@ class ForDirectivePrinter(DirectivePrinter): __directive__ = template.ForDirective def produce_directive(self, gencontext, directive, event, substream): - varname = ident_from_assign(directive.assign) + varname = _ident_from_assign(directive.assign) yield (PYTHON_LINE, "for %s in %s:" % (varname, directive.expr.source)) for evt in gencontext.gen_stream(substream): yield evt yield (PYTHON_LINE, "") def declared_identifiers(self, gencontext, directive, event): - return [ident_from_assign(directive.assign)] + return [_ident_from_assign(directive.assign)] def undeclared_identifiers(self, gencontext, directive, event): - s = SearchIdents(directive.expr.source) + s = _SearchIdents(directive.expr.source) return list(s.identifiers) ForDirectivePrinter() @@ -85,33 +88,52 @@ def undeclared_identifiers(self, gencontext, directive, event): result = sets.Set() for expr in directive.defaults.values(): - s = SearchIdents(expr.node) + s = _SearchIdents(expr.node) result = result.union(s.identifiers) return iter(result) - DefDirectivePrinter() - class Generator(object): """given a Template, generates Python modules (as strings or code objects) optimized to a particular Serializer.""" - def __init__(self, template): + def __init__(self, template, method='html', serializer=None, strip_whitespace=False, filters=None): self.template = template - def generate_stream(self, serializer): + self.serializer = serializer or ({ + 'xml': serialize.XMLSerializeFilter, + 'xhtml': serialize.XHTMLSerializeFilter, + 'html': serialize.HTMLSerializeFilter, + 'text': serialize.TextSerializeFilter}[method]()) + self.code = self._generate_module() + self.filters = filters or [] + if strip_whitespace: + self.filters.append(output.PostWhitespaceFilter()) + def generate(self, *args, **kwargs): + if args: + assert len(args) == 1 + ctxt = args[0] + if ctxt is None: + ctxt = Context(**kwargs) + assert isinstance(ctxt, Context) + else: + ctxt = Context(**kwargs) + + return adapters.InlineStream(self, ctxt) + + def _generate_code_events(self): return PythonPrinter( PythonGenerator( - self.template.stream, serializer + self.template.stream, self.serializer ).generate() ).generate() - def generate_module(self, serializer): + def _generate_module(self): import imp module = imp.new_module("_some_ident") - pycode = u''.join(self.generate_stream(serializer)) + pycode = u''.join(self._generate_code_events()) code = compile(pycode, '', 'exec') exec code in module.__dict__, module.__dict__ return module - -class SearchIdents(visitor.ASTVisitor): + +class _SearchIdents(visitor.ASTVisitor): """an ASTVisitor that can locate identifier names in a string-based code block. This is not used in this example module, but will be used to locate and pre-declare @@ -145,7 +167,7 @@ """locate undeclared python identifiers in the given stream. stack is an empty set.""" for evt in stream: if evt[0] is template.EXPR: - s = SearchIdents(evt[1].source) + s = _SearchIdents(evt[1].source) for ident in s.identifiers.difference(stack): yield (PYTHON_LINE, "%s = context.get('%s', None)" % (ident, ident)) elif evt[0] is template.SUB: @@ -162,7 +184,6 @@ def gen_stream(self, stream): for event in self.serializer(stream): (kind, data, pos, literal) = event - #print "INCOMING:", event if kind is template.SUB: directives, substream = event[1] for d in directives: @@ -182,7 +203,7 @@ yield evt def produce_preamble(self): for line in [ - "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, QName, Stream", + "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, Stream", "from genshi.template import Context, Template", "from genshi.path import Path", "from genshi.codegen import interp", @@ -203,8 +224,10 @@ def end(self): yield (PYTHON_LINE, "") def produce_start_event(self, event): - yield (PYTHON_LINE, "yield (START, (QName(%s), %s), %s, %s)" % ( - repr(event[1][0]), + qn = QName(event[1][0]) + yield (PYTHON_LINE, "yield (START, (%s, %s, %s), %s, %s)" % ( + repr(qn.namespace), + repr(qn.localname), repr(event[1][1]), repr(event[2]), repr(event[3])) diff --git a/genshi/codegen/output.py b/genshi/codegen/output.py new file mode 100644 --- /dev/null +++ b/genshi/codegen/output.py @@ -0,0 +1,83 @@ +from itertools import chain +try: + frozenset +except NameError: + from sets import ImmutableSet as frozenset +import re + +from genshi.codegen import adapters +from genshi.core import escape, Markup, Namespace, QName, StreamEventKind +from genshi.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \ + END_CDATA, PI, COMMENT, XML_NAMESPACE + + +class PostWhitespaceFilter(object): + """A filter that removes extraneous ignorable white space from the + stream.""" + + def __init__(self, preserve=None, noescape=None): + """Initialize the filter. + + @param preserve: a set or sequence of tag names for which white-space + should be ignored. + @param noescape: a set or sequence of tag names for which text content + should not be escaped + + Both the `preserve` and `noescape` sets are expected to refer to + elements that cannot contain further child elements. + """ + if preserve is None: + preserve = [] + self.preserve = frozenset(preserve) + if noescape is None: + noescape = [] + self.noescape = frozenset(noescape) + + def __call__(self, stream, space=XML_NAMESPACE['space'], + trim_trailing_space=re.compile('[ \t]+(?=\n)').sub, + collapse_lines=re.compile('\n{2,}').sub): + mjoin = Markup('').join + preserve_elems = self.preserve + preserve = False + noescape_elems = self.noescape + noescape = False + + textbuf = [] + push_text = textbuf.append + pop_text = textbuf.pop + for kind, data, pos, literal in chain(stream, [(None, None, None, None)]): + if kind is TEXT: + if noescape: + data = Markup(data) + push_text(data) + else: + if textbuf: + if len(textbuf) > 1: + text = mjoin(textbuf, escape_quotes=False) + del textbuf[:] + else: + text = escape(pop_text(), quotes=False) + if not preserve: + text = collapse_lines('\n', trim_trailing_space('', text)) + yield TEXT, Markup(text), pos, unicode(text) + + if kind is START: + namespace, localname, attrib = data + tag = (namespace, localname) + if not preserve and (tag in preserve_elems or + adapters.get_attrib(attrib, space) == 'preserve'): + preserve = True + if not noescape and tag in noescape_elems: + noescape = True + + elif kind is END: + preserve = noescape = False + + elif kind is START_CDATA: + noescape = True + + elif kind is END_CDATA: + noescape = False + + if kind: + yield kind, data, pos, literal diff --git a/genshi/codegen/serialize.py b/genshi/codegen/serialize.py --- a/genshi/codegen/serialize.py +++ b/genshi/codegen/serialize.py @@ -30,7 +30,7 @@ from genshi.core import escape, Markup, Namespace, QName, StreamEventKind from genshi.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \ END_CDATA, PI, COMMENT, XML_NAMESPACE -from genshi.output import DocType, WhitespaceFilter +from genshi.output import DocType __all__ = ['XMLSerializeFilter', 'XHTMLSerializeFilter', 'HTMLSerializeFilter'] @@ -38,10 +38,9 @@ """Delivers the given stream with additional XML text added to outgoing events. """ - _PRESERVE_SPACE = frozenset() - def __init__(self, doctype=None, strip_whitespace=True): + def __init__(self, doctype=None): """Initialize the XML serialize filter. @param doctype: a `(name, pubid, sysid)` tuple that represents the @@ -54,10 +53,6 @@ if doctype: self.preamble.append((DOCTYPE, doctype, (None, -1, -1))) # TODO: fold empty tags ? - self.filters = [] - if strip_whitespace: - # TODO: can we process whitespace before a template is executed with a Context ? - self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE)) def __call__(self, stream): raise "TODO" @@ -89,7 +84,7 @@ QName('style'), QName('http://www.w3.org/1999/xhtml}style')]) - def __init__(self, doctype=None, strip_whitespace=True): + def __init__(self, doctype=None): """Initialize the HTML serialize filter. @param doctype: a `(name, pubid, sysid)` tuple that represents the @@ -98,11 +93,8 @@ @param strip_whitespace: whether extraneous whitespace should be stripped from the output """ - super(HTMLSerializeFilter, self).__init__(doctype, False) - if strip_whitespace: - self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE, - self._NOESCAPE_ELEMS)) - + super(HTMLSerializeFilter, self).__init__(doctype) + def __call__(self, stream): namespace = self.NAMESPACE ns_mapping = {} @@ -113,8 +105,6 @@ noescape = False stream = chain(self.preamble, stream) - for filter_ in self.filters: - stream = filter_(stream) for kind, data, pos in stream: if kind is START: tag, attrib = data @@ -174,3 +164,6 @@ else: # all other events pass-thru yield kind, data, pos, None + +class TextSerializeFilter(object): + pass diff --git a/genshi/codegen/tests/template.py b/genshi/codegen/tests/template.py new file mode 100644 --- /dev/null +++ b/genshi/codegen/tests/template.py @@ -0,0 +1,30 @@ +from genshi.tests import template +from genshi.template import MarkupTemplate as RealMarkupTemplate +from genshi.codegen.generator import Generator +from genshi.codegen.serialize import HTMLSerializeFilter + + +import unittest + +# original template unittest does this: +# tmpl = MarkupTemplate(text) +# result = str(tmpl.generate(items=items))) + +class MarkupTemplateAdapter(object): + def __init__(self, text): + self.generator = Generator(RealMarkupTemplate(text), strip_whitespace=True) + def generate(self, *args, **kwargs): + return self.generator.generate(*args, **kwargs) + + +template.MarkupTemplate = MarkupTemplateAdapter + +from genshi.tests.template import * + + + +def suite(): + return template.suite() + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/genshi/codegen/tests/test_generator.py b/genshi/codegen/tests/test_generator.py --- a/genshi/codegen/tests/test_generator.py +++ b/genshi/codegen/tests/test_generator.py @@ -52,30 +52,36 @@ def items(): return ["one", "two", "three"] -data = {'lala':'hi', 'items':items, 'foo':['f1', 'f2', 'f3']} +data = {'lala':'hi', 'items':lambda:["one", "two", "three"], 'foo':['f1', 'f2', 'f3']} t = MarkupTemplate(text) -print u''.join(HTMLSerializer()(t.generate(**data))) +print t.generate(**data).render() g = generator.Generator(t) -pycode = u''.join(g.generate_stream(HTMLSerializeFilter())) +pycode = u''.join(g._generate_code_events()) print pycode -g = generator.Generator(t) -module = g.generate_module(HTMLSerializeFilter()) -print u''.join(interp.run_inlined(module, data)) +print str(g.generate(**data)) print "Running MarkupTemplate.generate()/HTMLSerializer..." now = time.time() for x in range(1,1000): stream = t.generate(**data) - serializer = HTMLSerializer() - list(serializer(stream)) + stream.render() print "MarkupTemplate.generate()/HTMLSerializer totaltime: %f" % (time.time() - now) # inline print "Running inlined module..." now = time.time() for x in range(1,1000): - list(interp.run_inlined(module, data)) + str(g.generate(**data)) print "Inlined module totaltime: %f" % (time.time() - now) + +# inline with whitespace filter +print "Running inlined module..." +g = generator.Generator(t, strip_whitespace=True) +now = time.time() +for x in range(1,1000): + str(g.generate(**data)) + +print "Inlined module w/ strip_whitespace totaltime: %f" % (time.time() - now)