# HG changeset patch
# User zzzeek
# Date 1162318415 0
# Node ID d60a60ba4224711694399cb5c1cda46d02b55706
# Parent 039b21d6f8f4347f1a7681f612dba661bf7d0986
- created 'codegen' package
- adapted PythonPrinter from Myghty
- created initial Generator/SerializeFilter framework + quickie template runner

diff --git a/genshi/codegen/__init__.py b/genshi/codegen/__init__.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/__init__.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
diff --git a/genshi/codegen/generator.py b/genshi/codegen/generator.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/generator.py
@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+
+from genshi import template
+from genshi.template import Template
+from genshi.codegen.printer import PythonPrinter, PYTHON_LINE, PYTHON_COMMENT, PYTHON_BLOCK
+
+# maps a directive class to the DirectivePrinter instance registered for it
+_directive_printers = {}
+
+class DirectivePrinter(object):
+    """Base class for objects that emit Python source for one directive type.
+
+    Instantiating a subclass registers the instance in `_directive_printers`
+    under the subclass' `__directive__` attribute.
+    """
+    def __init__(self):
+        _directive_printers[self.__directive__] = self
+    def start_directive(self, gencontext, directive):
+        """Return an iterable of (linetype, text) events opening the directive."""
+        # an empty tuple rather than None, since callers iterate the result
+        return ()
+    def end_directive(self, gencontext, directive):
+        """Return an iterable of (linetype, text) events closing the directive."""
+        return ()
+
+class ForDirectivePrinter(DirectivePrinter):
+    """Emits a Python `for` loop for a `template.ForDirective`."""
+    __directive__ = template.ForDirective
+    def start_directive(self, gencontext, directive):
+        # assign() is handed an empty dict; its first key is used as the loop
+        # variable name (presumably the directive's target -- TODO confirm)
+        x = {}
+        directive.assign(x, None)
+        varname = list(x)[0]
+        yield (PYTHON_LINE, "for %s in %s:" % (varname, directive.expr.source))
+    def end_directive(self, gencontext, directive):
+        # a blank line makes PythonPrinter decrement its indentation counter
+        yield (PYTHON_LINE, "")
+ForDirectivePrinter()
+
+class Generator(object):
+    """given a Template, generates Python modules (as strings or code objects)
+    optimized to a particular Serializer."""
+    def __init__(self, template):
+        self.template = template
+    def generate(self, serializer):
+        """Return an iterator over the lines of generated Python source."""
+        return PythonPrinter(
+            PythonGenerator(
+                self.template.stream, serializer
+            ).generate()
+        ).generate()
+
+class PythonGenerator(object):
+    """Turns a serialized template stream into (linetype, text) events."""
+    def __init__(self, stream, serializer):
+        self.stream = stream
+        self.serializer = serializer
+    def generate(self):
+        for evt in self.start():
+            yield evt
+        for evt in self.gen_stream(self.stream):
+            yield evt
+        for evt in self.end():
+            yield evt
+
+    def gen_stream(self, stream):
+        for event in self.serializer(stream):
+            (kind, data, pos, literal) = event
+            if kind is template.SUB:
+                directives, substream = event[1]
+                for d in directives:
+                    for evt in self.produce_directive_start(d):
+                        yield evt
+                for evt in self.gen_stream(substream):
+                    yield evt
+                for d in directives:
+                    for evt in self.produce_directive_end(d):
+                        yield evt
+            elif kind is template.START:
+                for evt in self.produce_start_event(event):
+                    yield evt
+            elif kind is template.END:
+                for evt in self.produce_end_event(event):
+                    yield evt
+    def produce_preamble(self):
+        for line in [
+            "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, QName, Stream",
+            "from genshi.template import Context",
+            "from genshi.path import Path"
+        ]:
+            yield (PYTHON_LINE, line)
+
+    def produce_directive_start(self, directive):
+        for evt in _directive_printers[directive.__class__].start_directive(self, directive):
+            yield evt
+    def produce_directive_end(self, directive):
+        for evt in _directive_printers[directive.__class__].end_directive(self, directive):
+            yield evt
+    def start(self):
+        for evt in self.produce_preamble():
+            yield evt
+        yield (PYTHON_LINE, "def go(context):")
+    def end(self):
+        yield (PYTHON_LINE, "")
+    def produce_start_event(self, event):
+        # the preamble imports the name as QName, so the generated line must
+        # spell it the same way
+        yield (PYTHON_LINE, "yield (START, (QName(%s), %s), %s, %s)" % (
+            repr(event[1][0]),
+            repr(event[1][1]),
+            repr(event[2]),
+            repr(event[3]))
+        )
+    def produce_end_event(self, event):
+        yield (PYTHON_LINE, "yield (END, (%s), %s, %s)" % (
+            repr(event[1]),
+            repr(event[2]),
+            repr(event[3]))
+        )
+
+
+
diff --git a/genshi/codegen/printer.py b/genshi/codegen/printer.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/printer.py
@@ -0,0 +1,198 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+import re
+
+PYTHON_LINE = "line"
+PYTHON_COMMENT = "comment"
+PYTHON_BLOCK = "block"
+
+class PythonPrinter(object):
+    """prints Python code, keeping track of indentation level.
+
+    Adapted from PythonPrinter in Myghty; also uses stream-based operation.  The Myghty version of this is
+    more complicated; among other things, it includes a 'block' version useful
+    for properly indenting user-included blocks of Python.  When Genshi gets the <?python ?>
+    tag we will want to revisit this output mode."""
+    def __init__(self, stream):
+        # the indentation counter
+        self.indent = 0
+
+        # a stack storing information about why we incremented
+        # the indentation counter, to help us determine if we
+        # should decrement it
+        self.indent_detail = []
+
+        # the string of whitespace multiplied by the indent
+        # counter to produce a line
+        self.indentstring = "    "
+
+        # a stack of whitespace we pulled from "normalized"
+        # Python lines to track when the indentation counter should
+        # be incremented or decremented
+        self.spacestack = []
+
+        # read stream
+        self.stream = stream
+
+        self._reset_multi_line_flags()
+
+    def generate(self):
+        """yields one indented, newline-terminated string per stream event.
+
+        Raises NotImplementedError for PYTHON_BLOCK events and ValueError for
+        any other unrecognized line type.
+        """
+        for linetype, line in self.stream:
+            # compare by value; identity of equal strings is not guaranteed
+            if linetype == PYTHON_LINE:
+                yield self._process_line(line)
+            elif linetype == PYTHON_COMMENT:
+                yield self._process_comment(line)
+            elif linetype == PYTHON_BLOCK:
+                raise NotImplementedError("PYTHON_BLOCK not supported yet")
+            else:
+                raise ValueError("unknown block type %s" % linetype)
+
+    def _process_line(self, line, is_comment=False):
+        """returns the line, newline-terminated and preceded by an indentation
+        string of proportional size to the current indent counter.
+
+        If the line is blank or comment-only (and is_comment is not set), or
+        begins with else/elif/except/finally while the innermost open block is
+        a compound statement, the indentation counter is decremented before
+        printing.  If the line ends with a colon and begins with a block
+        keyword, the indentation counter is incremented after printing.
+
+        Raises ValueError if the indentation bookkeeping is out of sync.
+        """
+        if (
+            re.match(r"^\s*#",line) or
+            re.match(r"^\s*$", line)
+            ):
+            hastext = False
+        else:
+            hastext = True
+
+        # see if this line should decrease the indentation level
+        if (not is_comment and
+            (not hastext or self._is_unindentor(line))
+            ):
+
+            if self.indent > 0:
+                # if the indent_detail stack is empty, the user
+                # probably put extra closures - the resulting
+                # module wont compile.  check before touching any state.
+                if len(self.indent_detail) == 0:
+                    raise ValueError("Too many whitespace closures")
+                self.indent -=1
+                self.indent_detail.pop()
+
+        # see if this line should increase the indentation level.
+        # note that a line can both decrease (before printing) and
+        # then increase (after printing) the indentation level.
+        result = self._indent_line(line) + "\n"
+
+        if re.search(r":[ \t]*(?:#.*)?$", line):
+            # increment indentation count, and also
+            # keep track of what the keyword was that indented us,
+            # if it is a python compound statement keyword
+            # where we might have to look for an "unindent" keyword
+            match = re.match(r"^\s*(if|try|elif|while|for)", line)
+            if match:
+                # its a "compound" keyword, so we will check for "unindentors"
+                indentor = match.group(1)
+                self.indent +=1
+                self.indent_detail.append(indentor)
+            else:
+                indentor = None
+                # its not a "compound" keyword.  but lets also
+                # test for valid Python keywords that might be indenting us,
+                # else assume its a non-indenting line
+                m2 = re.match(r"^\s*(def|class|else|elif|except|finally)", line)
+                if m2:
+                    self.indent += 1
+                    self.indent_detail.append(indentor)
+
+        return result
+
+    def _process_comment(self, comment):
+        """returns the comment text as an indented "# ..." line."""
+        return self._process_line("# " + comment, is_comment=True)
+
+    def _is_unindentor(self, line):
+        """return True if the given line unindents the most recent indent-increasing line."""
+
+        # no indentation detail has been pushed on; return False
+        if len(self.indent_detail) == 0: return False
+
+        indentor = self.indent_detail[-1]
+
+        # the last indent keyword we grabbed is not a
+        # compound statement keyword; return False
+        if indentor is None: return False
+
+        # if the current line doesnt have one of the "unindentor" keywords,
+        # return False
+        match = re.match(r"^\s*(else|elif|except|finally)", line)
+        if not match: return False
+
+        # whitespace matches up, we have a compound indentor,
+        # and this line has an unindentor, this
+        # is probably good enough
+        return True
+
+        # should we decide that its not good enough, heres
+        # more stuff to check.
+        #keyword = match.group(1)
+
+        # match the original indent keyword
+        #for crit in [
+        #   (r'if|elif', r'else|elif'),
+        #   (r'try', r'except|finally|else'),
+        #   (r'while|for', r'else'),
+        #]:
+        #   if re.match(crit[0], indentor) and re.match(crit[1], keyword): return True
+
+        #return False
+
+
+    def _indent_line(self, line, stripspace = ''):
+        """returns the line with the current indentation prepended."""
+        return re.sub(r"^%s" % stripspace, self.indentstring * self.indent, line)
+
+    def _reset_multi_line_flags(self):
+        """clears the backslash-continuation and triple-quote state flags."""
+        (self.backslashed, self.triplequoted) = (False, False)
+
+    def _in_multi_line(self, line):
+        """returns True if the line continues a backslash-joined line or lies
+        inside a triple-quoted string begun earlier, then updates that state."""
+        # we are only looking for explicitly joined lines here,
+        # not implicit ones (i.e. brackets, braces etc.).  this is just
+        # to guard against the possibility of modifying the space inside
+        # of a literal multiline string with unfortunately placed whitespace
+
+        current_state = (self.backslashed or self.triplequoted)
+
+        if re.search(r"\\$", line):
+            self.backslashed = True
+        else:
+            self.backslashed = False
+
+        triples = len(re.findall(r"\"\"\"|\'\'\'", line))
+        # an odd number of triple quotes opens or closes a multi-line string
+        if triples % 2 != 0:
+            self.triplequoted = not self.triplequoted
+
+        return current_state
diff --git a/genshi/codegen/serialize.py b/genshi/codegen/serialize.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/serialize.py
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""
+Adaptation of genshi.output to deliver output-specific event streams suitable for
+Python code generation (i.e. adds a fourth "literal" element to each event),
+given standard Genshi 3-element streams.
+
+While this module is a severe transgression of DRY, reusing the output-specific logic
+from the genshi.output module would require de-optimizing the base genshi.output implementations.
+"""
+
+from itertools import chain
+try:
+    frozenset
+except NameError:
+    from sets import ImmutableSet as frozenset
+import re
+
+from genshi.core import escape, Markup, Namespace, QName, StreamEventKind
+from genshi.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
+                        END_CDATA, PI, COMMENT, XML_NAMESPACE
+from genshi.output import DocType, WhitespaceFilter
+
+__all__ = ['XMLSerializeFilter', 'XHTMLSerializeFilter', 'HTMLSerializeFilter']
+
+class XMLSerializeFilter(object):
+    """Delivers the given stream with additional XML text added to outgoing events.
+
+    """
+
+    _PRESERVE_SPACE = frozenset()
+
+    def __init__(self, doctype=None, strip_whitespace=True):
+        """Initialize the XML serialize filter.
+
+        @param doctype: a `(name, pubid, sysid)` tuple that represents the
+            DOCTYPE declaration that should be included at the top of the
+            generated output
+        @param strip_whitespace: whether extraneous whitespace should be
+            stripped from the output
+        """
+        self.preamble = []
+        if doctype:
+            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
+        # TODO: fold empty tags ?
+        self.filters = []
+        if strip_whitespace:
+            # TODO: can we process whitespace before a template is executed with a Context ?
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
+
+    def __call__(self, stream):
+        raise NotImplementedError("TODO")
+
+class XHTMLSerializeFilter(XMLSerializeFilter):
+    """Delivers the given stream with additional XHTML text added to outgoing events.
+
+    """
+    NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')
+
+    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
+                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
+                              'param'])
+    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
+                                'defer', 'disabled', 'ismap', 'multiple',
+                                'nohref', 'noresize', 'noshade', 'nowrap'])
+    _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')])
+
+    def __call__(self, stream):
+        raise NotImplementedError("TODO")
+
+class HTMLSerializeFilter(XHTMLSerializeFilter):
+    """Delivers the given stream with additional HTML text added to outgoing events.
+
+    """
+
+    _NOESCAPE_ELEMS = frozenset([QName('script'),
+                                 QName('http://www.w3.org/1999/xhtml}script'),
+                                 QName('style'),
+                                 QName('http://www.w3.org/1999/xhtml}style')])
+
+    def __init__(self, doctype=None, strip_whitespace=True):
+        """Initialize the HTML serialize filter.
+
+        @param doctype: a `(name, pubid, sysid)` tuple that represents the
+            DOCTYPE declaration that should be included at the top of the
+            generated output
+        @param strip_whitespace: whether extraneous whitespace should be
+            stripped from the output
+        """
+        super(HTMLSerializeFilter, self).__init__(doctype, False)
+        if strip_whitespace:
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
+                                                 self._NOESCAPE_ELEMS))
+
+    def __call__(self, stream):
+        """Yield `(kind, data, pos, literal)` for each event of the stream.
+
+        `literal` is the HTML text for the event, or None for events that
+        produce no output of their own.
+        """
+        namespace = self.NAMESPACE
+        ns_mapping = {}
+        boolean_attrs = self._BOOLEAN_ATTRS
+        empty_elems = self._EMPTY_ELEMS
+        noescape_elems = self._NOESCAPE_ELEMS
+        have_doctype = False
+        noescape = False
+
+        stream = chain(self.preamble, stream)
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, pos in stream:
+            if kind is START:
+                tag, attrib = data
+                if not tag.namespace or tag in namespace:
+                    tagname = tag.localname
+                    buf = ['<', tagname]
+
+                    for attr, value in attrib:
+                        attrname = attr.localname
+                        if not attr.namespace or attr in namespace:
+                            if attrname in boolean_attrs:
+                                if value:
+                                    buf += [' ', attrname]
+                            else:
+                                buf += [' ', attrname, '="', escape(value), '"']
+
+                    buf += ['>']
+
+                    yield kind, data, pos, u''.join(buf)
+
+                    if tagname in noescape_elems:
+                        noescape = True
+
+            elif kind is END:
+                if not data.namespace or data in namespace:
+                    if data.localname in empty_elems:
+                        # HTML empty elements (br, img, ...) take no end tag
+                        yield kind, data, pos, u''
+                    else:
+                        yield kind, data, pos, u'</%s>' % data.localname
+
+                noescape = False
+
+            elif kind is TEXT:
+                if noescape:
+                    yield kind, data, pos, data
+                else:
+                    yield kind, data, pos, escape(data, quotes=False)
+
+            elif kind is COMMENT:
+                yield kind, data, pos, u'<!--%s-->' % data
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf += [' PUBLIC "%s"']
+                elif sysid:
+                    buf += [' SYSTEM']
+                if sysid:
+                    buf += [' "%s"']
+                buf += ['>\n']
+                yield kind, data, pos, unicode(Markup(''.join(buf), *filter(None, data)))
+                have_doctype = True
+
+            elif kind is START_NS and data[1] not in ns_mapping:
+                ns_mapping[data[1]] = data[0]
+                yield kind, data, pos, None
+            elif kind is PI:
+                yield kind, data, pos, u'<?%s %s?>' % data
+            else:
+                # all other events pass-thru
+                yield kind, data, pos, None
diff --git a/genshi/codegen/tests/__init__.py b/genshi/codegen/tests/__init__.py
new file mode 100644
diff --git a/genshi/codegen/tests/test_generator.py b/genshi/codegen/tests/test_generator.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/tests/test_generator.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+from genshi.template import MarkupTemplate, Template
+from genshi.output import HTMLSerializer
+from genshi.codegen import generator
+from genshi.codegen.serialize import HTMLSerializeFilter
+
+# NOTE(review): the tags in this template are a reconstruction around the
+# surviving text (the original markup was lost) -- confirm against the repo
+text = """<html xmlns:py="http://genshi.edgewall.org/">
+  <body>
+    <div py:for="item in items">
+      <div>
+        i am a greeting, ${item}
+      </div>
+    </div>
+
+    <p>
+      yo
+    </p>
+  </body>
+</html>
+"""
+
+t = MarkupTemplate(text)
+g = generator.Generator(t)
+print u''.join(g.generate(HTMLSerializeFilter()))