# HG changeset patch
# User zzzeek
# Date 1162318415 0
# Node ID d60a60ba4224711694399cb5c1cda46d02b55706
# Parent  039b21d6f8f4347f1a7681f612dba661bf7d0986
- created 'codegen' package
- adapted PythonPrinter from Myghty
- created initial Generator/SerializeFilter framework + quickie template runner

diff --git a/genshi/codegen/__init__.py b/genshi/codegen/__init__.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/__init__.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer <mike_mp@zzzcomputing.com>
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
diff --git a/genshi/codegen/generator.py b/genshi/codegen/generator.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/generator.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer <mike_mp@zzzcomputing.com>
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+
+from genshi import template
+from genshi.template import Template
+from genshi.codegen.printer import PythonPrinter, PYTHON_LINE, PYTHON_COMMENT, PYTHON_BLOCK
+
+_directive_printers = {}  # maps a directive class -> its registered printer instance
+class DirectivePrinter(object):
+    """Base printer; instantiating a subclass registers it under its `__directive__` class."""
+    def __init__(self):
+        _directive_printers[self.__directive__] = self
+    def start_directive(self, gencontext, directive):
+        return ()  # no lines by default; callers iterate over the result
+    def end_directive(self, gencontext, directive):
+        return ()  # no lines by default; callers iterate over the result
+        
+class ForDirectivePrinter(DirectivePrinter):
+    """Prints a `for ...:` header line for a ForDirective and a blank line to end it."""
+    __directive__ = template.ForDirective
+    def start_directive(self, gencontext, directive):
+        scope = {}
+        directive.assign(scope, None)  # presumably stores the loop variable as a key -- TODO confirm
+        yield (PYTHON_LINE, "for %s in %s:" % (list(scope)[0], directive.expr.source))
+    def end_directive(self, gencontext, directive):
+        yield (PYTHON_LINE, "")
+ForDirectivePrinter()
+
+class Generator(object):
+    """Turns a Template into Python source text tailored to one serialize filter.
+
+    `generate()` returns an iterator over the printed source lines."""
+    def __init__(self, template):
+        self.template = template
+    def generate(self, serializer):
+        # stage 1: template stream -> (linetype, text) events
+        line_events = PythonGenerator(self.template.stream, serializer).generate()
+        # stage 2: events -> indented source lines
+        return PythonPrinter(line_events).generate()
+
+class PythonGenerator(object):
+    """Translates a serialized template stream into (linetype, text) line events."""
+    def __init__(self, stream, serializer):
+        self.stream = stream
+        self.serializer = serializer
+    def generate(self):
+        """Yield the preamble and `def go(context):` header, the stream body, then a closing blank line."""
+        for evt in self.start():
+            yield evt
+        for evt in self.gen_stream(self.stream):
+            yield evt
+        for evt in self.end():
+            yield evt
+    def gen_stream(self, stream):
+        """Yield lines for SUB, START and END events; other event kinds produce nothing yet."""
+        for event in self.serializer(stream):
+            kind, data, pos, literal = event
+            if kind is template.SUB:
+                directives, substream = data
+                for d in directives:
+                    for evt in self.produce_directive_start(d):
+                        yield evt
+                for evt in self.gen_stream(substream):
+                    yield evt
+                for d in directives[::-1]:  # close in reverse: innermost directive first
+                    for evt in self.produce_directive_end(d):
+                        yield evt
+            elif kind is template.START:
+                for evt in self.produce_start_event(event):
+                    yield evt
+            elif kind is template.END:
+                for evt in self.produce_end_event(event):
+                    yield evt
+    def produce_preamble(self):
+        for line in [
+            "from genshi.core import START, END, START_NS, END_NS, TEXT, COMMENT, DOCTYPE, QName, Stream",
+            "from genshi.template import Context",
+            "from genshi.path import Path"
+        ]:
+            yield (PYTHON_LINE, line)
+    def produce_directive_start(self, directive):
+        for evt in _directive_printers[directive.__class__].start_directive(self, directive):
+            yield evt
+    def produce_directive_end(self, directive):
+        for evt in _directive_printers[directive.__class__].end_directive(self, directive):
+            yield evt
+    def start(self):
+        for evt in self.produce_preamble():
+            yield evt
+        yield (PYTHON_LINE, "def go(context):")
+    def end(self):
+        # the blank line closes the `def go(context):` block opened by start()
+        yield (PYTHON_LINE, "")
+    def produce_start_event(self, event):
+        # the preamble imports `QName`; the generated code must use that exact name
+        yield (PYTHON_LINE, "yield (START, (QName(%s), %s), %s, %s)" % (
+            repr(event[1][0]),
+            repr(event[1][1]),
+            repr(event[2]),
+            repr(event[3]))
+        )
+    def produce_end_event(self, event):
+        yield (PYTHON_LINE, "yield (END, (%s), %s, %s)" % (
+            repr(event[1]),
+            repr(event[2]),
+            repr(event[3]))
+        )
diff --git a/genshi/codegen/printer.py b/genshi/codegen/printer.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/printer.py
@@ -0,0 +1,191 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer <mike_mp@zzzcomputing.com>
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+import re
+
+PYTHON_LINE = "line"  # a line of code; PythonPrinter.generate prints it via _process_line
+PYTHON_COMMENT = "comment"  # comment text; PythonPrinter.generate prints it via _process_comment
+PYTHON_BLOCK = "block"  # not supported yet; PythonPrinter.generate raises for it
+
+class PythonPrinter(object):
+    """Prints Python code, keeping track of the indentation level.
+
+    `stream` is an iterable of (linetype, text) pairs, where linetype is one of
+    PYTHON_LINE, PYTHON_COMMENT or PYTHON_BLOCK; `generate()` yields the
+    printed lines.
+
+    Adapted from PythonPrinter in Myghty; also uses stream-based operation.  The
+    Myghty version of this is more complicated; among other things, it includes a
+    'block' version useful for properly indenting user-included blocks of Python.
+    When Genshi gets the <?python?> tag we will want to revisit this output mode.
+    """
+
+    def __init__(self, stream):
+        # the indentation counter
+        self.indent = 0
+
+        # a stack storing information about why we incremented
+        # the indentation counter, to help us determine if we
+        # should decrement it
+        self.indent_detail = []
+
+        # the string of whitespace multiplied by the indent
+        # counter to produce a line
+        self.indentstring = "    "
+
+        # a stack of whitespace we pulled from "normalized"
+        # Python lines to track when the indentation counter should
+        # be incremented or decremented
+        # (kept from the Myghty original; nothing in this class reads it)
+        self.spacestack = []
+
+        # the stream of (linetype, text) pairs to print
+        self.stream = stream
+
+        self._reset_multi_line_flags()
+
+    def generate(self):
+        """Yield one printed line per (linetype, text) pair read from the stream.
+
+        Raises NotImplementedError for PYTHON_BLOCK (not supported yet) and
+        ValueError for any other unrecognized line type.
+        """
+        for linetype, line in self.stream:
+            # compare by value: identity of equal strings is an interpreter detail
+            if linetype == PYTHON_LINE:
+                yield self._process_line(line)
+            elif linetype == PYTHON_COMMENT:
+                yield self._process_comment(line)
+            elif linetype == PYTHON_BLOCK:
+                # string exceptions are deprecated; raise real exception classes
+                raise NotImplementedError("PYTHON_BLOCK not supported yet")
+            else:
+                raise ValueError("unknown block type %s" % (linetype,))
+
+    def _process_line(self, line, is_comment=False):
+        """Return the line indented to the current level, with a newline appended.
+
+        Before printing, the indentation counter is decremented (never below
+        zero) when the line is blank or comment-only, or when it starts with
+        else/elif/except/finally and the innermost open block was started by a
+        compound keyword (if, try, elif, while, for).  Lines passed with
+        is_comment set never decrement the counter.
+
+        After printing, the counter is incremented when the line ends with a
+        colon (optionally followed by a comment) and starts with a block
+        keyword.  A single line can therefore both decrease and then increase
+        the indentation level.
+        """
+        # blank and comment-only lines carry no code
+        hastext = not (re.match(r"^\s*#", line) or re.match(r"^\s*$", line))
+
+        # see if this line should decrease the indentation level
+        if not is_comment and (not hastext or self._is_unindentor(line)):
+            if self.indent > 0:
+                self.indent -= 1
+                # if the indent_detail stack is empty, the user
+                # probably put extra closures - the resulting
+                # module won't compile.
+                if len(self.indent_detail) == 0:
+                    raise RuntimeError("Too many whitespace closures")
+                self.indent_detail.pop()
+
+        # the line is printed at the (possibly decreased) level, before any
+        # increase below takes effect.
+        result = self._indent_line(line) + "\n"
+
+        # see if this line should increase the indentation level.
+        if re.search(r":[ \t]*(?:#.*)?$", line):
+            # increment indentation count, and also
+            # keep track of what the keyword was that indented us,
+            # if it is a python compound statement keyword
+            # where we might have to look for an "unindent" keyword
+            match = re.match(r"^\s*(if|try|elif|while|for)", line)
+            if match:
+                # it's a "compound" keyword, so we will check for "unindentors"
+                indentor = match.group(1)
+                self.indent += 1
+                self.indent_detail.append(indentor)
+            else:
+                indentor = None
+                # it's not a "compound" keyword.  but let's also
+                # test for valid Python keywords that might be indenting us,
+                # else assume it's a non-indenting line
+                m2 = re.match(r"^\s*(def|class|else|elif|except|finally)", line)
+                if m2:
+                    self.indent += 1
+                    self.indent_detail.append(indentor)
+
+        return result
+
+    def _process_comment(self, comment):
+        """Return the comment text printed as a "# " line at the current level."""
+        return self._process_line("# " + comment, is_comment=True)
+
+    def _is_unindentor(self, line):
+        """Return True if the given line unindents the most recent indent-increasing line."""
+
+        # no indentation detail has been pushed on; return False
+        if len(self.indent_detail) == 0: return False
+
+        indentor = self.indent_detail[-1]
+
+        # the last indent keyword we grabbed is not a
+        # compound statement keyword; return False
+        if indentor is None: return False
+
+        # if the current line doesn't have one of the "unindentor" keywords,
+        # return False
+        match = re.match(r"^\s*(else|elif|except|finally)", line)
+        if not match: return False
+
+        # we have a compound indentor and this line has an unindentor
+        # keyword; this is probably good enough.  the keyword is not
+        # checked against the kind of indentor (e.g. "except" is accepted
+        # after "if").
+        return True
+
+    def _indent_line(self, line, stripspace = ''):
+        """Return the line prefixed with the current indentation.
+
+        `stripspace` is a regular expression fragment; a match of it at the
+        start of the line is replaced by the indentation string.
+        """
+        return re.sub(r"^%s" % stripspace, self.indentstring * self.indent, line)
+
+    def _reset_multi_line_flags(self):
+        """Clear the backslash-continuation and triple-quote tracking flags."""
+        (self.backslashed, self.triplequoted) = (False, False)
+
+    def _in_multi_line(self, line):
+        """Track explicit line continuations and triple-quoted strings.
+
+        Returns True if a backslash continuation or triple-quoted string was
+        already open before `line`; the flags are then updated from `line`.
+        """
+        # we are only looking for explicitly joined lines here,
+        # not implicit ones (i.e. brackets, braces etc.).  this is just
+        # to guard against the possibility of modifying the space inside
+        # of a literal multiline string with unfortunately placed whitespace
+
+        current_state = (self.backslashed or self.triplequoted)
+
+        # a trailing backslash joins this line with the next one
+        self.backslashed = bool(re.search(r"\\$", line))
+
+        # an odd number of triple-quote delimiters opens or closes a string
+        triples = len(re.findall(r"\"\"\"|\'\'\'", line))
+        if triples % 2 != 0:
+            self.triplequoted = not self.triplequoted
+
+        return current_state
diff --git a/genshi/codegen/serialize.py b/genshi/codegen/serialize.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/serialize.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer <mike_mp@zzzcomputing.com>
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""
+Adaptation of genshi.output to deliver output-specific event streams suitable for
+Python code generation (i.e. adds a fourth "literal" element to each event), 
+given standard Genshi 3-element streams.
+
+While this module is a severe transgression of DRY, reusing the output-specific logic
+from the genshi.output module would require de-optimizing the base genshi.output implementations.
+"""
+
+from itertools import chain
+try:
+    frozenset
+except NameError:
+    from sets import ImmutableSet as frozenset
+import re
+
+from genshi.core import escape, Markup, Namespace, QName, StreamEventKind
+from genshi.core import DOCTYPE, START, END, START_NS, TEXT, START_CDATA, \
+                        END_CDATA, PI, COMMENT, XML_NAMESPACE
+from genshi.output import DocType, WhitespaceFilter
+
+__all__ = ['XMLSerializeFilter', 'XHTMLSerializeFilter', 'HTMLSerializeFilter']
+
+class XMLSerializeFilter(object):
+    """Delivers the given stream with additional XML text added to outgoing events.
+
+    Serialization is not implemented yet: calling the filter raises NotImplementedError.
+    """
+    _PRESERVE_SPACE = frozenset()
+
+    def __init__(self, doctype=None, strip_whitespace=True):
+        """Initialize the XML serialize filter.
+
+        @param doctype: a `(name, pubid, sysid)` tuple that represents the
+            DOCTYPE declaration that should be included at the top of the
+            generated output
+        @param strip_whitespace: whether extraneous whitespace should be
+            stripped from the output
+        """
+        self.preamble = []
+        if doctype:
+            self.preamble.append((DOCTYPE, doctype, (None, -1, -1)))
+        # TODO: fold empty tags ?
+        self.filters = []
+        if strip_whitespace:
+            # TODO: can we process whitespace before a template is executed with a Context ?
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE))
+
+    def __call__(self, stream):
+        raise NotImplementedError("TODO")
+
+class XHTMLSerializeFilter(XMLSerializeFilter):
+    """Delivers the given stream with additional XHTML text added to outgoing events.
+
+    `__call__` is not implemented at this level yet and raises NotImplementedError."""
+    NAMESPACE = Namespace('http://www.w3.org/1999/xhtml')
+
+    _EMPTY_ELEMS = frozenset(['area', 'base', 'basefont', 'br', 'col', 'frame',
+                              'hr', 'img', 'input', 'isindex', 'link', 'meta',
+                              'param'])
+    _BOOLEAN_ATTRS = frozenset(['selected', 'checked', 'compact', 'declare',
+                                'defer', 'disabled', 'ismap', 'multiple',
+                                'nohref', 'noresize', 'noshade', 'nowrap'])
+    _PRESERVE_SPACE = frozenset([QName('pre'), QName('textarea')])
+
+    def __call__(self, stream):
+        raise NotImplementedError("TODO")
+
+class HTMLSerializeFilter(XHTMLSerializeFilter):
+    """Delivers the given stream with additional HTML text added to outgoing events.
+
+    Each outgoing event is a `(kind, data, pos, literal)` tuple, where `literal`
+    is the HTML text for the event, or None when the event produces no text.
+    """
+    _NOESCAPE_ELEMS = frozenset([QName('script'),
+                                 QName('http://www.w3.org/1999/xhtml}script'),
+                                 QName('style'),
+                                 QName('http://www.w3.org/1999/xhtml}style')])
+
+    def __init__(self, doctype=None, strip_whitespace=True):
+        """Initialize the HTML serialize filter.
+
+        @param doctype: a `(name, pubid, sysid)` tuple that represents the
+            DOCTYPE declaration that should be included at the top of the
+            generated output
+        @param strip_whitespace: whether extraneous whitespace should be
+            stripped from the output
+        """
+        super(HTMLSerializeFilter, self).__init__(doctype, False)
+        if strip_whitespace:
+            self.filters.append(WhitespaceFilter(self._PRESERVE_SPACE,
+                                                 self._NOESCAPE_ELEMS))
+
+    def __call__(self, stream):
+        namespace = self.NAMESPACE
+        ns_mapping = {}
+        boolean_attrs = self._BOOLEAN_ATTRS
+        empty_elems = self._EMPTY_ELEMS
+        noescape_elems = self._NOESCAPE_ELEMS
+        have_doctype = False
+        noescape = False
+
+        stream = chain(self.preamble, stream)
+        for filter_ in self.filters:
+            stream = filter_(stream)
+        for kind, data, pos in stream:
+            if kind is START:
+                tag, attrib = data
+                if not tag.namespace or tag in namespace:
+                    tagname = tag.localname
+                    buf = ['<', tagname]
+                    for attr, value in attrib:
+                        attrname = attr.localname
+                        if not attr.namespace or attr in namespace:
+                            if attrname in boolean_attrs:
+                                if value:
+                                    buf += [' ', attrname]
+                            else:
+                                buf += [' ', attrname, '="', escape(value), '"']
+                    buf += ['>']
+                    yield kind, data, pos, u''.join(buf)
+                    if tagname in noescape_elems:
+                        noescape = True
+
+            elif kind is END:
+                if not data.namespace or data in namespace:
+                    if data.localname in empty_elems:
+                        # HTML empty elements (e.g. <br>) take no closing tag
+                        yield kind, data, pos, None
+                    else:
+                        yield kind, data, pos, u'</%s>' % data.localname
+                noescape = False
+
+            elif kind is TEXT:
+                if noescape:
+                    yield kind, data, pos, data
+                else:
+                    yield kind, data, pos, escape(data, quotes=False)
+
+            elif kind is COMMENT:
+                yield kind, data, pos, u'<!--%s-->' % data
+
+            elif kind is DOCTYPE and not have_doctype:
+                name, pubid, sysid = data
+                buf = ['<!DOCTYPE %s']
+                if pubid:
+                    buf += [' PUBLIC "%s"']
+                elif sysid:
+                    buf += [' SYSTEM']
+                if sysid:
+                    buf += [' "%s"']
+                buf += ['>\n']
+                yield kind, data, pos, unicode(Markup(''.join(buf), *filter(None, data)))
+                have_doctype = True
+
+            elif kind is START_NS and data[1] not in ns_mapping:
+                ns_mapping[data[1]] = data[0]
+                yield kind, data, pos, None
+            elif kind is PI:
+                yield kind, data, pos, u'<?%s %s?>' % data
+            else:
+                # all other events pass-thru
+                yield kind, data, pos, None
diff --git a/genshi/codegen/tests/__init__.py b/genshi/codegen/tests/__init__.py
new file mode 100644
diff --git a/genshi/codegen/tests/test_generator.py b/genshi/codegen/tests/test_generator.py
new file mode 100644
--- /dev/null
+++ b/genshi/codegen/tests/test_generator.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006 Edgewall Software and Michael Bayer <mike_mp@zzzcomputing.com>
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+from genshi.template import MarkupTemplate, Template
+from genshi.output import HTMLSerializer
+from genshi.codegen import generator
+from genshi.codegen.serialize import HTMLSerializeFilter
+
+text = """<!DOCTYPE html
+    PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml"
+      xmlns:py="http://genshi.edgewall.org/"
+      xmlns:xi="http://www.w3.org/2001/XInclude"
+      lang="en">
+ <body>
+    <div py:for="item in items()">
+        <div py:for="x in foo">
+        i am a greeting, ${item}
+        </div>
+    </div>
+    
+     yo
+     <hi></hi>
+ </body>
+</html>"""
+
+t = MarkupTemplate(text)  # parse the sample markup held in `text`
+g = generator.Generator(t)
+print u''.join(g.generate(HTMLSerializeFilter()))  # join and print the generated lines