# HG changeset patch # User mkurczych # Date 1215810646 0 # Node ID 1fd97b75cdc9120bdb589eefffd1ef48edc24a49 # Parent 7e93b9a22fcb420e96efb083f2ccfef7edadf7e6 Optimization work first commit (many changes) diff --git a/genshi/core.py b/genshi/core.py --- a/genshi/core.py +++ b/genshi/core.py @@ -235,9 +235,11 @@ :see: XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer """ from genshi.output import get_serializer + from genshi.optimization import optimized_flatten if method is None: method = self.serializer or 'xml' - return get_serializer(method, **kwargs)(_ensure(self)) + return optimized_flatten(get_serializer(method, **kwargs) + (_ensure(self))) def __str__(self): return self.render() diff --git a/genshi/optimization.py b/genshi/optimization.py new file mode 100644 --- /dev/null +++ b/genshi/optimization.py @@ -0,0 +1,83 @@ +from genshi.core import StreamEventKind +from genshi.util import LRUCache + +OPTIMIZATION_POSSIBILITY = StreamEventKind("OPTIMIZATION_POSSIBILITY") +OPTIMIZED_FRAGMENT = StreamEventKind("OPTIMIZED_FRAGMENT") + +class Optimizer(object): + """Manages optimized tags with variables bound and filter trees""" + def __init__(self, size=100): + self._filtersCount = 1 + self._filters = {0:{}} + self._fragmentsCount = 0 + self._fragments = {} + self._data = LRUCache(size) + + def get_filters_child_id(self, fid, filter): + """Finds filter tree node""" + try: + return self._filters[fid][filter] + except KeyError: + id_ = self._filtersCount + self._filters[fid][filter] = id_ + self._filters[id_] = {} + self._filtersCount += 1 + return id_ + def get_fragment_id(self, *args): + try: + return self._fragments[args] + except KeyError: + id_ = self._fragmentsCount + self._fragments[args] = id_ + self._fragmentsCount += 1 + return id_ + + @property + def root_id(self): + return 0 + + def get_cache_for(self, fragmentId, filtersId): + try: + return self._data[(filtersId, fragmentId)] + except KeyError: + return None + + def set_cache_for(self, fragmentId, filtersId, stream): + self._data[(filtersId, fragmentId,)] = stream + + +class OptimizedFragment(object): + def __init__(self, stream, optimizer, fragmentId, filtersId=None): + self._stream = stream + self.fragmentId = fragmentId + if filtersId is None: + self.filtersId = optimizer.root_id + else: + self.filtersId = filtersId + self.optimizer = optimizer + def get_stream(self): + """Returns stream. Only for embedding in generators, if something + more needed use process_stream""" + return self._stream + def process_stream(self): + """Renders it in place and asks to save in cache""" + s = self.optimizer.get_cache_for(self.fragmentId, self.filtersId) + if s is None: + s = list(self._stream) + self.optimizer.set_cache_for(self.fragmentId, self.filtersId, s) + self._stream = s + return self._stream + def create_child(self, filter, stream): + """Create child fragment (representing fragment after applying filter)""" + filtersId = self.optimizer.get_filters_child_id(self.filtersId, filter) + #print "Creating child", filter + return OptimizedFragment(stream, self.optimizer, self.fragmentId, + filtersId) + +def optimized_flatten(stream): + for event in stream: + if event[0] is OPTIMIZED_FRAGMENT: + for e in optimized_flatten(event[1].process_stream()): + yield e + else: + yield event diff --git a/genshi/output.py b/genshi/output.py --- a/genshi/output.py +++ b/genshi/output.py @@ -25,6 +25,7 @@ from genshi.core import escape, Attrs, Markup, Namespace, QName, StreamEventKind from genshi.core import START, END, TEXT, XML_DECL, DOCTYPE, START_NS, END_NS, \ START_CDATA, END_CDATA, PI, COMMENT, XML_NAMESPACE +from genshi.optimization import OPTIMIZED_FRAGMENT __all__ = ['encode', 'get_serializer', 'DocType', 'XMLSerializer', 'XHTMLSerializer', 'HTMLSerializer', 'TextSerializer'] @@ -61,6 +62,7 @@ for chunk in iterator: out.write(_encode(chunk)) + def get_serializer(method='xml', **kwargs): """Return a serializer object for the given method. @@ -73,7 +75,10 @@ :see: `XMLSerializer`, `XHTMLSerializer`, `HTMLSerializer`, `TextSerializer` :since: version 0.4.1 """ - if isinstance(method, basestring): + serializers = XMLSerializer, XHTMLSerializer, HTMLSerializer, TextSerializer + if isinstance(method, serializers): + return method + elif isinstance(method, basestring): method = {'xml': XMLSerializer, 'xhtml': XHTMLSerializer, 'html': HTMLSerializer, @@ -202,68 +207,75 @@ self.filters.append(DocTypeInserter(doctype)) def __call__(self, stream): - have_decl = have_doctype = False - in_cdata = False for filter_ in self.filters: stream = filter_(stream) - for kind, data, pos in stream: - - if kind is START or kind is EMPTY: - tag, attrib = data - buf = ['<', tag] - for attr, value in attrib: - buf += [' ', attr, '="', escape(value), '"'] - buf.append(kind is EMPTY and '/>' or '>') - yield Markup(u''.join(buf)) - - elif kind is END: - yield Markup('' % data) - - elif kind is TEXT: - if in_cdata: - yield data - else: - yield escape(data, quotes=False) - - elif kind is COMMENT: - yield Markup('' % data) + def _process(stream): + #TODO SOC: You know what to check :-) + have_decl = have_doctype = False + in_cdata = False + for kind, data, pos in stream: - elif kind is XML_DECL and not have_decl: - version, encoding, standalone = data - buf = ['\n') - yield Markup(u''.join(buf)) - have_decl = True + if kind is OPTIMIZED_FRAGMENT: + frag = data.create_child(self, + _process(data.get_stream())) + yield (kind, frag, pos,) + elif kind is START or kind is EMPTY: + tag, attrib = data + buf = ['<', tag] + for attr, value in attrib: + buf += [' ', attr, '="', escape(value), '"'] + buf.append(kind is EMPTY and '/>' or '>') + yield Markup(u''.join(buf)) - elif kind is DOCTYPE and not have_doctype: - name, pubid, sysid = data - buf = ['\n') - yield Markup(u''.join(buf)) % filter(None, data) - have_doctype = True + elif kind is END: + yield Markup('' % data) - elif kind is START_CDATA: - yield Markup('') - in_cdata = False + elif kind is COMMENT: + yield Markup('' % data) - elif kind is PI: - yield Markup('' % data) + elif kind is XML_DECL and not have_decl: + version, encoding, standalone = data + buf = ['\n') + yield Markup(u''.join(buf)) + have_decl = True + + elif kind is DOCTYPE and not have_doctype: + name, pubid, sysid = data + buf = ['\n') + yield Markup(u''.join(buf)) % filter(None, data) + have_doctype = True + + elif kind is START_CDATA: + yield Markup('') + in_cdata = False + + elif kind is PI: + yield Markup('' % data) + return _process(stream) class XHTMLSerializer(XMLSerializer): @@ -303,80 +315,85 @@ boolean_attrs = self._BOOLEAN_ATTRS empty_elems = self._EMPTY_ELEMS drop_xml_decl = self.drop_xml_decl - have_decl = have_doctype = False - in_cdata = False for filter_ in self.filters: stream = filter_(stream) - for kind, data, pos in stream: - - if kind is START or kind is EMPTY: - tag, attrib = data - buf = ['<', tag] - for attr, value in attrib: - if attr in boolean_attrs: - value = attr - elif attr == u'xml:lang' and u'lang' not in attrib: - buf += [' lang="', escape(value), '"'] - elif attr == u'xml:space': - continue - buf += [' ', attr, '="', escape(value), '"'] - if kind is EMPTY: - if tag in empty_elems: - buf.append(' />') - else: - buf.append('>' % tag) - else: - buf.append('>') - yield Markup(u''.join(buf)) - - elif kind is END: - yield Markup('' % data) - - elif kind is TEXT: - if in_cdata: - yield data - else: - yield escape(data, quotes=False) - - elif kind is COMMENT: - yield Markup('' % data) + def _process(stream, have_decl=True, have_doctype=True): + in_cdata = False + for kind, data, pos in stream: - elif kind is DOCTYPE and not have_doctype: - name, pubid, sysid = data - buf = ['\n') - yield Markup(u''.join(buf)) % filter(None, data) - have_doctype = True + if kind is OPTIMIZED_FRAGMENT: + frag = data.create_child(self, + _process(data.get_stream())) + yield (kind, frag, pos,) + elif kind is START or kind is EMPTY: + tag, attrib = data + buf = ['<', tag] + for attr, value in attrib: + if attr in boolean_attrs: + value = attr + elif attr == u'xml:lang' and u'lang' not in attrib: + buf += [' lang="', escape(value), '"'] + elif attr == u'xml:space': + continue + buf += [' ', attr, '="', escape(value), '"'] + if kind is EMPTY: + if tag in empty_elems: + buf.append(' />') + else: + buf.append('>' % tag) + else: + buf.append('>') + yield Markup(u''.join(buf)) - elif kind is XML_DECL and not have_decl and not drop_xml_decl: - version, encoding, standalone = data - buf = ['\n') - yield Markup(u''.join(buf)) - have_decl = True + elif kind is END: + yield Markup('' % data) - elif kind is START_CDATA: - yield Markup('') - in_cdata = False + elif kind is COMMENT: + yield Markup('' % data) - elif kind is PI: - yield Markup('' % data) + elif kind is DOCTYPE and not have_doctype: + name, pubid, sysid = data + buf = ['\n') + yield Markup(u''.join(buf)) % filter(None, data) + have_doctype = True + + elif kind is XML_DECL and not have_decl and not drop_xml_decl: + version, encoding, standalone = data + buf = ['\n') + yield Markup(u''.join(buf)) + have_decl = True + + elif kind is START_CDATA: + yield Markup('') + in_cdata = False + + elif kind is PI: + yield Markup('' % data) + return _process(stream, False, False) class HTMLSerializer(XHTMLSerializer): @@ -417,61 +434,65 @@ boolean_attrs = self._BOOLEAN_ATTRS empty_elems = self._EMPTY_ELEMS noescape_elems = self._NOESCAPE_ELEMS - have_doctype = False - noescape = False for filter_ in self.filters: stream = filter_(stream) - for kind, data, pos in stream: - - if kind is START or kind is EMPTY: - tag, attrib = data - buf = ['<', tag] - for attr, value in attrib: - if attr in boolean_attrs: - if value: - buf += [' ', attr] - elif ':' in attr: - if attr == 'xml:lang' and u'lang' not in attrib: - buf += [' lang="', escape(value), '"'] - elif attr != 'xmlns': - buf += [' ', attr, '="', escape(value), '"'] - buf.append('>') - if kind is EMPTY: - if tag not in empty_elems: - buf.append('' % tag) - yield Markup(u''.join(buf)) - if tag in noescape_elems: - noescape = True + def _process(stream, noescape, have_doctype=True): + for kind, data, pos in stream: - elif kind is END: - yield Markup('' % data) - noescape = False - - elif kind is TEXT: - if noescape: - yield data - else: - yield escape(data, quotes=False) - - elif kind is COMMENT: - yield Markup('' % data) + if kind is OPTIMIZED_FRAGMENT: + frag = data.create_child((self, noescape), + _process(data.get_stream(), noescape)) + yield (kind, frag, pos,) + elif kind is START or kind is EMPTY: + tag, attrib = data + buf = ['<', tag] + for attr, value in attrib: + if attr in boolean_attrs: + if value: + buf += [' ', attr] + elif ':' in attr: + if attr == 'xml:lang' and u'lang' not in attrib: + buf += [' lang="', escape(value), '"'] + elif attr != 'xmlns': + buf += [' ', attr, '="', escape(value), '"'] + buf.append('>') + if kind is EMPTY: + if tag not in empty_elems: + buf.append('' % tag) + yield Markup(u''.join(buf)) + if tag in noescape_elems: + noescape = True - elif kind is DOCTYPE and not have_doctype: - name, pubid, sysid = data - buf = ['\n') - yield Markup(u''.join(buf)) % filter(None, data) - have_doctype = True + elif kind is END: + yield Markup('' % data) + noescape = False - elif kind is PI: - yield Markup('' % data) + elif kind is TEXT: + if noescape: + yield data + else: + yield escape(data, quotes=False) + + elif kind is COMMENT: + yield Markup('' % data) + + elif kind is DOCTYPE and not have_doctype: + name, pubid, sysid = data + buf = ['\n') + yield Markup(u''.join(buf)) % filter(None, data) + have_doctype = True + + elif kind is PI: + yield Markup('' % data) + return _process(stream, False, False) class TextSerializer(object): @@ -512,12 +533,18 @@ def __call__(self, stream): strip_markup = self.strip_markup - for event in stream: - if event[0] is TEXT: - data = event[1] - if strip_markup and type(data) is Markup: - data = data.striptags().stripentities() - yield unicode(data) + def _process(stream): + for event in stream: + if event[0] is OPTIMIZED_FRAGMENT: + frag = event[1].create_child((self, noescape), + _process(event[1].get_stream())) + yield (event[0], frag, event[2],) + elif event[0] is TEXT: + data = event[1] + if strip_markup and type(data) is Markup: + data = data.striptags().stripentities() + yield unicode(data) + return _process(stream) class EmptyTagFilter(object): @@ -528,23 +555,31 @@ EMPTY = StreamEventKind('EMPTY') def __call__(self, stream): - prev = (None, None, None) - for ev in stream: - if prev[0] is START: - if ev[0] is END: - prev = EMPTY, prev[1], prev[2] - yield prev - continue + def _process(stream): + prev = (None, None, None) + for ev in stream: + if ev[0] is OPTIMIZED_FRAGMENT: + frag = ev[1].create_child(EmptyTagFilter, + _process(ev[1].get_stream())) + yield (ev[0], frag, ev[2],) else: - yield prev - if ev[0] is not START: - yield ev - prev = ev + if prev[0] is START: + if ev[0] is END: + prev = EMPTY, prev[1], prev[2] + yield prev + continue + else: + yield prev + if ev[0] is not START: + yield ev + prev = ev + return _process(stream) EMPTY = EmptyTagFilter.EMPTY +#TODO SOC class NamespaceFlattener(object): r"""Output stream filter that removes namespace information from the stream, instead adding namespace attributes and prefixes as needed. @@ -589,76 +624,84 @@ yield 'ns%d' % val _gen_prefix = _gen_prefix().next - for kind, data, pos in stream: + # TODO SOC: that rather doesn't work... + def _process(stream): + for kind, data, pos in stream: - if kind is START or kind is EMPTY: - tag, attrs = data + if kind is OPTIMIZED_FRAGMENT: + sstream = _process(data.get_stream()) + frag = data.create_child(self, sstream) + yield (kind, frag, pos,) + elif kind is START or kind is EMPTY: + tag, attrs = data - tagname = tag.localname - tagns = tag.namespace - if tagns: - if tagns in namespaces: + tagname = tag.localname + tagns = tag.namespace + if tagns: + if tagns in namespaces: + prefix = namespaces[tagns][-1] + if prefix: + tagname = u'%s:%s' % (prefix, tagname) + else: + _push_ns_attr((u'xmlns', tagns)) + _push_ns('', tagns) + + new_attrs = [] + for attr, value in attrs: + attrname = attr.localname + attrns = attr.namespace + if attrns: + if attrns not in namespaces: + prefix = _gen_prefix() + _push_ns(prefix, attrns) + _push_ns_attr(('xmlns:%s' % prefix, attrns)) + else: + prefix = namespaces[attrns][-1] + if prefix: + attrname = u'%s:%s' % (prefix, attrname) + new_attrs.append((attrname, value)) + + yield kind, (tagname, Attrs(ns_attrs + new_attrs)), pos + del ns_attrs[:] + + elif kind is END: + tagname = data.localname + tagns = data.namespace + if tagns: prefix = namespaces[tagns][-1] if prefix: tagname = u'%s:%s' % (prefix, tagname) - else: - _push_ns_attr((u'xmlns', tagns)) - _push_ns('', tagns) - - new_attrs = [] - for attr, value in attrs: - attrname = attr.localname - attrns = attr.namespace - if attrns: - if attrns not in namespaces: - prefix = _gen_prefix() - _push_ns(prefix, attrns) - _push_ns_attr(('xmlns:%s' % prefix, attrns)) - else: - prefix = namespaces[attrns][-1] - if prefix: - attrname = u'%s:%s' % (prefix, attrname) - new_attrs.append((attrname, value)) - - yield kind, (tagname, Attrs(ns_attrs + new_attrs)), pos - del ns_attrs[:] + yield kind, tagname, pos - elif kind is END: - tagname = data.localname - tagns = data.namespace - if tagns: - prefix = namespaces[tagns][-1] - if prefix: - tagname = u'%s:%s' % (prefix, tagname) - yield kind, tagname, pos - - elif kind is START_NS: - prefix, uri = data - if uri not in namespaces: - prefix = prefixes.get(uri, [prefix])[-1] - _push_ns_attr(_make_ns_attr(prefix, uri)) - _push_ns(prefix, uri) + elif kind is START_NS: + prefix, uri = data + if uri not in namespaces: + prefix = prefixes.get(uri, [prefix])[-1] + _push_ns_attr(_make_ns_attr(prefix, uri)) + _push_ns(prefix, uri) - elif kind is END_NS: - if data in prefixes: - uris = prefixes.get(data) - uri = uris.pop() - if not uris: - del prefixes[data] - if uri not in uris or uri != uris[-1]: - uri_prefixes = namespaces[uri] - uri_prefixes.pop() - if not uri_prefixes: - del namespaces[uri] - if ns_attrs: - attr = _make_ns_attr(data, uri) - if attr in ns_attrs: - ns_attrs.remove(attr) + elif kind is END_NS: + if data in prefixes: + uris = prefixes.get(data) + uri = uris.pop() + if not uris: + del prefixes[data] + if uri not in uris or uri != uris[-1]: + uri_prefixes = namespaces[uri] + uri_prefixes.pop() + if not uri_prefixes: + del namespaces[uri] + if ns_attrs: + attr = _make_ns_attr(data, uri) + if attr in ns_attrs: + ns_attrs.remove(attr) - else: - yield kind, data, pos + else: + yield kind, data, pos + return _process(stream) +#TODO SOC class WhitespaceFilter(object): """A filter that removes extraneous ignorable white space from the stream. @@ -688,51 +731,60 @@ collapse_lines=re.compile('\n{2,}').sub): mjoin = Markup('').join preserve_elems = self.preserve - preserve = 0 noescape_elems = self.noescape - noescape = False - - textbuf = [] - push_text = textbuf.append - pop_text = textbuf.pop - for kind, data, pos in chain(stream, [(None, None, None)]): - if kind is TEXT: - if noescape: - data = Markup(data) - push_text(data) - else: - if textbuf: - if len(textbuf) > 1: - text = mjoin(textbuf, escape_quotes=False) - del textbuf[:] - else: - text = escape(pop_text(), quotes=False) - if not preserve: - text = collapse_lines('\n', trim_trailing_space('', text)) - yield TEXT, Markup(text), pos + def _process(stream, preserve=0, noescape=False): + textbuf = [] + push_text = textbuf.append + pop_text = textbuf.pop + for kind, data, pos in chain(stream, [(None, None, None)]): - if kind is START: - tag, attrs = data - if preserve or (tag in preserve_elems or - attrs.get(space) == 'preserve'): - preserve += 1 - if not noescape and tag in noescape_elems: + if kind is TEXT: + if noescape: + data = Markup(data) + push_text(data) + else: + if textbuf: + if len(textbuf) > 1: + text = mjoin(textbuf, escape_quotes=False) + del textbuf[:] + else: + text = escape(pop_text(), quotes=False) + if not preserve: + text = collapse_lines('\n', + trim_trailing_space('', text)) + yield TEXT, Markup(text), pos + + if kind is OPTIMIZED_FRAGMENT: + # there's exactly the same nubmer of STARTs as ENDs + # in optimized fragment, so no need for higher pres + pres = preserve and 1 or 0 + sstream = _process(data.get_stream(), pres, noescape) + frag = data.create_child((self, pres, noescape), + sstream) + yield (kind, frag, pos,) + elif kind is START: + tag, attrs = data + if preserve or (tag in preserve_elems or + attrs.get(space) == 'preserve'): + preserve += 1 + if not noescape and tag in noescape_elems: + noescape = True + + elif kind is END: + noescape = False + if preserve: + preserve -= 1 + + elif kind is START_CDATA: noescape = True - elif kind is END: - noescape = False - if preserve: - preserve -= 1 + elif kind is END_CDATA: + noescape = False - elif kind is START_CDATA: - noescape = True - - elif kind is END_CDATA: - noescape = False - - if kind: - yield kind, data, pos + if kind: + yield kind, data, pos + return _process(stream) class DocTypeInserter(object): @@ -748,8 +800,7 @@ doctype = DocType.get(doctype) self.doctype_event = (DOCTYPE, doctype, (None, -1, -1)) - def __call__(self, stream): - doctype_inserted = False + def __call__(self, stream, doctype_inserted = False): for kind, data, pos in stream: if not doctype_inserted: doctype_inserted = True @@ -759,7 +810,13 @@ continue yield self.doctype_event - yield (kind, data, pos) + #There cannot be XML Declaration in optimized fragments + if kind is OPTIMIZED_FRAGMENT: + sstream = self(data.get_stream(), True) + frag = data.create_child(DocTypeInserter, sstream) + yield (kind, frag, pos,) + else: + yield (kind, data, pos) if not doctype_inserted: yield self.doctype_event diff --git a/genshi/path.py b/genshi/path.py --- a/genshi/path.py +++ b/genshi/path.py @@ -45,6 +45,7 @@ from genshi.core import Stream, Attrs, Namespace, QName from genshi.core import START, END, TEXT, START_NS, END_NS, COMMENT, PI, \ START_CDATA, END_CDATA +from genshi.optimization import OPTIMIZED_FRAGMENT __all__ = ['Path', 'PathSyntaxError'] __docformat__ = 'restructuredtext en' @@ -128,24 +129,43 @@ namespaces = {} if variables is None: variables = {} - stream = iter(stream) + #stream stack + ss = [iter(stream)] def _generate(): test = self.test() - for event in stream: + while ss: + try: + event = ss[-1].next() + except StopIteration: + ss.pop() + continue + if event[0] is OPTIMIZED_FRAGMENT: + ss.append(iter(event[1].process_stream())) + continue result = test(event, namespaces, variables) if result is True: yield event + if event[0] is OPTIMIZED_FRAGMENT: + for e in optimized_flatten(event[1]): + test(e, namespaces, variables, + updateonly=True) + if event[0] is START: depth = 1 while depth > 0: - subevent = stream.next() + subevent = ss[-1].next() if subevent[0] is START: depth += 1 elif subevent[0] is END: depth -= 1 yield subevent - test(subevent, namespaces, variables, - updateonly=True) + if event[0] is OPTIMIZED_FRAGMENT: + for e in optimized_flatten(event[1]): + test(e, namespaces, variables, + updateonly=True) + else: + test(subevent, namespaces, variables, + updateonly=True) elif result: yield result return Stream(_generate(), diff --git a/genshi/template/base.py b/genshi/template/base.py --- a/genshi/template/base.py +++ b/genshi/template/base.py @@ -25,6 +25,7 @@ from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure from genshi.input import ParseError +from genshi.optimization import OPTIMIZATION_POSSIBILITY, OPTIMIZED_FRAGMENT, Optimizer, OptimizedFragment __all__ = ['Context', 'Template', 'TemplateError', 'TemplateRuntimeError', 'TemplateSyntaxError', 'BadDirectiveError'] @@ -343,6 +344,7 @@ _number_conv = unicode # function used to convert numbers to event data def __init__(self, source, filepath=None, filename=None, loader=None, + optimizer=None, serializer = None, encoding=None, lookup='strict', allow_exec=True): """Initialize a template from either a string, a file-like object, or an already parsed markup stream. @@ -378,6 +380,14 @@ except ParseError, e: raise TemplateSyntaxError(e.msg, self.filepath, e.lineno, e.offset) + self.optimizer = optimizer + from genshi.output import get_serializer + if serializer is None: + self.serializerObject = get_serializer(self.serializer) + else: + self.serializerObject = get_serializer(serializer) + + def __getstate__(self): state = self.__dict__.copy() state['filters'] = [] @@ -391,7 +401,7 @@ return '<%s "%s">' % (self.__class__.__name__, self.filename) def _init_filters(self): - self.filters = [self._flatten, self._eval, self._exec] + self.filters = [self._flatten, self._optimize, self._eval, self._exec] if self.loader: self.filters.append(self._include) @@ -484,7 +494,7 @@ stream = self.stream for filter_ in self.filters: stream = filter_(iter(stream), ctxt, **vars) - return Stream(stream, self.serializer) + return Stream(stream, self.serializerObject) def _eval(self, stream, ctxt, **vars): """Internal stream filter that evaluates any expressions in `START` and @@ -591,6 +601,32 @@ else: yield event + def _optimize(self, stream, ctx, **vars): + """Changes optimization possibilities into optimized fragments""" + for event in stream: + if event[0] is OPTIMIZATION_POSSIBILITY: + substream, subvars, subid = event[1] + + # don't want match here, match should work after all optimizations + filters = (self._flatten, self._optimize, + self._eval, self._exec) + for filter_ in filters: + substream = filter_(substream, ctx, **vars) + if self.optimizer is None: + for subevent in substream: + yield subevent + elif len(subvars) == 0: + fragmentId = self.optimizer.get_fragment_id(subid, + *subvars) + ret = OptimizedFragment(substream, self.optimizer, + fragmentId) + yield (OPTIMIZED_FRAGMENT, ret, event[2],) + else: + for subevent in self._optimize(substream, ctx, **vars): + yield subevent + else: + yield event + EXEC = Template.EXEC EXPR = Template.EXPR diff --git a/genshi/template/directives.py b/genshi/template/directives.py --- a/genshi/template/directives.py +++ b/genshi/template/directives.py @@ -25,9 +25,12 @@ EXPR, _apply_directives, _eval_expr, \ _exec_suite from genshi.template.eval import Expression, ExpressionASTTransformer, _parse +from genshi.optimization import OPTIMIZATION_POSSIBILITY +#TODO SOC: after choosing name repair line breaks __all__ = ['AttrsDirective', 'ChooseDirective', 'ContentDirective', 'DefDirective', 'ForDirective', 'IfDirective', 'MatchDirective', + 'OptimizeDirective', 'OtherwiseDirective', 'ReplaceDirective', 'StripDirective', 'WhenDirective', 'WithDirective'] __docformat__ = 'restructuredtext en' @@ -463,6 +466,29 @@ def __repr__(self): return '<%s "%s">' % (self.__class__.__name__, self.path.source) +class OptimizeDirective(Directive): + """Implementation of the ``py:optimize`` template directive. + + This directive marks subtree in stream as optimization safe and depending + only on variables which names are given in argument. + """ + + def __init__(self, value, template=None, namespaces=None, lineno=-1, + offset=-1): + self.vars = filter(None, (x.strip() for x in value.split(","))) + + def attach(cls, template, stream, value, namespaces, pos): + if type(value) is dict: + value = value.get('vars') + return super(OptimizeDirective, cls).attach(template, stream, value, + namespaces, pos) + attach = classmethod(attach) + + def __call__(self, stream, directives, ctxt, **vars): + stream = _apply_directives(stream, directives, ctxt, **vars) + #TODO SOC: Should we really return id as identifier + return ((OPTIMIZATION_POSSIBILITY, (stream, self.vars, id(self)), + (None, -1, -1),),) class ReplaceDirective(Directive): """Implementation of the ``py:replace`` template directive. diff --git a/genshi/template/markup.py b/genshi/template/markup.py --- a/genshi/template/markup.py +++ b/genshi/template/markup.py @@ -25,6 +25,7 @@ from genshi.template.interpolation import interpolate from genshi.template.directives import * from genshi.template.text import NewTextTemplate +from genshi.optimization import OPTIMIZED_FRAGMENT __all__ = ['MarkupTemplate'] __docformat__ = 'restructuredtext en' @@ -53,6 +54,7 @@ ('if', IfDirective), ('choose', ChooseDirective), ('with', WithDirective), + ('optimize', OptimizeDirective), ('replace', ReplaceDirective), ('content', ContentDirective), ('attrs', AttrsDirective), @@ -221,7 +223,7 @@ assert len(streams) == 1 return streams[0] - def _match(self, stream, ctxt, match_templates=None, **vars): + def _match(self, stream, ctxt, match_templates=None, _matched=None, **vars): """Internal stream filter that applies any defined match templates to the stream. """ @@ -245,6 +247,18 @@ for event in stream: + if event[0] is OPTIMIZED_FRAGMENT: + substream = iter(event[1].process_stream()) + matched = [False] + substream = self._match(substream, ctxt, match_templates, matched) + substream = list(substream) + if not matched[0]: + yield event + else: + for subevent in substream: + yield subevent + continue + # We (currently) only care about start and end events for matching # We might care about namespace events in the future, though if not match_templates or (event[0] is not START and @@ -256,6 +270,8 @@ in enumerate(match_templates): if test(event, namespaces, ctxt) is True: + if _matched is not None: + _matched[0] = True if 'match_once' in hints: del match_templates[idx] idx -= 1 @@ -293,7 +309,9 @@ for event in self._match( self._exec( self._eval( - self._flatten(template, ctxt, **vars), + self._optimize( + self._flatten(template, ctxt, **vars), + ctxt, **vars), ctxt, **vars), ctxt, **vars), ctxt, match_templates[idx + 1:], **vars): diff --git a/genshi/template/tests/__init__.py b/genshi/template/tests/__init__.py --- a/genshi/template/tests/__init__.py +++ b/genshi/template/tests/__init__.py @@ -16,13 +16,15 @@ def suite(): from genshi.template.tests import base, directives, eval, interpolation, \ - loader, markup, plugin, text + loader, optimization, markup, plugin, \ + text suite = unittest.TestSuite() suite.addTest(base.suite()) suite.addTest(directives.suite()) suite.addTest(eval.suite()) suite.addTest(interpolation.suite()) suite.addTest(loader.suite()) + suite.addTest(optimization.suite()) suite.addTest(markup.suite()) suite.addTest(plugin.suite()) suite.addTest(text.suite()) diff --git a/genshi/template/tests/optimization.py b/genshi/template/tests/optimization.py new file mode 100644 --- /dev/null +++ b/genshi/template/tests/optimization.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2008 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +import doctest +import unittest +import sys + +from genshi.core import Attrs, Stream, QName +from genshi.input import HTML, XML +from genshi.output import DocType, XMLSerializer, XHTMLSerializer, \ + HTMLSerializer, EmptyTagFilter +from genshi.optimization import Optimizer, OptimizedFragment, OPTIMIZED_FRAGMENT + +from genshi.template.base import BadDirectiveError, TemplateSyntaxError +from genshi.template.markup import MarkupTemplate + + +def _unopt(code): + return code.replace(' py:optimize=""', '') + +class OptimizedTemplatesTestCase(unittest.TestCase): + + def _test_doc(self, doc, serializer='xml'): + unopt = MarkupTemplate(XML(_unopt(doc)), serializer=serializer) + optimizer = Optimizer(10) + opt = MarkupTemplate(XML(doc), serializer=serializer, + optimizer=optimizer) + result = unopt.generate().render() + #non-cached one + self.assertEqual(opt.generate().render(), result) + #cached one + self.assertEqual(opt.generate().render(), result) + def test_double_match(self): + code = """\ + + + + ${select('.')} + + + Some text that could be optimized. + + + + + ${select('.')} + + + + + Foo bar bar + + + +""" + self._test_doc(code) + + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(OptimizedTemplatesTestCase, 'test')) + return suite + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/genshi/tests/__init__.py b/genshi/tests/__init__.py --- a/genshi/tests/__init__.py +++ b/genshi/tests/__init__.py @@ -15,7 +15,8 @@ def suite(): import genshi - from genshi.tests import builder, core, input, output, path, util + from genshi.tests import builder, core, input, optimization, \ + output, path, util from genshi.filters import tests as filters from genshi.template import tests as template @@ -25,6 +26,7 @@ suite.addTest(filters.suite()) suite.addTest(input.suite()) suite.addTest(output.suite()) + suite.addTest(optimization.suite()) suite.addTest(path.suite()) suite.addTest(template.suite()) suite.addTest(util.suite()) diff --git a/genshi/tests/optimization.py b/genshi/tests/optimization.py new file mode 100644 --- /dev/null +++ b/genshi/tests/optimization.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2006-2008 Edgewall Software +# All rights reserved. +# +# This software is licensed as described in the file COPYING, which +# you should have received as part of this distribution. The terms +# are also available at http://genshi.edgewall.org/wiki/License. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at http://genshi.edgewall.org/log/. + +import doctest +import unittest +import sys + +from genshi.core import Attrs, Stream, QName +from genshi.input import HTML, XML +from genshi.output import DocType, XMLSerializer, XHTMLSerializer, \ + HTMLSerializer, EmptyTagFilter +from genshi.optimization import Optimizer, OptimizedFragment, OPTIMIZED_FRAGMENT + +class FiltersOptimizationTestCase(unittest.TestCase): + + def _inner_helper(self, istream): + optimizer = Optimizer(10) + of = OptimizedFragment(istream, optimizer, 1, 0) + stream = Stream([(OPTIMIZED_FRAGMENT, of, (None, -1, -1))]) + return stream + + def _test_doc(self, doc, serializer): + istream = XML(doc) + istream = Stream(list(istream), serializer) + stream = self._inner_helper(istream) + #non-cached one + self.assertEqual(stream.render(), istream.render()) + #cached one + self.assertEqual(stream.render(), istream.render()) + + test_doc = """\ +
+ + Hello world + + +
+ Hello everyone! +
+ + And you too! + +
+""" + def test_xml_serializer(self): + self._test_doc(self.test_doc, XMLSerializer()) + def test_xhtml_serializer(self): + self._test_doc(self.test_doc, XHTMLSerializer()) + def test_html_serializer(self): + self._test_doc(self.test_doc, HTMLSerializer()) + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(FiltersOptimizationTestCase, 'test')) + return suite + +if __name__ == '__main__': + unittest.main(defaultTest='suite')