# -*- coding: utf-8 -*-
#
# Copyright (C) 2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""A simple JavaScript 1.5 lexer which is used for the JavaScript
extractor.
"""

from operator import itemgetter
import re

# Every operator / punctuation token listed for the lexer.
operators = [
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
]
# Order the operators longest first.  The sort is stable, so operators of
# equal length keep the relative order they have in the literal above.
# ``key=len, reverse=True`` replaces the former ``cmp``-based comparison
# function, which only works on Python 2.
# NOTE(review): presumably longest-first so that a multi-character operator
# is tried before any shorter operator that is its prefix -- confirm against
# the code that consumes this list.
operators.sort(key=len, reverse=True)

# Single-letter escape names mapped to the control character each denotes.
escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

# NOTE(review): the ``rules`` table is cut off in the middle of a string
# literal in the text this module was restored from.  The visible fragment
# is preserved verbatim below; it must be completed from the upstream
# source before ``rules`` can be used.
#
# rules = [
#     (None, re.compile(r'\s+(?u)')),
#     (None, re.compile(r'