root/galaxy-central/eggs/Babel-0.9.4-py2.6.egg/babel/messages/jslexer.py

Revision 3, 5.5 KB (committer: kohda, 14 years ago)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

# -*- coding: utf-8 -*-
#
# Copyright (C) 2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.

"""A simple JavaScript 1.5 lexer which is used for the JavaScript
extractor.
"""

import re
from operator import itemgetter


operators = [
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
]
operators.sort(lambda a, b: cmp(-len(a), -len(b)))
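# Note: the longest-first sort matters because the operator rule below is
# built as a plain regex alternation, and Python's `re` takes the first
# alternative that matches rather than the longest one.  With this order
# '>>>=' is tried before '>>>', '>>' and '>', so the longest operator at a
# given position always wins.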

escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

rules = [
    (None, re.compile(r'\s+(?u)')),
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    ('multilinecomment', re.compile(r'/\*.*?\*/(?us)')),
    ('name', re.compile(r'(\$+\w*|[^\W\d]\w*)(?u)')),
    ('number', re.compile(r'''(?x)(
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)? |
        (0x[a-fA-F0-9]+)
    )''')),
    ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
    ('string', re.compile(r'''(?xs)(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)'  |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )'''))
]
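# The string rule tolerates escaped quotes: matching it against the literal
# 'it\'s' consumes the whole literal, because the pattern alternates runs of
# non-quote, non-backslash characters with backslash escapes before requiring
# the closing delimiter.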
50
51division_re = re.compile(r'/=?')
52regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*(?s)')
53line_re = re.compile(r'(\r\n|\n|\r)')
54line_join_re = re.compile(r'\\' + line_re.pattern)
55uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
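# line_join_re matches a backslash followed by any newline convention;
# unquote_string() below substitutes the backreference '\1' for the whole
# match, i.e. it strips the backslash of a line continuation while keeping
# the newline itself.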


class Token(tuple):
    """Represents a token as returned by `tokenize`."""
    __slots__ = ()

    def __new__(cls, type, value, lineno):
        return tuple.__new__(cls, (type, value, lineno))

    type = property(itemgetter(0))
    value = property(itemgetter(1))
    lineno = property(itemgetter(2))
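
# Example (doctest-style, illustrative):
#
#     >>> tok = Token('name', u'msg', 1)
#     >>> tok.type, tok.value, tok.lineno
#     ('name', u'msg', 1)
#     >>> tok == ('name', u'msg', 1)   # still an ordinary tuple
#     True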


def indicates_division(token):
    """A helper function that tells the tokenizer whether the current
    token may be followed by a division operator.
    """
    if token.type == 'operator':
        return token.value in (')', ']', '}', '++', '--')
    return token.type in ('name', 'number', 'string', 'regexp')
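
# Example (illustrative): after a name or a closing bracket a '/' means
# division; anywhere else it starts a regular expression literal:
#
#     >>> indicates_division(Token('name', u'a', 1))      # e.g. "a / b"
#     True
#     >>> indicates_division(Token('operator', u'=', 1))  # e.g. "a = /re/"
#     False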


def unquote_string(string):
    """Unquote a string with JavaScript rules.  The string has to start
    and end with string delimiters (``'`` or ``"``).

    :return: a string
    """
    assert string and string[0] == string[-1] and string[0] in '"\'', \
        'string provided is not properly delimited'
    string = line_join_re.sub('\\1', string[1:-1])
    result = []
    add = result.append
    pos = 0

    while 1:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes.  try to consume up to four hexadecimal
        # characters and interpret them as a unicode code point.  If
        # there is no such code point, put all the consumed characters
        # back into the string.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    try:
                        add(unichr(int(escaped_value, 16)))
                    except ValueError:
                        pass
                    else:
                        pos = escape_pos + 6
                        continue
                add(next_char + escaped_value)
                pos = escaped.end()
                continue
            else:
                add(next_char)

        # bogus escape.  Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    if pos < len(string):
        add(string[pos:])

    return u''.join(result)
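
# Example (doctest-style, illustrative):
#
#     >>> unquote_string(u"'Hello\\nworld'")
#     u'Hello\nworld'
#     >>> unquote_string(u'"caf\\u00e9"')
#     u'caf\xe9'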


def tokenize(source):
    """Tokenize a JavaScript source.

    :return: generator of `Token`\s
    """
    may_divide = False
    pos = 0
    lineno = 1
    end = len(source)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide` which is determined by the last
        # processed non-whitespace token using `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
            if match is None:
                # whoops. invalid syntax. jump one char ahead and try again.
                pos += 1
                continue

        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            may_divide = indicates_division(token)
            yield token
        lineno += len(line_re.findall(token_value))
        pos = match.end()
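
# Example (doctest-style, illustrative; whitespace produces no tokens):
#
#     >>> for tok in tokenize(u"msg = gettext('Hello')"):
#     ...     print tok.type, tok.value
#     name msg
#     operator =
#     name gettext
#     operator (
#     string 'Hello'
#     operator )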