Context Navigation

jslexer.py

リビジョン 3, 5.5 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	# -*- coding: utf-8 -*-
2	#
3	# Copyright (C) 2008 Edgewall Software
4	# All rights reserved.
5	#
6	# This software is licensed as described in the file COPYING, which
7	# you should have received as part of this distribution. The terms
8	# are also available at http://babel.edgewall.org/wiki/License.
9	#
10	# This software consists of voluntary contributions made by many
11	# individuals. For the exact contribution history, see the revision
12	# history and logs, available at http://babel.edgewall.org/log/.
13
14	"""A simple JavaScript 1.5 lexer which is used for the JavaScript
15	extractor.
16	"""
17
18	import re
19	from operator import itemgetter
20
21
# Operators recognised by the generic 'operator' rule below.  The division
# operators ('/' and '/=') are intentionally not listed: a slash is ambiguous
# with the start of a regular expression literal, so `tokenize` resolves it
# separately with `division_re` / `regex_re`.
operators = [
    '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
    '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
    '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
    '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
]
# Longest operators first, so that the regex alternation built from this list
# prefers e.g. '>>>=' over '>'.  The sort is stable, which keeps the listed
# order among operators of equal length.
operators.sort(key=len, reverse=True)

# Single-character escapes that map to a control character.
escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}

# Ordered ``(token_type, compiled_pattern)`` pairs tried by `tokenize` at the
# current position; the first pattern that matches wins.  A token type of
# ``None`` means the matched text is consumed without producing a token.
# Flags are passed as arguments rather than as trailing inline ``(?...)``
# groups, which newer versions of `re` reject.
rules = [
    # whitespace
    (None, re.compile(r'\s+', re.UNICODE)),
    # HTML comment opener: skipped up to the end of the line
    (None, re.compile(r'<!--.*')),
    ('linecomment', re.compile(r'//.*')),
    # non-greedy so that the comment ends at the first '*/'
    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
    # identifiers: either start with '$' or with a non-digit word character
    ('name', re.compile(r'(\$+\w*|[^\W\d]\w*)', re.UNICODE)),
    # The hexadecimal alternative comes first; otherwise the leading '0' of
    # '0x1F' would be consumed as a complete decimal number.
    ('number', re.compile(r'''(
        (0[xX][a-fA-F0-9]+) |
        (?:0|[1-9]\d*)
        (\.\d+)?
        ([eE][-+]?\d+)?
    )''', re.VERBOSE)),
    ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
    # single- or double-quoted string; a backslash escapes any character,
    # including a line break (hence DOTALL)
    ('string', re.compile(r'''(
        '(?:[^'\\]*(?:\\.[^'\\]*)*)' |
        "(?:[^"\\]*(?:\\.[^"\\]*)*)"
    )''', re.VERBOSE | re.DOTALL))
]

# '/' or '/=' when the slash is a division operator.
division_re = re.compile(r'/=?')
# A regular expression literal: '/', body with backslash escapes, '/', flags.
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
# Any single line break; used for line counting.
line_re = re.compile(r'(\r\n|\n|\r)')
# A backslash immediately followed by a line break (line continuation).
line_join_re = re.compile(r'\\' + line_re.pattern)
# Up to four hexadecimal digits following a '\u' escape.
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
56
57
class Token(tuple):
    """Represents a token as returned by `tokenize`.

    A token is an immutable ``(type, value, lineno)`` triple whose fields
    are also reachable as the attributes `type`, `value` and `lineno`.
    """
    # No per-instance ``__dict__``: all state lives in the tuple itself.
    __slots__ = ()

    def __new__(cls, type, value, lineno):
        """Create a token.

        :param type: the token type name
        :param value: the matched source text
        :param lineno: the line number recorded for the token
        """
        return tuple.__new__(cls, (type, value, lineno))

    # Read-only named views onto the three tuple positions.
    type = property(itemgetter(0))
    value = property(itemgetter(1))
    lineno = property(itemgetter(2))
68
69
def indicates_division(token):
    """A helper function that helps the tokenizer to decide if the current
    token may be followed by a division operator.

    :param token: an object with `type` and `value` attributes, such as a
                  `Token`
    :return: `True` if a ``/`` following this token should be read as a
             division operator, `False` if it should be tried as the start
             of a regular expression literal
    """
    if token.type == 'operator':
        # Only operators that close an expression or are an increment or
        # decrement can have a division after them.
        return token.value in (')', ']', '}', '++', '--')
    # Value-like tokens can be divided; anything else (e.g. comments) cannot.
    return token.type in ('name', 'number', 'string', 'regexp')
77
78
try:
    _unichr = unichr  # Python 2: builds a unicode character from a code point
except NameError:
    _unichr = chr  # Python 3: chr() already returns a unicode character


def unquote_string(string):
    """Unquote a string with JavaScript rules.  The string has to start with
    string delimiters (``'`` or ``"``.)

    A backslash followed by a line break is replaced by the bare line break.
    The escapes listed in `escapes` become their control characters, a
    ``\\u`` followed by exactly four hexadecimal digits becomes the
    corresponding character, and for any other escape the backslash is simply
    removed.  A lone backslash at the very end of the string is dropped.

    :param string: the string literal including its surrounding quotes
    :return: a string
    """
    assert string and string[0] == string[-1] and string[0] in '"\'', \
        'string provided is not properly delimited'
    string = line_join_re.sub('\\1', string[1:-1])
    result = []
    add = result.append
    pos = 0
    end = len(string)

    while True:
        # scan for the next escape
        escape_pos = string.find('\\', pos)
        if escape_pos < 0:
            break
        add(string[pos:escape_pos])

        # a backslash as the very last character escapes nothing; drop it
        # instead of indexing past the end of the string
        if escape_pos + 1 >= end:
            pos = end
            break

        # check which character is escaped
        next_char = string[escape_pos + 1]
        if next_char in escapes:
            add(escapes[next_char])

        # unicode escapes.  Try to consume up to four hexadecimal
        # characters and interpret them as a unicode code point.  If fewer
        # than four digits are present, put all the consumed characters
        # into the string unchanged.
        elif next_char in 'uU':
            escaped = uni_escape_re.match(string, escape_pos + 2)
            if escaped is not None:
                escaped_value = escaped.group()
                if len(escaped_value) == 4:
                    # four hex digits never exceed 0xFFFF, so the
                    # conversion cannot fail
                    add(_unichr(int(escaped_value, 16)))
                else:
                    add(next_char + escaped_value)
                pos = escaped.end()
                continue
            # no hex digits at all: keep just the 'u' / 'U'
            add(next_char)

        # bogus escape.  Just remove the backslash.
        else:
            add(next_char)
        pos = escape_pos + 2

    if pos < end:
        add(string[pos:])

    return u''.join(result)
135
136
def tokenize(source):
    """Tokenize a JavaScript source.

    Text matched by a rule whose token type is ``None`` is consumed without
    producing a token, and characters that match nothing at all are skipped
    one at a time, so the tokenizer never raises on invalid input.

    :param source: the JavaScript source code as a string
    :return: generator of `Token` objects
    """
    may_divide = False
    pos = 0
    lineno = 1
    end = len(source)

    while pos < end:
        # handle regular rules first
        for token_type, rule in rules:
            match = rule.match(source, pos)
            if match is not None:
                break
        # if we don't have a match we don't give up yet, but check for
        # division operators or regular expression literals, based on
        # the status of `may_divide` which is determined by the last
        # yielded token using `indicates_division`.
        else:
            if may_divide:
                match = division_re.match(source, pos)
                token_type = 'operator'
            else:
                match = regex_re.match(source, pos)
                token_type = 'regexp'
            if match is None:
                # woops. invalid syntax. jump one char ahead and try again.
                pos += 1
                continue

        token_value = match.group()
        if token_type is not None:
            token = Token(token_type, token_value, lineno)
            may_divide = indicates_division(token)
            yield token
        # advance the line counter after yielding, so that a token spanning
        # several lines reports the line it starts on
        lineno += len(line_re.findall(token_value))
        pos = match.end()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/Babel-0.9.4-py2.6.egg/babel/messages/jslexer.py

異なるフォーマットでダウンロード: