Context Navigation

lexer.py @ 3

リビジョン 3, 13.3 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

Rev	行番号
[3]	1	# lexer.py
	2	# Copyright (C) 2006, 2007, 2008, 2009 Michael Bayer mike_mp@zzzcomputing.com
	3	#
	4	# This module is part of Mako and is released under
	5	# the MIT License: http://www.opensource.org/licenses/mit-license.php
	6
	7	"""provides the Lexer class for parsing template strings into parse trees."""
	8
	9	import re, codecs
	10	from mako import parsetree, exceptions
	11	from mako.pygen import adjust_whitespace
	12
	13	_regexp_cache = {}
	14
	15	class Lexer(object):
	16	def __init__(self, text, filename=None, disable_unicode=False, input_encoding=None, preprocessor=None):
	17	self.text = text
	18	self.filename = filename
	19	self.template = parsetree.TemplateNode(self.filename)
	20	self.matched_lineno = 1
	21	self.matched_charpos = 0
	22	self.lineno = 1
	23	self.match_position = 0
	24	self.tag = []
	25	self.control_line = []
	26	self.disable_unicode = disable_unicode
	27	self.encoding = input_encoding
	28	if preprocessor is None:
	29	self.preprocessor = []
	30	elif not hasattr(preprocessor, '__iter__'):
	31	self.preprocessor = [preprocessor]
	32	else:
	33	self.preprocessor = preprocessor
	34
	35	exception_kwargs = property(lambda self:{'source':self.text, 'lineno':self.matched_lineno, 'pos':self.matched_charpos, 'filename':self.filename})
	36
	37	def match(self, regexp, flags=None):
	38	"""match the given regular expression string and flags to the current text position.
	39
	40	if a match occurs, update the current text and line position."""
	41	mp = self.match_position
	42	try:
	43	reg = _regexp_cache[(regexp, flags)]
	44	except KeyError:
	45	if flags:
	46	reg = re.compile(regexp, flags)
	47	else:
	48	reg = re.compile(regexp)
	49	_regexp_cache[(regexp, flags)] = reg
	50
	51	match = reg.match(self.text, self.match_position)
	52	if match:
	53	(start, end) = match.span()
	54	if end == start:
	55	self.match_position = end + 1
	56	else:
	57	self.match_position = end
	58	self.matched_lineno = self.lineno
	59	lines = re.findall(r"\n", self.text[mp:self.match_position])
	60	cp = mp - 1
	61	while (cp >= 0 and cp<self.textlength and self.text[cp] != '\n'):
	62	cp -=1
	63	self.matched_charpos = mp - cp
	64	self.lineno += len(lines)
	65	#print "MATCHED:", match.group(0), "LINE START:", self.matched_lineno, "LINE END:", self.lineno
	66	#print "MATCH:", regexp, "\n", self.text[mp : mp + 15], (match and "TRUE" or "FALSE")
	67	return match
	68
	69	def parse_until_text(self, *text):
	70	startpos = self.match_position
	71	while True:
	72	match = self.match(r'#.*\n')
	73	if match:
	74	continue
	75	match = self.match(r'(\"\"\"\|\'\'\'\|\"\|\')')
	76	if match:
	77	m = self.match(r'.*?%s' % match.group(1), re.S)
	78	if not m:
	79	raise exceptions.SyntaxException("Unmatched '%s'" % match.group(1), **self.exception_kwargs)
	80	else:
	81	match = self.match(r'(%s)' % r'\|'.join(text))
	82	if match:
	83	return (self.text[startpos:self.match_position-len(match.group(1))], match.group(1))
	84	else:
	85	match = self.match(r".*?(?=\"\|\'\|#\|%s)" % r'\|'.join(text), re.S)
	86	if not match:
	87	raise exceptions.SyntaxException("Expected: %s" % ','.join(text), **self.exception_kwargs)
	88
	89	def append_node(self, nodecls, args, *kwargs):
	90	kwargs.setdefault('source', self.text)
	91	kwargs.setdefault('lineno', self.matched_lineno)
	92	kwargs.setdefault('pos', self.matched_charpos)
	93	kwargs['filename'] = self.filename
	94	node = nodecls(args, *kwargs)
	95	if len(self.tag):
	96	self.tag[-1].nodes.append(node)
	97	else:
	98	self.template.nodes.append(node)
	99	if isinstance(node, parsetree.Tag):
	100	if len(self.tag):
	101	node.parent = self.tag[-1]
	102	self.tag.append(node)
	103	elif isinstance(node, parsetree.ControlLine):
	104	if node.isend:
	105	self.control_line.pop()
	106	elif node.is_primary:
	107	self.control_line.append(node)
	108	elif len(self.control_line) and not self.control_line[-1].is_ternary(node.keyword):
	109	raise exceptions.SyntaxException("Keyword '%s' not a legal ternary for keyword '%s'" % (node.keyword, self.control_line[-1].keyword), **self.exception_kwargs)
	110
	111	def escape_code(self, text):
	112	if not self.disable_unicode and self.encoding:
	113	return text.encode('ascii', 'backslashreplace')
	114	else:
	115	return text
	116
	117	def parse(self):
	118	for preproc in self.preprocessor:
	119	self.text = preproc(self.text)
	120	if not isinstance(self.text, unicode) and self.text.startswith(codecs.BOM_UTF8):
	121	self.text = self.text[len(codecs.BOM_UTF8):]
	122	parsed_encoding = 'utf-8'
	123	me = self.match_encoding()
	124	if me is not None and me != 'utf-8':
	125	raise exceptions.CompileException("Found utf-8 BOM in file, with conflicting magic encoding comment of '%s'" % me, self.text.decode('utf-8', 'ignore'), 0, 0, self.filename)
	126	else:
	127	parsed_encoding = self.match_encoding()
	128	if parsed_encoding:
	129	self.encoding = parsed_encoding
	130	if not self.disable_unicode and not isinstance(self.text, unicode):
	131	if self.encoding:
	132	try:
	133	self.text = self.text.decode(self.encoding)
	134	except UnicodeDecodeError, e:
	135	raise exceptions.CompileException("Unicode decode operation of encoding '%s' failed" % self.encoding, self.text.decode('utf-8', 'ignore'), 0, 0, self.filename)
	136	else:
	137	try:
	138	self.text = self.text.decode()
	139	except UnicodeDecodeError, e:
	140	raise exceptions.CompileException("Could not read template using encoding of 'ascii'. Did you forget a magic encoding comment?", self.text.decode('utf-8', 'ignore'), 0, 0, self.filename)
	141
	142	self.textlength = len(self.text)
	143
	144	while (True):
	145	if self.match_position > self.textlength:
	146	break
	147
	148	if self.match_end():
	149	break
	150	if self.match_expression():
	151	continue
	152	if self.match_control_line():
	153	continue
	154	if self.match_comment():
	155	continue
	156	if self.match_tag_start():
	157	continue
	158	if self.match_tag_end():
	159	continue
	160	if self.match_python_block():
	161	continue
	162	if self.match_text():
	163	continue
	164
	165	if self.match_position > self.textlength:
	166	break
	167	raise exceptions.CompileException("assertion failed")
	168
	169	if len(self.tag):
	170	raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, **self.exception_kwargs)
	171	if len(self.control_line):
	172	raise exceptions.SyntaxException("Unterminated control keyword: '%s'" % self.control_line[-1].keyword, self.text, self.control_line[-1].lineno, self.control_line[-1].pos, self.filename)
	173	return self.template
	174
	175	def match_encoding(self):
	176	match = self.match(r'#.coding[:=]\s([-\w.]+).*\r?\n')
	177	if match:
	178	return match.group(1)
	179	else:
	180	return None
	181
	182	def match_tag_start(self):
	183	match = self.match(r'''
	184	\<% # opening tag
	185
	186	([\w\.\:]+) # keyword
	187
	188	((?:\s+\w+\|\s=\s\|".?"\|'.?')*) # attrname, = sign, string expression
	189
	190	\s* # more whitespace
	191
	192	(/)?> # closing
	193
	194	''',
	195
	196	re.I \| re.S \| re.X)
	197
	198	if match:
	199	(keyword, attr, isend) = (match.group(1), match.group(2), match.group(3))
	200	self.keyword = keyword
	201	attributes = {}
	202	if attr:
	203	for att in re.findall(r"\s(\w+)\s=\s(?:'([^'])'\|\"([^\"]*)\")", attr):
	204	(key, val1, val2) = att
	205	text = val1 or val2
	206	text = text.replace('\r\n', '\n')
	207	attributes[key] = self.escape_code(text)
	208	self.append_node(parsetree.Tag, keyword, attributes)
	209	if isend:
	210	self.tag.pop()
	211	else:
	212	if keyword == 'text':
	213	match = self.match(r'(.*?)(?=\</%text>)', re.S)
	214	if not match:
	215	raise exceptions.SyntaxException("Unclosed tag: <%%%s>" % self.tag[-1].keyword, **self.exception_kwargs)
	216	self.append_node(parsetree.Text, match.group(1))
	217	return self.match_tag_end()
	218	return True
	219	else:
	220	return False
	221
	222	def match_tag_end(self):
	223	match = self.match(r'\</%[\t ](.+?)[\t ]>')
	224	if match:
	225	if not len(self.tag):
	226	raise exceptions.SyntaxException("Closing tag without opening tag: </%%%s>" % match.group(1), **self.exception_kwargs)
	227	elif self.tag[-1].keyword != match.group(1):
	228	raise exceptions.SyntaxException("Closing tag </%%%s> does not match tag: <%%%s>" % (match.group(1), self.tag[-1].keyword), **self.exception_kwargs)
	229	self.tag.pop()
	230	return True
	231	else:
	232	return False
	233
	234	def match_end(self):
	235	match = self.match(r'\Z', re.S)
	236	if match:
	237	string = match.group()
	238	if string:
	239	return string
	240	else:
	241	return True
	242	else:
	243	return False
	244
	245	def match_text(self):
	246	match = self.match(r"""
	247	(.*?) # anything, followed by:
	248	(
	249	(?<=\n)(?=[ \t]*(?=%\|\#\#)) # an eval or line-based comment preceded by a consumed \n and whitespace
	250	\|
	251	(?=\${) # an expression
	252	\|
	253	(?=\#\*) # multiline comment
	254	\|
	255	(?=</?[%&]) # a substitution or block or call start or end
	256	# - don't consume
	257	\|
	258	(\\\r?\n) # an escaped newline - throw away
	259	\|
	260	\Z # end of string
	261	)""", re.X \| re.S)
	262
	263	if match:
	264	text = match.group(1)
	265	self.append_node(parsetree.Text, text)
	266	return True
	267	else:
	268	return False
	269
	270	def match_python_block(self):
	271	match = self.match(r"<%(!)?")
	272	if match:
	273	(line, pos) = (self.matched_lineno, self.matched_charpos)
	274	(text, end) = self.parse_until_text(r'%>')
	275	text = adjust_whitespace(text) + "\n" # the trailing newline helps compiler.parse() not complain about indentation
	276	self.append_node(parsetree.Code, self.escape_code(text), match.group(1)=='!', lineno=line, pos=pos)
	277	return True
	278	else:
	279	return False
	280
	281	def match_expression(self):
	282	match = self.match(r"\${")
	283	if match:
	284	(line, pos) = (self.matched_lineno, self.matched_charpos)
	285	(text, end) = self.parse_until_text(r'\\|', r'}')
	286	if end == '\|':
	287	(escapes, end) = self.parse_until_text(r'}')
	288	else:
	289	escapes = ""
	290	text = text.replace('\r\n', '\n')
	291	self.append_node(parsetree.Expression, self.escape_code(text), escapes.strip(), lineno=line, pos=pos)
	292	return True
	293	else:
	294	return False
	295
	296	def match_control_line(self):
	297	match = self.match(r"(?<=^)[\t ](%\|##)[\t ]((?:(?:\\r?\n)\|[^\r\n])*)(?:\r?\n\|\Z)", re.M)
	298	if match:
	299	operator = match.group(1)
	300	text = match.group(2)
	301	if operator == '%':
	302	m2 = re.match(r'(end)?(\w+)\s(.)', text)
	303	if not m2:
	304	raise exceptions.SyntaxException("Invalid control line: '%s'" % text, **self.exception_kwargs)
	305	(isend, keyword) = m2.group(1, 2)
	306	isend = (isend is not None)
	307
	308	if isend:
	309	if not len(self.control_line):
	310	raise exceptions.SyntaxException("No starting keyword '%s' for '%s'" % (keyword, text), **self.exception_kwargs)
	311	elif self.control_line[-1].keyword != keyword:
	312	raise exceptions.SyntaxException("Keyword '%s' doesn't match keyword '%s'" % (text, self.control_line[-1].keyword), **self.exception_kwargs)
	313	self.append_node(parsetree.ControlLine, keyword, isend, self.escape_code(text))
	314	else:
	315	self.append_node(parsetree.Comment, text)
	316	return True
	317	else:
	318	return False
	319
	320	def match_comment(self):
	321	"""matches the multiline version of a comment"""
	322	match = self.match(r"<%doc>(.*?)</%doc>", re.S)
	323	if match:
	324	self.append_node(parsetree.Comment, match.group(1))
	325	return True
	326	else:
	327	return False
	328

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/Mako-0.2.5-py2.6.egg/mako/lexer.py @ 3

異なるフォーマットでダウンロード: