Context Navigation

pyparsing.py @ 3

リビジョン 3, 144.1 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	# module pyparsing.py
2	#
3	# Copyright (c) 2003-2008 Paul T. McGuire
4	#
5	# Permission is hereby granted, free of charge, to any person obtaining
6	# a copy of this software and associated documentation files (the
7	# "Software"), to deal in the Software without restriction, including
8	# without limitation the rights to use, copy, modify, merge, publish,
9	# distribute, sublicense, and/or sell copies of the Software, and to
10	# permit persons to whom the Software is furnished to do so, subject to
11	# the following conditions:
12	#
13	# The above copyright notice and this permission notice shall be
14	# included in all copies or substantial portions of the Software.
15	#
16	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17	# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18	# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19	# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20	# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21	# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22	# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23	#
24	#from __future__ import generators
25
26	__doc__ = \
27	"""
28	pyparsing module - Classes and methods to define and execute parsing grammars
29
30	The pyparsing module is an alternative approach to creating and executing simple grammars,
31	vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32	don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33	provides a library of classes that you use to construct the grammar directly in Python.
34
35	Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
36
37	from pyparsing import Word, alphas
38
39	# define grammar of a greeting
40	greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42	hello = "Hello, World!"
43	print hello, "->", greet.parseString( hello )
44
45	The program outputs the following::
46
47	Hello, World! -> ['Hello', ',', 'World', '!']
48
49	The Python representation of the grammar is quite readable, owing to the self-explanatory
50	class names, and the use of '+', '|' and '^' operators.
51
52	The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53	object with named attributes.
54
55	The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56	- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57	- quoted strings
58	- embedded comments
59	"""
60
61	__version__ = "1.5.0"
62	__versionTime__ = "28 May 2008 10:05"
63	__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65	import string
66	from weakref import ref as wkref
67	import copy,sys
68	import warnings
69	import re
70	import sre_constants
71	import xml.sax.saxutils
72	#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
73
74	__all__ = [
75	'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76	'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77	'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78	'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79	'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80	'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81	'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82	'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83	'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84	'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
85	'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86	'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87	'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88	'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89	'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90	'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91	'indentedBlock',
92	]
93
94
# Detect whether we are running under Python 3.x and adapt the names that
# differ between the two major versions (detection contributed by
# Robert A. Clark).
_PY3K = sys.version_info[0] > 2
if _PY3K:
    _MAX_INT = sys.maxsize
    # Python 3 has no basestring type; alias it so the isinstance checks
    # elsewhere in this module keep working.
    basestring = str
else:
    _MAX_INT = sys.maxint
106
if _PY3K:
    # Python 3 strings are already Unicode, so the builtin is sufficient.
    _ustr = str
else:
    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        str(obj) is attempted first.  If that fails with a UnicodeEncodeError,
        unicode(obj) is used instead and the unicode object is returned as is.
        """
        try:
            # When this succeeds, _ustr(obj) behaves exactly like str(obj),
            # so existing code is not affected.
            text = str(obj)
        except UnicodeEncodeError:
            # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
            # state that "The return value must be a string object".  A unicode
            # object is treated here as such a string object and returned
            # unencoded.  Encoding it instead (with 'backslashreplace' or
            # 'replace' error handling) was considered but not adopted.
            text = unicode(obj)
        return text
133
134	def _str2dict(strg):
135	return dict( [(c,0) for c in strg] )
136	#~ return set( [c for c in strg] )
137
138	class _Constants(object):
139	pass
140
# Character-set constants exported for building grammars.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join( c for c in string.printable if c not in string.whitespace )
150
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions."""
    __slots__ = ( "loc","msg","pstr","parserElement" )

    # Performance tuning: we construct a lot of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Record where and why parsing failed.

        When msg is omitted, the first argument is taken to be the message
        and the parsed string is recorded as empty.
        """
        self.loc = loc
        self.parserElement = elem
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""

    def __getattr__( self, aname ):
        """Compute position attributes on demand; supported names are:
         - lineno - returns the line number of the exception text
         - col (or column) - returns the column number of the exception text
         - line - returns the line containing the exception text
        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
        the location of the exception with a special symbol.
        Returns the marked line with surrounding whitespace stripped.
        """
        text = self.line
        if markerString:
            # column is 1-based, string indices are 0-based
            split_at = self.column - 1
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()
196
class ParseException(ParseBaseException):
    """Exception thrown when a parse expression does not match the input.

    Supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text
    """
205
class ParseFatalException(ParseBaseException):
    """User-throwable exception raised when inconsistent parse content
    is found; stops all parsing immediately."""
210
class ParseSyntaxException(ParseFatalException):
    """Just like ParseFatalException, but thrown internally when an
    ErrorStop indicates that parsing is to stop immediately because
    an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # Rebuild this exception from the fields of the exception that
        # triggered it, so the location and message are preserved.
        base_init = super(ParseSyntaxException, self).__init__
        base_init(pe.pstr, pe.loc, pe.msg, pe.parserElement)
218
219	#~ class ReparseException(ParseBaseException):
220	#~ """Experimental class - parse actions can raise this exception to cause
221	#~ pyparsing to reparse the input string:
222	#~ - with a modified input string, and/or
223	#~ - with a modified start location
224	#~ Set the values of the ReparseException in the constructor, and raise the
225	#~ exception in a parse action to cause pyparsing to use the new string/location.
226	#~ Setting the values as None causes no change to be made.
227	#~ """
228	#~ def __init_( self, newstring, restartLoc ):
229	#~ self.newParseText = newstring
230	#~ self.reparseLoc = restartLoc
231
class RecursiveGrammarException(Exception):
    """Exception thrown by validate() if the grammar could be improperly recursive."""
    def __init__( self, parseElementList ):
        # the chain of parse elements that forms the suspected recursion
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple so that a trace that is itself a tuple
        # is formatted as a single value and not unpacked into the format
        # string (which would raise TypeError or print only one element).
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
239
240	class _ParseResultsWithOffset(object):
241	def __init__(self,p1,p2):
242	self.tup = (p1,p2)
243	def __getitem__(self,i):
244	return self.tup[i]
245	def __repr__(self):
246	return repr(self.tup)
247
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
     - as a list (len(results))
     - by list index (results[0], results[1], etc.)
     - by attribute (results.<resultsName>)
    """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Passing an existing ParseResults returns that same object; the
        # __doinit flag tells __init__ whether the storage still has to be
        # created.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a lot of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Wrap toklist, optionally registering it under the results name `name`.

        - toklist: a list of tokens (copied) or a single token (wrapped in a list)
        - name: results name; an int name is converted to a string
        - asList: when true the named value is stored as a nested ParseResults
        - modal: when false the name is recorded as accumulating, so lookups
          return every match instead of only the last one
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        # this line is related to debugging the asXML bug
        #~ asList = False

        if name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    # position -1 marks a name that is not tied to a token index
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Return a token by int/slice index, or a named result by key.

        For accumulating names all matches are returned as a ParseResults;
        otherwise only the most recent match is returned.
        """
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Store a token at int index k, or append v to the matches named k."""
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        """Delete tokens by int/slice index, or a named result by key."""
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Insert insStr into the token list at position index.

        The positions recorded for named results that lie after the
        insertion point are shifted by one so they stay aligned.
        """
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                # compare against the insertion index (the original code
                # referenced an undefined name here and raised NameError)
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # Attribute access falls back to named results; an unknown results
        # name yields "" rather than raising AttributeError.
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Append the tokens and named results of other to this object."""
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # Negative positions (such as the -1 stored by __init__) are
                # anchored at the start of the appended tokens.  An explicit
                # test is used because the `x and y or z` idiom returned the
                # negative value unchanged whenever offset was 0.
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        del other
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        """Return the tokens flattened into a list of strings, with sep
        placed between top-level items when sep is non-empty."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
        Nested ParseResults are converted to lists; other tokens are
        returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object `sub` is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list() so the lookup also works where values()/keys()
               # return non-indexable views
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        # sorted() instead of list.sort() so this does not depend on
        # items() returning a list
        keys = sorted(self.items())
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    #~ out.append('\n')
                    out.append( v.dump(indent,depth+1) )
                    #~ out.append('\n')
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
            #~ out.append('\n')
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the parent is held as a weak reference, so it is dereferenced
        # here and the referent (or None) is stored in the state
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None
597
598
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing <TAB>s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # a location that sits on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
610
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing <TAB>s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    newlines_before_loc = strg.count("\n", 0, loc)
    return newlines_before_loc + 1
622
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    The returned text does not include the terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "no newline" with -1; index 0 is a legitimate match
    # (loc on a newline that starts the string) and must end the line there.
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
632
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the
    expression together with the location, line and column being tried."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + expr_text + " at loc " + loc_text + position)
635
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
    expression and the matched tokens as a list."""
    expr_text = _ustr(expr)
    matched = str(toks.asList())
    print ("Matched " + expr_text + " -> " + matched)
638
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt fails: prints the
    exception that was raised."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
641
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
645
646	class ParserElement(object):
647	"""Abstract base level parser element class."""
648	DEFAULT_WHITE_CHARS = " \n\t\r"
649
650	def setDefaultWhitespaceChars( chars ):
651	"""Overrides the default whitespace chars
652	"""
653	ParserElement.DEFAULT_WHITE_CHARS = chars
654	setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
655
656	def __init__( self, savelist=False ):
657	self.parseAction = list()
658	self.failAction = None
659	#~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
660	self.strRepr = None
661	self.resultsName = None
662	self.saveAsList = savelist
663	self.skipWhitespace = True
664	self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
665	self.copyDefaultWhiteChars = True
666	self.mayReturnEmpty = False # used when checking for left-recursion
667	self.keepTabs = False
668	self.ignoreExprs = list()
669	self.debug = False
670	self.streamlined = False
671	self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
672	self.errmsg = ""
673	self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
674	self.debugActions = ( None, None, None ) #custom debug actions
675	self.re = None
676	self.callPreparse = True # used to avoid redundant calls to preParse
677	self.callDuringTry = False
678
679	def copy( self ):
680	"""Make a copy of this ParserElement. Useful for defining different parse actions
681	for the same parsing pattern, using copies of the original parse element."""
682	cpy = copy.copy( self )
683	cpy.parseAction = self.parseAction[:]
684	cpy.ignoreExprs = self.ignoreExprs[:]
685	if self.copyDefaultWhiteChars:
686	cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
687	return cpy
688
689	def setName( self, name ):
690	"""Define name for this expression, for use in debugging."""
691	self.name = name
692	self.errmsg = "Expected " + self.name
693	if hasattr(self,"exception"):
694	self.exception.msg = self.errmsg
695	return self
696
697	def setResultsName( self, name, listAllMatches=False ):
698	"""Define name for referencing matching tokens as a nested attribute
699	of the returned parse results.
700	NOTE: this returns a copy of the original ParserElement object;
701	this is so that the client can define a basic element, such as an
702	integer, and reference it in multiple places with different names.
703	"""
704	newself = self.copy()
705	newself.resultsName = name
706	newself.modalResults = not listAllMatches
707	return newself
708
709	def setBreak(self,breakFlag = True):
710	"""Method to invoke the Python pdb debugger when this element is
711	about to be parsed. Set breakFlag to True to enable, False to
712	disable.
713	"""
714	if breakFlag:
715	_parseMethod = self._parse
716	def breaker(instring, loc, doActions=True, callPreParse=True):
717	import pdb
718	pdb.set_trace()
719	_parseMethod( instring, loc, doActions, callPreParse )
720	breaker._originalParseMethod = _parseMethod
721	self._parse = breaker
722	else:
723	if hasattr(self._parse,"_originalParseMethod"):
724	self._parse = self._parse._originalParseMethod
725	return self
726
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       The argument count of f is read from its code object (the code object
       of __init__ when f is a class, or of __call__ as a fallback).
       Callables that accept *args, or exactly three arguments, are returned
       unchanged; otherwise a wrapper taking (s,l,t) is returned that
       forwards only the trailing arguments f accepts."""
    # bit in code.co_flags that is tested to detect a *args parameter
    STAR_ARGS = 4

    try:
        restore = None
        if isinstance(f,type):
            # f is a class: inspect its __init__, and put the class back
            # once the argument count is known
            restore = f
            f = f.__init__
        if not _PY3K:
            codeObj = f.func_code
        else:
            # NOTE(review): function objects expose __code__, not code, so this
            # lookup presumably raises AttributeError on Python 3 and control
            # moves to the handler below -- confirm whether that is intended.
            codeObj = f.code
        if codeObj.co_flags & STAR_ARGS:
            return f
        numargs = codeObj.co_argcount
        # do not count the implicit self of a method
        if not _PY3K:
            if hasattr(f,"im_self"):
                numargs -= 1
        else:
            if hasattr(f,"__self__"):
                numargs -= 1
        if restore:
            f = restore
    except AttributeError:
        try:
            if not _PY3K:
                call_im_func_code = f.__call__.im_func.func_code
            else:
                call_im_func_code = f.__code__

            # not a function, must be a callable object, get info from the
            # im_func binding of its bound __call__ method
            if call_im_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_im_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    # NOTE(review): subtracting 0 leaves numargs unchanged;
                    # verify whether this is deliberate.
                    numargs -= 0
        except AttributeError:
            if not _PY3K:
                call_func_code = f.__call__.func_code
            else:
                call_func_code = f.__call__.__code__
            # not a bound method, get info directly from __call__ method
            if call_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    numargs -= 1


    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f
    else:
        # NOTE(review): the chain below starts with `if`, not `elif`, so when
        # numargs > 3 the final else appears to rebind tmp to the
        # zero-argument wrapper -- confirm the intended behaviour.
        if numargs > 3:
            def tmp(s,l,t):
                return f(f.__call__.__self__, s,l,t)
        if numargs == 2:
            def tmp(s,l,t):
                return f(l,t)
        elif numargs == 1:
            def tmp(s,l,t):
                return f(t)
        else: #~ numargs == 0:
            def tmp(s,l,t):
                return f()
        # copy the identifying attributes of f onto the wrapper where possible
        try:
            tmp.__name__ = f.__name__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__doc__ = f.__doc__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__dict__.update(f.__dict__)
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
820
def setParseAction( self, *fns, **kwargs ):
    """Define action to perform when successfully matching parse element definition.
       Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
       fn(loc,toks), fn(toks), or just fn(), where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       Any previously defined parse actions are replaced.  The optional keyword
       argument callDuringTry (default False) is stored on the element; when
       true, the actions are also run for parse attempts made with actions
       otherwise disabled.  Returns self.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    # fns is a variable-length argument list so that a single callable can
    # be passed directly; every action is wrapped to accept (s,loc,toks)
    self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
841
def addParseAction( self, *fns, **kwargs ):
    """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.

       The new actions are appended after the existing ones.  The optional
       keyword argument callDuringTry can switch that flag on but never
       switches it off.  Returns self.
       """
    # fns is a variable-length argument list so that a single callable can
    # be passed directly; every action is wrapped to accept (s,loc,toks)
    self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
847
848	def setFailAction( self, fn ):
849	"""Define action to perform if parsing fails at this expression.
850	Fail acton fn is a callable function that takes the arguments
851	fn(s,loc,expr,err) where:
852	- s = string being parsed
853	- loc = location where expression match was attempted and failed
854	- expr = the parse expression that failed
855	- err = the exception thrown
856	The function returns no value. It may throw ParseFatalException
857	if it is desired to stop parsing immediately."""
858	self.failAction = fn
859	return self
860
861	def _skipIgnorables( self, instring, loc ):
862	exprsFound = True
863	while exprsFound:
864	exprsFound = False
865	for e in self.ignoreExprs:
866	try:
867	while 1:
868	loc,dummy = e._parse( instring, loc )
869	exprsFound = True
870	except ParseException:
871	pass
872	return loc
873
874	def preParse( self, instring, loc ):
875	if self.ignoreExprs:
876	loc = self._skipIgnorables( instring, loc )
877
878	if self.skipWhitespace:
879	wt = self.whiteChars
880	instrlen = len(instring)
881	while loc < instrlen and instring[loc] in wt:
882	loc += 1
883
884	return loc
885
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: match nothing and consume nothing.

    Returns the unchanged location together with an empty token list.
    """
    no_tokens = []
    return loc, no_tokens
888
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: return the token list unchanged."""
    unchanged = tokenlist
    return unchanged
891
892	#~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Parse instring at loc with this element, without result caching.

    Steps: optional preParse, parseImpl, postParse, wrapping the tokens in
    a ParseResults, then running the parse actions (when doActions or
    self.callDuringTry is set).  Debug actions and the fail action are
    invoked when configured.

    Returns a (new location, ParseResults) tuple.  An IndexError raised by
    parseImpl is converted into a ParseException located at the end of the
    input.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # sys.exc_info() is used instead of binding the exception in the
            # except clause, because the binding syntax differs between
            # Python 2 and Python 3
            err = sys.exc_info()[1]
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debug or fail hooks to call
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # an action that returns a value replaces the tokens
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                err = sys.exc_info()[1]
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # an action that returns a value replaces the tokens
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
964
def tryParse( self, instring, loc ):
    """Attempt a match at loc with parse actions disabled.

    Returns only the end location of the match.  A ParseFatalException
    raised during the attempt is replaced by an ordinary ParseException
    located at loc.
    """
    try:
        endLoc = self._parse( instring, loc, doActions=False )[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return endLoc
970
971	# this method gets repeatedly called during backtracking with the same arguments -
972	# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
973	def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
974	lookup = (self,instring,loc,callPreParse,doActions)
975	if lookup in ParserElement._exprArgCache:
976	value = ParserElement._exprArgCache[ lookup ]
977	if isinstance(value,Exception):
978	raise value
979	return value
980	else:
981	try:
982	value = self._parseNoCache( instring, loc, doActions, callPreParse )
983	ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
984	return value
985	except ParseBaseException, pe:
986	ParserElement._exprArgCache[ lookup ] = pe
987	raise
988
# default parse entry point; enablePackrat() rebinds it to _parseCache
_parse = _parseNoCache

# memo table used by _parseCache, keyed by
# (expr, instring, loc, callPreParse, doActions)
_exprArgCache = {}
def resetCache():
    """Discard every memoized parse result and exception."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)
996
_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, i.e. memoization of parse attempts.

    With packrat enabled, a repeated parse attempt at the same string
    location (common in complex grammars) returns the cached outcome
    instead of running the parsing/validating code again.  Both successful
    results and parsing exceptions are memoized.

    Because parse actions with side effects may behave differently under
    memoization, packrat parsing is off when pyparsing is first imported.
    Call ParserElement.enablePackrat() to activate it, preferably right
    after importing pyparsing.  If your program uses psyco to "compile as
    you go", enablePackrat must be called before psyco.full(); otherwise
    Python will crash.

    Calling this function more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
1018
def parseString( self, instring, parseAll=False ):
    """Parse instring with this expression, starting at position 0.

    This is the main entry point for client code once the complete
    grammar has been built.  Returns the resulting tokens.

    Set parseAll to True to require that the whole input string is
    consumed (equivalent to ending the grammar with StringEnd()).

    Note: unless parseWithTabs was called on the grammar, parseString
    calls expandtabs() on the input string so that parse actions see
    proper column numbers.  If the input contains tabs and your parse
    actions use the loc argument to index into the string being parsed,
    keep a consistent view of the input by doing one of:
     - calling parseWithTabs on your grammar before calling parseString
       (see L{I{parseWithTabs}<parseWithTabs>})
     - defining your parse action with the full (s,loc,toks) signature
       and indexing into its s argument
     - explicitly expanding the tabs in your input string before calling
       parseString
    """
    # cached results from an earlier input must not leak into this parse
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    endLoc, tokens = self._parse( instring, 0 )
    if parseAll:
        # raises if anything is left over after the match
        StringEnd()._parse( instring, endLoc )
    return tokens
1053
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for matches of this expression.

    This is a generator; each match is yielded as a
    (tokens, start location, end location) tuple.  The optional maxMatches
    argument stops the scan after that many matches.

    Start and end locations are relative to the string actually parsed.
    See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    A match that does not move past the current scan position (a
    zero-length match) is not reported; the scan steps one character
    forward instead, so that the generator always terminates.
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind once; these are called on every scan position
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    while loc <= instrlen and matches < maxMatches:
        try:
            preloc = preparseFn( instring, loc )
            nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
        except ParseException:
            loc = preloc+1
        else:
            if nextLoc > loc:
                matches += 1
                yield tokens, preloc, nextLoc
                loc = nextLoc
            else:
                # no progress was made: advance manually, otherwise the
                # same empty match would be produced forever
                loc = preloc+1
1085
def transformString( self, instring ):
    """Return a copy of instring in which every match is replaced by its tokens.

    Built on scanString: define a grammar, attach a parse action that
    modifies the returned token list, then call transformString() on the
    target string.  Text between matches is copied through unchanged and
    each matched span is replaced by the (stringified) tokens of the match.

    Side effect: sets self.keepTabs to True.
    """
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of
    # the string and to keep locations consistent with scanString
    self.keepTabs = True
    for toks, start, end in self.scanString( instring ):
        pieces.append( instring[prevEnd:start] )
        if toks:
            if isinstance(toks,ParseResults):
                pieces.extend( toks.asList() )
            elif isinstance(toks,list):
                pieces.extend( toks )
            else:
                pieces.append( toks )
        prevEnd = end
    pieces.append( instring[prevEnd:] )
    return "".join( [ _ustr(piece) for piece in pieces ] )
1110
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Collect the tokens of every match found by scanString.

    Returns a ParseResults holding one entry per match; the match
    locations are dropped.  The optional maxMatches argument stops the
    search after that many matches.
    """
    found = []
    for tokens, _start, _end in self.scanString( instring, maxMatches ):
        found.append( tokens )
    return ParseResults( found )
1117
def __add__(self, other ):
    """Implement self + other; the result is an And of both operands.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1127
def __radd__(self, other ):
    """Implement other + self when the left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1137
def __sub__(self, other):
    """Implement self - other; the result is an And with an error stop.

    The And is built from self, an And._ErrorStop() marker and other.
    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1147
def __rsub__(self, other ):
    """Implement other - self when the left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1157
def __mul__(self,other):
    """Implement self * other: repetition of this expression.

    other may be:
     - an int n: exactly n repetitions
     - a tuple (n, None) or (n,): n or more repetitions
     - a tuple (None, m): zero up to m repetitions
     - a tuple (n, m): at least n and at most m repetitions
     - an empty tuple or (None, None): same as (0, None)

    Raises TypeError for any other operand, and ValueError for a negative
    count, for m < n, or for a multiplication by 0 or (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples so that the 2-tuple logic below applies
        if len(other)==0:
            other = (None,None)
        elif len(other)==1:
            other = (other[0],None)
        if len(other)==2:
            if other[0] is None:
                other = (0, other[1])
            if isinstance(other[0],int) and other[1] is None:
                # open-ended repetition
                if other[0] == 0:
                    return ZeroOrMore(self)
                if other[0] == 1:
                    return OneOrMore(self)
                else:
                    return self*other[0] + ZeroOrMore(self)
            elif isinstance(other[0],int) and isinstance(other[1],int):
                minElements, optElements = other
                # from here on optElements counts only the optional extras
                optElements -= minElements
            else:
                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
        else:
            raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each further repetition is only tried
            # when the one before it matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1212
def __rmul__(self, other):
    """Implement other * self by delegating to __mul__ with the same operand."""
    product = self.__mul__(other)
    return product
1215
def __or__(self, other ):
    """Implement self | other; the result is a MatchFirst of both operands.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1225
def __ror__(self, other ):
    """Implement other | self when the left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1235
def __xor__(self, other ):
    """Implement self ^ other; the result is an Or of both operands.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1245
def __rxor__(self, other ):
    """Implement other ^ self when the left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1255
def __and__(self, other ):
    """Implement self & other; the result is an Each of both operands.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1265
def __rand__(self, other ):
    """Implement other & self when the left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1275
def __invert__( self ):
    """Implement ~self; the result is a NotAny wrapping this expression."""
    negated = NotAny( self )
    return negated
1279
def __call__(self, name):
    """Shortcut for setResultsName, with listAllMatches left at its default.

    These two definitions are equivalent::
      userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
      userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    named = self.setResultsName(name)
    return named
1287
def suppress( self ):
    """Return a Suppress wrapping this expression.

    Useful to keep punctuation from cluttering up the returned output.
    """
    silenced = Suppress( self )
    return silenced
1293
def leaveWhitespace( self ):
    """Turn off whitespace skipping before this expression is matched.

    Normally only used internally by the pyparsing module, but may be
    needed in some whitespace-sensitive grammars.  Returns self so calls
    can be chained.
    """
    setattr( self, "skipWhitespace", False )
    return self
1301
def setWhitespaceChars( self, chars ):
    """Replace the default whitespace characters with chars.

    Also switches whitespace skipping on and clears
    copyDefaultWhiteChars.  Returns self so calls can be chained.
    """
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    self.skipWhitespace = True
    return self
1309
def parseWithTabs( self ):
    """Keep <TAB>s in the input instead of expanding them to spaces.

    Overrides the default behavior of expanding tabs before parsing.
    Must be called before parseString when the grammar contains elements
    that match <TAB> characters.  Returns self so calls can be chained.
    """
    setattr( self, "keepTabs", True )
    return self
1316
def ignore( self, other ):
    """Register an expression to be skipped (e.g., comments) while matching.

    May be called repeatedly to define several ignorable patterns.  A
    Suppress instance is added only if it is not registered yet; any other
    expression is wrapped in a new Suppress and always added.  Returns self.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self
1328
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable debugging and install the three debug callbacks.

    A false value for any of the actions selects the corresponding
    module default.  Returns self so calls can be chained.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1336
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.

    A true flag installs the default debug actions; a false flag only
    clears self.debug.  Returns self so calls can be chained.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1345
def __str__( self ):
    """Return the name assigned to this expression."""
    label = self.name
    return label
1348
def __repr__( self ):
    """Return the same text as the string conversion done by _ustr."""
    text = _ustr(self)
    return text
1351
def streamline( self ):
    """Mark this expression as streamlined and drop its cached string form.

    Returns self so calls can be chained.
    """
    self.strRepr = None
    self.streamlined = True
    return self
1356
def checkRecursion( self, parseElementList ):
    """Recursion check hook; this base implementation does nothing."""
    return None
1359
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation ignores validateTrace and runs checkRecursion
    with a fresh empty list.
    """
    freshTrace = []
    self.checkRecursion( freshTrace )
1363
def parseFile( self, file_or_filename ):
    """Execute the parse expression on the given file or filename.

    If the argument has a read() method it is used as a file object and
    left open.  Otherwise it is treated as a filename: the file is opened
    in binary mode, read completely and closed before parsing.  The file
    is closed even when reading it fails.

    Returns the result of parseString on the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # release the handle whether or not read() succeeded
            f.close()
    return self.parseString(file_contents)
1376
def getException(self):
    """Build a ParseException for this expression.

    The exception carries an empty string, location 0 and self.errmsg.
    """
    exc = ParseException("",0,self.errmsg,self)
    return exc
1379
def __getattr__(self,aname):
    """Create the myException attribute on first access.

    The exception is built with getException and stored on the instance.
    Every other missing attribute raises AttributeError.
    """
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    ret = self.getException()
    self.myException = ret
    return ret
1386
def __eq__(self,other):
    """Compare this expression with other.

    For a string operand the result tells whether this expression followed
    by StringEnd() parses that string.  Any other operand is compared
    through super().
    """
    if not isinstance(other, basestring):
        return super(ParserElement,self)==other
    try:
        (self + StringEnd()).parseString(_ustr(other))
    except ParseBaseException:
        return False
    return True
1396
def __hash__(self):
    """Hash by object identity."""
    identity = id(self)
    return hash(identity)
1399
def __req__(self,other):
    """Return the result of self == other."""
    outcome = (self == other)
    return outcome
1402
1403
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name via the base class and rebuild errmsg from it.

        Returns whatever the base class setName returned.
        """
        named = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return named
1415
1416
class Empty(Token):
    """An empty token, will always match."""

    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1424
1425
class NoMatch(Token):
    """A token that will never match."""

    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this token's exception, located at loc."""
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
1441
1442
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a lot.
    # Compare the first character before anything else, and skip the
    # startswith call entirely for a single-character match string.
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal text at loc.

        Returns (end location, match string), or raises this token's
        exception located at loc.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen==1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
1475
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring non-identifier neighbours.

        The character after the match (if any) and the character before
        loc (if any) must not be in identChars.  Returns
        (end location, match string), or raises this token's exception
        located at loc.
        """
        matchEnd = loc+self.matchLen
        if self.caseless:
            if instring[ loc:matchEnd ].upper() == self.caselessmatch:
                if loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in self.identChars:
                    if loc == 0 or instring[loc-1].upper() not in self.identChars:
                        return matchEnd, self.match
        else:
            if instring[loc] == self.firstMatchChar:
                if self.matchLen==1 or instring.startswith(self.match,loc):
                    if loc >= len(instring)-self.matchLen or instring[matchEnd] not in self.identChars:
                        if loc == 0 or instring[loc-1] not in self.identChars:
                            return matchEnd, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy via the base class, then reset identChars to the class default."""
        # NOTE(review): the copy receives the raw DEFAULT_KEYWORD_CHARS string
        # whereas __init__ stores _str2dict(identChars) - confirm this is intended
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1536
1537
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base class holds the upper-cased text used for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at loc, ignoring case.

        Returns (end location, defining string), or raises this token's
        exception located at loc.
        """
        matchEnd = loc+self.matchLen
        candidate = instring[ loc:matchEnd ].upper()
        if candidate == self.match:
            return matchEnd, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1559
class CaselessKeyword(Keyword):
    """Keyword that is always constructed with caseless=True."""

    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, ignoring case.

        The character after the match (if any) must not be in identChars.
        Returns (end location, match string), or raises this token's
        exception located at loc.
        """
        matchEnd = loc+self.matchLen
        if instring[ loc:matchEnd ].upper() == self.caselessmatch:
            if loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in self.identChars:
                return matchEnd, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1573
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        """Build the character lookup tables and, when possible, a regex.

        Raises ValueError if min is less than 1.
        """
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no blank in either character set,
        # the word is matched by a compiled regular expression instead of
        # the character-by-character loop in parseImpl.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be matched as an escaped
                # literal; a longer initChars string is a set of alternatives
                # and must go through the character-class branch below
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the manual matching loop in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc.

        Returns (end location, matched text), or raises this token's
        exception when no acceptable word starts at loc.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan further than maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, a word that continues past it is rejected
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # a keyword may not be preceded or followed by a body character
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the assigned name, or a generated "W:(...)" description.

        Character sets longer than four characters are abbreviated in the
        generated form, which is cached in self.strRepr.
        """
        try:
            return super(Word,self).__str__()
        except Exception:
            # no name available; fall through to the generated description
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1700
1701
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # warn about the bad pattern, then let the error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the compiled pattern at loc.

        Returns (end location, ParseResults) where the results hold the
        matched text and one named entry per named group of the pattern.
        Raises this token's exception located at loc on failure.
        """
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        endLoc = found.end()
        ret = ParseResults(found.group())
        namedGroups = found.groupdict()
        for key in namedGroups:
            ret[key] = namedGroups[key]
        return endLoc,ret

    def __str__( self ):
        """Return the assigned name, or a generated "Re:(...)" description."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1757
1758
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.

       The match itself is done with a regular expression that __init__
       assembles from the quote, end-quote and escape settings.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after emitting a SyntaxWarning) when quoteChar or
           endQuoteChar is empty once surrounding whitespace has been stripped.
           Re-raises the regex compilation error (after a SyntaxWarning) when
           the assembled pattern does not compile.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # characters that may never appear unescaped inside the body: the first
        # end-quote character, the escape character, and (single-line mode
        # only) line terminators
        if escChar is not None:
            escInRange = _escapeRegexRangeChars(escChar) or ''
        else:
            escInRange = ''
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            lineBreaks = ''
        else:
            self.flags = 0
            lineBreaks = r'\n\r'
        self.pattern = r'%s(?:[^%s%s%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              lineBreaks,
              escInRange )

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the next character breaks the end-quote sequence
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                   _escapeRegexRangeChars(self.endQuoteChar[i]))
                                      for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to drop the escape character from results
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # name this class in the warning (the text previously said "Regex")
            warnings.warn("invalid pattern (%s) passed to QuotedString" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.

           Returns (new location, text).  The text has its quotes removed and
           escapes resolved when unquoteResults is set; otherwise it is the raw
           matched text.  Raises the element's shared exception object, with
           loc and pstr updated, when there is no match.
        """
        # cheap first-character test before running the regex
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: \g<1> is a regex group reference, not a
                    # string-literal escape
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the parent's string form if it yields one, otherwise a
           cached description naming the start and end quote strings."""
        try:
            return super(QuotedString,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1873
1874
class CharsNotIn(Token):
    """Token for matching words composed of characters not in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and the length limits.

           Raises ValueError when min is below 1.  A positive exact value
           replaces both the minimum and the maximum.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            # an exact length pins both limits
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        #self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of characters that are not in notChars, starting at loc.

           Returns (end location, matched text).  Raises the element's shared
           exception object when the first character is excluded or the run is
           shorter than minLen.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        loc += 1
        limit = min( begin+self.maxLen, len(instring) )
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]

    def __str__( self ):
        """Return the parent's string form if it yields one, otherwise a cached
           "!W:(...)" description showing at most the first four excluded
           characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
1945
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display labels used to build the element name, one per whitespace character
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Record which whitespace characters to match and the length limits.

           The characters being matched are removed from this element's own
           skippable whitespace set.  A positive exact value replaces both the
           minimum and the maximum; a non-positive max means no upper limit.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        stillSkipped = [ ch for ch in self.whiteChars if ch not in ws ]
        self.setWhitespaceChars( "".join(stillSkipped) )
        #~ self.leaveWhitespace()
        labels = [ White.whiteStrs[ch] for ch in ws ]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg

        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting at loc.

           Returns (end location, matched text).  Raises the element's shared
           exception object when the first character is not one of them or the
           run is shorter than minLen.
        """
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        loc += 1
        limit = min( begin + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]
2002
2003
class _PositionToken(Token):
    """Common base for the position-testing tokens defined below it.

       Names the element after its concrete class and marks it as able to
       match empty and as not raising IndexError.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
2010
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        """Remember the target column number colno."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column
           is reached, a non-space character is hit, or the input ends.
           Returns the resulting location."""
        if col(loc,instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (location of the target column, text between loc and it).

           Raises ParseException when loc is already past the target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
2033
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # only blanks and tabs are skippable, so newlines stay visible
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected start of line"
        #self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Return loc advanced by one when the character found after the
           parent's pre-parse skipping is a newline; otherwise return loc
           unchanged."""
        skipped = super(LineStart,self).preParse(instring,loc)
        # note: the incoming loc is adjusted, not the skipped-ahead position
        if instring[skipped] == "\n":
            return loc + 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is 0, equals the
           pre-parse position computed from offset 0, or directly follows a
           newline; otherwise raise the element's shared exception object."""
        if ( loc == 0 or
             loc == self.preParse( instring, 0 ) or
             instring[loc-1] == "\n" ):
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2058
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # only blanks and tabs are skippable, so newlines stay visible
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected end of line"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at loc, or the very end of the input.

           Returns (loc+1, "\\n") for a newline and (loc+1, []) at end of
           input.  In every other case raises the element's shared exception
           object.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2084
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is 0 or when everything
           before loc is skipped by pre-parsing from offset 0; otherwise
           raise the element's shared exception object."""
        # a non-zero loc is still acceptable if the entire string up to here
        # is just whitespace and ignorables
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2102
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed only when loc is at or beyond the end of instring.

           Returns (loc+1, []) when loc is exactly the end of the input and
           (loc, []) when loc is already past it.  Raises the element's shared
           exception object, with loc and pstr updated, when input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen is the only remaining case; the former trailing
        # "else: raise" branch could never execute and has been removed
        return loc, []
2126
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        # stored in the form produced by _str2dict for membership tests
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list at offset 0, or when the previous
           character is not a word character and the current one is;
           otherwise raise the element's shared exception object."""
        if loc != 0:
            wordSet = self.wordChars
            if instring[loc-1] in wordSet or instring[loc] not in wordSet:
                failure = self.myException
                failure.loc = loc
                failure.pstr = instring
                raise failure
        return loc, []
2148
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        # stored in the form produced by _str2dict for membership tests
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is at or past the end of
           a non-empty input (or the input is empty), or when the current
           character is not a word character and the previous one is;
           otherwise raise the element's shared exception object."""
        total = len(instring)
        if total > 0 and loc < total:
            wordSet = self.wordChars
            if instring[loc] in wordSet or instring[loc-1] not in wordSet:
                failure = self.myException
                failure.loc = loc
                failure.pstr = instring
                raise failure
        return loc, []
2173
2174
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Normalize exprs into the self.exprs list.

           A list is stored as given (not copied), a string becomes a
           one-element list holding a Literal, and anything else is wrapped in
           a one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            members = exprs
        elif isinstance( exprs, basestring ):
            members = [ Literal( exprs ) ]
        else:
            members = [ exprs ]
        self.exprs = members
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, drop the cached string
           form, and return self."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the expressions passed in are not altered
        copies = [ member.copy() for member in self.exprs ]
        self.exprs = copies
        for member in copies:
            member.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable on this element and on every contained
           expression.  A Suppress that is already registered is not added
           again.  Returns self."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for member in self.exprs:
            member.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the parent's string form if it yields one, otherwise a
           cached "ClassName:(exprs)" description."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline every contained expression, then flatten a directly
           nested expression of the same class into this one.  Returns self."""
        super(ParseExpression,self).streamline()

        for member in self.exprs:
            member.streamline()

        def absorbable( candidate ):
            # same class, and nothing attached that flattening would lose
            return ( isinstance( candidate, self.__class__ ) and
                     not(candidate.parseAction) and
                     candidate.resultsName is None and
                     not candidate.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if absorbable( head ):
                self.exprs = head.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError  |= head.mayIndexError

            tail = self.exprs[-1]
            if absorbable( tail ):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the parent implementation and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression with this element appended to
           the trace, then run the recursion check starting from an empty list."""
        trace = validateTrace[:]+[self]
        for member in self.exprs:
            member.validate(trace)
        self.checkRecursion( [] )
2267
2268	class And(ParseExpression):
2269	"""Requires all given ParseExpressions to be found in the given order.
2270	Expressions may be separated by whitespace.
2271	May be constructed using the '+' operator.
2272	"""
2273
2274	class _ErrorStop(Empty):
2275	def __new__(cls,args,*kwargs):
2276	return And._ErrorStop.instance
2277	_ErrorStop.instance = Empty()
2278	_ErrorStop.instance.leaveWhitespace()
2279
2280	def __init__( self, exprs, savelist = True ):
2281	super(And,self).__init__(exprs, savelist)
2282	self.mayReturnEmpty = True
2283	for e in self.exprs:
2284	if not e.mayReturnEmpty:
2285	self.mayReturnEmpty = False
2286	break
2287	self.setWhitespaceChars( exprs[0].whiteChars )
2288	self.skipWhitespace = exprs[0].skipWhitespace
2289	self.callPreparse = True
2290
2291	def parseImpl( self, instring, loc, doActions=True ):
2292	# pass False as last arg to _parse for first element, since we already
2293	# pre-parsed the string as part of our And pre-parsing
2294	loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
2295	errorStop = False
2296	for e in self.exprs[1:]:
2297	if e is And._ErrorStop.instance:
2298	errorStop = True
2299	continue
2300	if errorStop:
2301	try:
2302	loc, exprtokens = e._parse( instring, loc, doActions )
2303	except ParseBaseException, pe:
2304	raise ParseSyntaxException(pe)
2305	except IndexError, ie:
2306	raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
2307	else:
2308	loc, exprtokens = e._parse( instring, loc, doActions )
2309	if exprtokens or exprtokens.keys():
2310	resultlist += exprtokens
2311	return loc, resultlist
2312
2313	def __iadd__(self, other ):
2314	if isinstance( other, basestring ):
2315	other = Literal( other )
2316	return self.append( other ) #And( [ self, other ] )
2317
2318	def checkRecursion( self, parseElementList ):
2319	subRecCheckList = parseElementList[:] + [ self ]
2320	for e in self.exprs:
2321	e.checkRecursion( subRecCheckList )
2322	if not e.mayReturnEmpty:
2323	break
2324
2325	def __str__( self ):
2326	if hasattr(self,"name"):
2327	return self.name
2328
2329	if self.strRepr is None:
2330	self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2331
2332	return self.strRepr
2333
2334
2335	class Or(ParseExpression):
2336	"""Requires that at least one ParseExpression is found.
2337	If two expressions match, the expression that matches the longest string will be used.
2338	May be constructed using the '^' operator.
2339	"""
2340	def __init__( self, exprs, savelist = False ):
2341	super(Or,self).__init__(exprs, savelist)
2342	self.mayReturnEmpty = False
2343	for e in self.exprs:
2344	if e.mayReturnEmpty:
2345	self.mayReturnEmpty = True
2346	break
2347
2348	def parseImpl( self, instring, loc, doActions=True ):
2349	maxExcLoc = -1
2350	maxMatchLoc = -1
2351	maxException = None
2352	for e in self.exprs:
2353	try:
2354	loc2 = e.tryParse( instring, loc )
2355	except ParseException, err:
2356	if err.loc > maxExcLoc:
2357	maxException = err
2358	maxExcLoc = err.loc
2359	except IndexError:
2360	if len(instring) > maxExcLoc:
2361	maxException = ParseException(instring,len(instring),e.errmsg,self)
2362	maxExcLoc = len(instring)
2363	else:
2364	if loc2 > maxMatchLoc:
2365	maxMatchLoc = loc2
2366	maxMatchExp = e
2367
2368	if maxMatchLoc < 0:
2369	if maxException is not None:
2370	raise maxException
2371	else:
2372	raise ParseException(instring, loc, "no defined alternatives to match", self)
2373
2374	return maxMatchExp._parse( instring, loc, doActions )
2375
2376	def __ixor__(self, other ):
2377	if isinstance( other, basestring ):
2378	other = Literal( other )
2379	return self.append( other ) #Or( [ self, other ] )
2380
2381	def __str__( self ):
2382	if hasattr(self,"name"):
2383	return self.name
2384
2385	if self.strRepr is None:
2386	self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2387
2388	return self.strRepr
2389
2390	def checkRecursion( self, parseElementList ):
2391	subRecCheckList = parseElementList[:] + [ self ]
2392	for e in self.exprs:
2393	e.checkRecursion( subRecCheckList )
2394
2395
2396	class MatchFirst(ParseExpression):
2397	"""Requires that at least one ParseExpression is found.
2398	If two expressions match, the first one listed is the one that will match.
2399	May be constructed using the '\|' operator.
2400	"""
2401	def __init__( self, exprs, savelist = False ):
2402	super(MatchFirst,self).__init__(exprs, savelist)
2403	if exprs:
2404	self.mayReturnEmpty = False
2405	for e in self.exprs:
2406	if e.mayReturnEmpty:
2407	self.mayReturnEmpty = True
2408	break
2409	else:
2410	self.mayReturnEmpty = True
2411
2412	def parseImpl( self, instring, loc, doActions=True ):
2413	maxExcLoc = -1
2414	maxException = None
2415	for e in self.exprs:
2416	try:
2417	ret = e._parse( instring, loc, doActions )
2418	return ret
2419	except ParseException, err:
2420	if err.loc > maxExcLoc:
2421	maxException = err
2422	maxExcLoc = err.loc
2423	except IndexError:
2424	if len(instring) > maxExcLoc:
2425	maxException = ParseException(instring,len(instring),e.errmsg,self)
2426	maxExcLoc = len(instring)
2427
2428	# only got here if no expression matched, raise exception for match that made it the furthest
2429	else:
2430	if maxException is not None:
2431	raise maxException
2432	else:
2433	raise ParseException(instring, loc, "no defined alternatives to match", self)
2434
2435	def __ior__(self, other ):
2436	if isinstance( other, basestring ):
2437	other = Literal( other )
2438	return self.append( other ) #MatchFirst( [ self, other ] )
2439
2440	def __str__( self ):
2441	if hasattr(self,"name"):
2442	return self.name
2443
2444	if self.strRepr is None:
2445	self.strRepr = "{" + " \| ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2446
2447	return self.strRepr
2448
2449	def checkRecursion( self, parseElementList ):
2450	subRecCheckList = parseElementList[:] + [ self ]
2451	for e in self.exprs:
2452	e.checkRecursion( subRecCheckList )
2453
2454
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # the group can match empty only if every member can
        emptyOk = True
        for member in self.exprs:
            if not member.mayReturnEmpty:
                emptyOk = False
                break
        self.mayReturnEmpty = emptyOk
        self.skipWhitespace = True
        # the member groups are computed on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the members in whatever order the input presents them.

           A trial pass (tryParse) establishes the match order; the members are
           then parsed for real in that order and their results merged.
           Raises ParseException listing the required members that never
           matched.
        """
        if self.initExprGroups:
            members = self.exprs
            self.optionals = [ m.expr for m in members if isinstance(m,Optional) ]
            self.multioptionals = [ m.expr for m in members if isinstance(m,ZeroOrMore) ]
            self.multirequired = [ m.expr for m in members if isinstance(m,OneOrMore) ]
            self.required = [ m for m in members if not isinstance(m,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingReqd = self.required[:]
        pendingOpt  = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            misses = 0
            for cand in candidates:
                try:
                    probeLoc = cand.tryParse( instring, probeLoc )
                except ParseException:
                    misses += 1
                else:
                    matchOrder.append(cand)
                    if cand in pendingReqd:
                        pendingReqd.remove(cand)
                    elif cand in pendingOpt:
                        pendingOpt.remove(cand)
            if misses == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(m) for m in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [ m for m in self.exprs if isinstance(m,Optional) and m.expr in pendingOpt ]

        collected = []
        for matched in matchOrder:
            loc,results = matched._parse(instring,loc,doActions)
            collected.append(results)

        finalResults = ParseResults([])
        for res in collected:
            # names already present are combined, then re-assigned after the merge
            repeated = {}
            for key in res.keys():
                if key in finalResults.keys():
                    combined = ParseResults(finalResults[key])
                    combined += ParseResults(res[key])
                    repeated[key] = combined
            finalResults += ParseResults(res)
            for key,value in repeated.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           "{a & b}" rendering of the members."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            rendered = [ _ustr(m) for m in self.exprs ]
            self.strRepr = "{" + " & ".join( rendered ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every member."""
        chain = parseElementList[:] + [ self ]
        for member in self.exprs:
            member.checkRecursion( chain )
2539
2540
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr (a string is converted to a Literal).

           When expr is not None, this element copies expr's index-error,
           may-return-empty, whitespace, save-as-list and pre-parse settings
           and extends its own ignore list with expr's.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression without pre-parsing again.
           Raises ParseException when no expression is set."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a private copy of the
           wrapped expression.  Returns self."""
        self.skipWhitespace = False
        # copy only when there is something to copy; the copy used to be taken
        # before this check, so an unset expression raised AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable on this element and on the wrapped
           expression.  A Suppress that is already registered is not added
           again.  Returns self."""
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression.  Returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if this element is already in
           parseElementList; otherwise continue the check in the wrapped
           expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression with this element appended to the
           trace, then run the recursion check starting from an empty list."""
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the parent's string form if it yields one, otherwise a
           cached "ClassName:(expr)" description (left as None while no
           expression is set)."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2611
2612
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Trial-parse the wrapped expression at loc; any exception it raises
           propagates.  On success return loc unchanged with no tokens."""
        lookahead = self.expr
        lookahead.tryParse( instring, loc )
        return loc, []
2625
2626
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        #~ self.leaveWhitespace()
        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed, returning loc unchanged with no tokens, only when a trial
           parse of the wrapped expression fails with ParseException or
           IndexError.  If the trial parse succeeds, raise the element's shared
           exception object."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent: that is the success case
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           "~{expr}" rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2662
2663
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it fails.

           Returns (location after the last successful repetition, accumulated
           tokens).  A ParseException or IndexError from any repetition,
           including the first, just ends the loop.
        """
        tokens = []
        try:
            # the first repetition skips pre-parsing
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipFirst = ( len(self.ignoreExprs) > 0 )
            while True:
                nextLoc = loc
                if skipFirst:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           "[expr]..." rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the parent, then mark the returned element as
           saving its results as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2701
2702
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression once (failures propagate), then
           repeatedly until it fails.

           Returns (location after the last successful repetition, accumulated
           tokens).  A ParseException or IndexError from a later repetition
           just ends the loop.
        """
        # must be at least one
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        skipFirst = ( len(self.ignoreExprs) > 0 )
        try:
            while True:
                nextLoc = loc
                if skipFirst:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           "{expr}..." rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the parent, then mark the returned element as
           saving its results as a list."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2736
2737	class _NullToken(object):
2738	def __bool__(self):
2739	return False
2740	__nonzero__ = __bool__
2741	def __str__(self):
2742	return ""
2743
2744	_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression if it matches at loc.

           When it fails with ParseException or IndexError, loc is returned
           unchanged together with the default value (also stored under the
           wrapped expression's results name when it has one), or with an
           empty list when no default was given.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, otherwise a cached
           "[expr]" rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
2777
2778
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # work on a copy so the caller's expression does not pick up the ignore
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.includeMatch = include
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.asList = False
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time until the target expression matches.

           Returns (loc, [skippedText]); with include=True the target is also
           consumed and its tokens are appended to the skipped text.  Raises
           self.myException when the end of the string is passed without a match.
        """
        origin = loc
        limit = len(instring)
        target = self.expr
        while loc <= limit:
            try:
                loc = target._skipIgnorables( instring, loc )
                # lookahead only: no parse actions are run for this probe
                target._parse( instring, loc, doActions=False, callPreParse=False )
                skipped = instring[origin:loc]
                if not self.includeMatch:
                    return loc, [ skipped ]
                loc, matched = target._parse( instring, loc, doActions, callPreParse=False )
                if not matched:
                    return loc, [ skipped ]
                combined = ParseResults( skipped )
                combined += matched
                return loc, [ combined ]
            except (ParseException,IndexError):
                # no match here - try again one character further on
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2822
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Bind the real expression to this placeholder and copy its parsing
           attributes onto self.  A plain string is wrapped in a Literal.
           Returns None, so the result of '<<' must not be used as an expression.
        """
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        # setWhitespaceChars is called before skipWhitespace is copied, so the
        # copied flag is the one that ends up in effect
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; returns self."""
        if not self.streamlined:
            # set the flag first so a recursive grammar does not loop forever
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless self is already in the trace,
           then run the recursion check.  The default list is never mutated - a
           copy is extended and passed down.
        """
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or 'Forward: ' followed by the contained expression."""
        if hasattr(self,"name"):
            return self.name

        # Temporarily switch to _ForwardNoRecurse so that a recursive reference
        # to self renders as "..." instead of recursing without end.  Restore
        # the class the instance actually had, so subclasses are not demoted.
        origClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = origClass
        return "Forward: "+retString

    def copy(self):
        """Copy a defined Forward normally; for an undefined one return a new
           Forward that refers back to this one, so a later definition is seen.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
2893
class _ForwardNoRecurse(Forward):
    """Stand-in class swapped onto a Forward while it is being rendered, so
       that recursive references print as an ellipsis."""
    def __str__( self ):
        return "..."
2897
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseExpression, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted for interface compatibility but is not forwarded;
        # converters always start with saveAsList switched off
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
2903
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        # stacklevel=2 attributes the warning to the code constructing the Upcase
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning, stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a new list holding the upper-cased form of every token."""
        return [ string.upper(tok) for tok in tokenlist ]
2913
2914
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.adjacent = adjacent
        self.joinString = joinString
        # suppress whitespace-stripping in contained parse expressions ...
        if adjacent:
            self.leaveWhitespace()
        # ... but re-enable it on the Combine itself
        self.skipWhitespace = True

    def ignore( self, other ):
        """Register an ignorable expression; returns self.  When adjacent is set
           the ParserElement implementation is called directly, otherwise the
           inherited one."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with a single joined string, keeping any
           named results of the original token list."""
        joined = "".join(tokenlist._asStringList(self.joinString))
        # copy first so named results survive, then swap the list contents
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks
2945
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the matched tokens in an enclosing list."""
        grouped = [ tokenlist ]
        return grouped
2954
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add one named entry to tokenlist per non-empty element, keyed by the
           element's first token; the remaining tokens form the value."""
        for offset,entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue
            key = entry[0]
            if isinstance(key,int):
                # integer keys are stored under their stripped string form
                key = _ustr(key).strip()
            if len(entry) == 1:
                # key without a value
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1],ParseResults):
                # simple key/value pair
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value,offset)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
2987
2988
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        """Discard whatever was matched."""
        return []

    def suppress( self ):
        """Already suppressing, so no further wrapping is needed; returns self."""
        return self
2996
2997
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.called = False
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
    def __call__(self,s,l,t):
        """Run the wrapped action on first use; raise ParseException on any
           later call until reset() is invoked."""
        if self.called:
            raise ParseException(s,l,"")
        # the flag is only set once the action has returned without raising
        results = self.callable(s,l,t)
        self.called = True
        return results
    def reset(self):
        """Allow the wrapped action to be called again."""
        self.called = False
3011
def traceParseAction(f):
    """Decorator for debugging parse actions.

       Wraps f so that every call writes an ">>entering" line (function name,
       current input line, location and tokens) and a "<<leaving" line (return
       value or exception) to sys.stderr.  Exceptions raised by f are logged
       and then re-raised unchanged.
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        # __name__ exists on Python 2 and 3 functions alike (func_name is
        # Python 2 only); fall back to the class name for other callables
        thisFunc = getattr(f, "__name__", f.__class__.__name__)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # a leading extra argument is taken to be the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() avoids the version-specific "except X, e" / "except X as e" syntax
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3033
3034	#
3035	# global helpers
3036	#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if not combine:
        # delimiters are matched but dropped from the results
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        # delimiters are kept and everything is merged into one token
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3050
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # once the count is known, define the array part to match exactly that many items
        count = int(t[0])
        if count:
            body = Group(And([expr]*count))
        else:
            body = Group(empty)
        arrayExpr << body
        # returning no tokens suppresses the count itself
        return []
    lengthField = Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True)
    return ( lengthField + arrayExpr )
3064
3065	def _flatten(L):
3066	if type(L) is not list: return [L]
3067	if L == []: return L
3068	return _flatten(L[0]) + _flatten(L[1:])
3069
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do not use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several (possibly nested) tokens: match them one after another
        flatTokens = _flatten(t.asList())
        rep << And( [ Literal(tok) for tok in flatTokens ] )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3095
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will not match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do not use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of expr, so the parse actions
    # attached below to expr and to rep stay separate
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        expectedTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != expectedTokens:
                raise ParseException("",0,"")
        # install a fresh checker every time expr matches
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3121
def _escapeRegexRangeChars(s):
    """Escape s for use inside a regex [] character class."""
    # backslash goes first so the backslashes added for ^ - ] are not doubled
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special,"\\"+special)
    # newline and tab become their two-character escape sequences
    for raw,escaped in (("\n",r"\n"), ("\t",r"\t")):
        s = s.replace(raw,escaped)
    return _ustr(s)
3129
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       SyntaxWarning and is treated as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a private list: the reordering below deletes and
        # inserts items, which a tuple would not support
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        symbols = []

    # drop duplicates and move any symbol that is masked by an earlier, shorter
    # symbol in front of it, so that longer alternatives are tested first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for position i - move on to the next one
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3188
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
3198
# convenience constants for positional expressions; each is a ready-made,
# named instance of the corresponding class
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
3205
# grammar used by srange() to read regex-style '[...]' character set definitions

# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# '\0x' followed by hex digits -> the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# '\0' followed by octal digits -> the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = ( _escapedPunc
                | _escapedHexChar
                | _escapedOctChar
                | Word(_printables_less_backslash,exact=1) )
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = ( Literal("[")
                   + Optional("^").setResultsName("negate")
                   + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
                   + "]" )

def _expanded(p):
    """Expand a parsed two-character range into every character it spans;
       single characters (and ranges that expand to nothing) are returned as is."""
    if isinstance(p,ParseResults):
        spanned = ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
        if spanned:
            return spanned
    return p
3215
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: any parsing or expansion error yields "", while
        # KeyboardInterrupt and SystemExit are no longer swallowed
        return ""
3236
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.
    """
    def verifyCol(strg,locn,toks):
        # reject the match unless it begins in the required column
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3245
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().
    """
    def _replFunc(*_ignored):
        # whatever was matched is replaced by a fresh one-item list holding replStr
        replacement = [replStr]
        return replacement
    return _replFunc
3253
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    # drop the first and last character of the first token
    quoted = t[0]
    return quoted[1:-1]
3260
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ _ustr(tok).upper() for tok in t ]
3264
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ _ustr(tok).lower() for tok in t ]
3268
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    # replace the contents of t in place with the raw matched text
    del t[:]
    t += ParseResults(originalText)
    return t
3279
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for the
        # parsing routine, skipping this function and its immediate caller
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            # that routine's local 'loc' is returned as the end location
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references explicitly
        del fstack
3295
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr,basestring):
        resname = tagStr
        # the tag keyword is matched caselessly unless xml is set
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # every attribute is name="value" with a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # values may be quoted or bare, the value is optional, and attribute
        # names are lower-cased
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )
    # results name "empty" is True when the tag ends in "/>"
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" plus the title-cased tag name with colons removed
    resultSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+resultSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+resultSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3323
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    openTag, closeTag = _makeTags( tagStr, False )
    return openTag, closeTag
3327
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    openTag, closeTag = _makeTags( tagStr, True )
    return openTag, closeTag
3331
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.
       Names are looked up in the parsed tokens exactly as given here (tags built by
       makeHTMLTags store their attribute names in lower case).

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.

       If any positional name-value tuples are given, keyword arguments are ignored.
       The returned parse action raises ParseException when a required attribute
       is missing or has a different value.
    """
    if args:
        attrs = args[:]
    else:
        attrs = attrDict.items()
    # snapshot as a list of pairs so the parse action can iterate it repeatedly
    attrs = [(k,v) for k,v in attrs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique marker meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3362
# associativity markers for operatorPrecedence
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
    """
    ret = Forward()
    # a parenthesized sub-expression re-enters the complete grammar
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so the parse action member may be left out
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        if rightLeftAssoc == opAssoc.LEFT:
            isLeftAssoc = True
            # left-associative levels repeat operands of the previous level
            rhs = lastExpr
        elif rightLeftAssoc == opAssoc.RIGHT:
            isLeftAssoc = False
            # right-associative levels recurse into the level being defined
            rhs = thisExpr
        else:
            raise ValueError("operator must indicate right or left associativity")

        # each matchExpr is guarded by a FollowedBy lookahead of the same shape
        if arity == 1:
            if isLeftAssoc:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            else:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
        elif arity == 2:
            if opExpr is not None:
                matchExpr = FollowedBy(lastExpr + opExpr + rhs) + Group( lastExpr + OneOrMore( opExpr + rhs ) )
            else:
                # binary operation with no operator expression between the operands
                matchExpr = FollowedBy(lastExpr + rhs) + Group( lastExpr + OneOrMore( rhs ) )
        elif arity == 3:
            matchExpr = FollowedBy(lastExpr + opExpr1 + rhs + opExpr2 + rhs) + \
                        Group( lastExpr + opExpr1 + rhs + opExpr2 + rhs )
        else:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if pa:
            matchExpr.setParseAction( pa )
        # fall back to the previous level when no operator of this level is present
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3437
# Quoted-string expressions.  Inside the quotes a string may contain any
# character other than the quote, newline, carriage return or backslash, a
# doubled quote character, a \x hex escape, or a backslash followed by any
# character.  The body is repeated with '*', so empty strings match too.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either quoting style; both alternatives carry the same '*' repetition as the
# single-style expressions above
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u' prefix
unicodeString = Combine(_L('u') + quotedString.copy())
3442
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        # default content: runs of characters that are neither delimiters nor whitespace
        stopChars = opener+closer+ParserElement.DEFAULT_WHITE_CHARS
        if ignoreExpr is not None:
            # built one character at a time so an ignorable expression can stop the run
            content = Combine(OneOrMore(~ignoreExpr +
                                        CharsNotIn(stopChars,exact=1))
                              ).setParseAction(lambda t:t[0].strip())
        else:
            content = empty+CharsNotIn(stopChars).setParseAction(lambda t:t[0].strip())
    ret = Forward()
    if ignoreExpr is not None:
        items = ignoreExpr | ret | content
    else:
        items = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    return ret
3482
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.
    """
    def checkPeerIndent(s,l,t):
        # a statement must start in the column on top of the indent stack
        if l >= len(s): return
        curCol = col(l,s)
        if curCol == indentStack[-1]:
            return
        if curCol > indentStack[-1]:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper than the current level
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # leaving a block: the column must have dropped back to an outer level
        if l >= len(s): return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
                        FollowedBy(blockStatementExpr) +
                        INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT )
    else:
        smExpr = Group( Optional(NL) +
                        (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # a backslash directly before a line end is ignored inside block statements
    blockStatementExpr.ignore("\\" + LineEnd())
    return smExpr
3534
# character sets built from 8-bit code ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# expressions matching any opening / closing HTML tag
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# matches &gt; &lt; &amp; &nbsp; &quot; and exposes the entity name as results name "entity"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
# entity name -> replacement character; &quot; is the double quote character
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
def replaceHTMLEntity(t):
    """Parse action for commonHTMLEntity: return the character for the matched
       entity name, or None when the name is not in the map."""
    return _htmlEntityMap.get(t.entity)
3542
# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# /* ... */ - non-greedy, so the first closing "*/" ends the comment
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to (not including) the end of the current line, with no whitespace skipping
restOfLine = Regex(r".*").leaveWhitespace()
# // comment; a backslash-newline continues it onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ block comment or a // line comment
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# Every character of printables except the comma.
_noncomma = "".join( [ c for c in printables if c != "," ] )
# One unquoted list item: runs of non-comma characters, optionally joined
# by spaces/tabs as long as the whitespace is not followed by a comma or
# the end of the line.
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Delimited list whose items are quoted strings or unquoted items; an
# empty slot yields the default "".
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
3558
3559
if __name__ == "__main__":
    # Self-test: build a tiny "SELECT ... FROM ..." grammar and run it over
    # a handful of valid and invalid statements, printing the results.
    import sys

    def test( teststring ):
        """Parse teststring with simpleSQL and print the result.

        On success prints the token list, the named "columns" and "tables"
        results and an XML dump; on a parse error prints the offending line
        with a caret under the error column, followed by the exception.
        A blank line is printed after either outcome.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # Fetch the exception through sys.exc_info() so this clause
            # parses under both the "except X, e" and "except X as e" dialects.
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # An explicit empty string gives a blank line whether print is a
        # statement or a function; a bare print() would show "()" on Python 2.
        print ("")

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # Dotted names are combined into one token and upper-cased.
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx_extras/pyparsing.py @ 3

異なるフォーマットでダウンロード: