Context Navigation

pyparsing.py @ 3

リビジョン 3, 97.0 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	# module pyparsing.py
2	#
3	# Copyright (c) 2003-2006 Paul T. McGuire
4	#
5	# Permission is hereby granted, free of charge, to any person obtaining
6	# a copy of this software and associated documentation files (the
7	# "Software"), to deal in the Software without restriction, including
8	# without limitation the rights to use, copy, modify, merge, publish,
9	# distribute, sublicense, and/or sell copies of the Software, and to
10	# permit persons to whom the Software is furnished to do so, subject to
11	# the following conditions:
12	#
13	# The above copyright notice and this permission notice shall be
14	# included in all copies or substantial portions of the Software.
15	#
16	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17	# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18	# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19	# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20	# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21	# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22	# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23	#
24	# Todo:
25	# - add pprint() - pretty-print output of defined BNF
26	#
27	#from __future__ import generators
28
29	__doc__ = \
30	"""
31	pyparsing module - Classes and methods to define and execute parsing grammars
32
33	The pyparsing module is an alternative approach to creating and executing simple grammars,
34	vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
35	don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
36	provides a library of classes that you use to construct the grammar directly in Python.
37
38	Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
39
40	from pyparsing import Word, alphas
41
42	# define grammar of a greeting
43	greet = Word( alphas ) + "," + Word( alphas ) + "!"
44
45	hello = "Hello, World!"
46	print hello, "->", greet.parseString( hello )
47
48	The program outputs the following::
49
50	Hello, World! -> ['Hello', ',', 'World', '!']
51
52	The Python representation of the grammar is quite readable, owing to the self-explanatory
53	class names, and the use of '+', '|' and '^' operators.
54
55	The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
56	object with named attributes.
57
58	The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59	- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
60	- quoted strings
61	- embedded comments
62	"""
63	__version__ = "1.4.1"
64	__versionTime__ = "05 February 2006 12:24"
65	__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
66
67	import string
68	import copy,sys
69	import warnings
70	import re
71	#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
72
73	def _ustr(obj):
74	"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
75	str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
76	then < returns the unicode object \| encodes it with the default encoding \| ... >.
77	"""
78	try:
79	# If this works, then _ustr(obj) has the same behaviour as str(obj), so
80	# it won't break any existing code.
81	return str(obj)
82
83	except UnicodeEncodeError, e:
84	# The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
85	# state that "The return value must be a string object". However, does a
86	# unicode object (being a subclass of basestring) count as a "string
87	# object"?
88	# If so, then return a unicode object:
89	return unicode(obj)
90	# Else encode it... but how? There are many choices... :)
91	# Replace unprintables with escape codes?
92	#return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
93	# Replace unprintables with question marks?
94	#return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
95	# ...
96
97	def _str2dict(strg):
98	return dict( [(c,0) for c in strg] )
99
# Character-set constants used to build Word() expressions.
# string.lowercase / string.uppercase are used when the running interpreter
# provides them (unchanged behaviour on Python 2); the ASCII constants are the
# fallback for interpreters where those names no longer exist.
alphas = ( getattr(string, "lowercase", string.ascii_lowercase) +
           getattr(string, "uppercase", string.ascii_uppercase) )
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
104
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Stored attributes:
     - pstr - the string being parsed
     - loc - the character offset in pstr at which the error was raised
     - msg - the error message
     - parserElement - the element that raised the error (may be None)

    Computed attributes (resolved on demand by __getattr__):
     - lineno - the line number of the exception text
     - col / column - the column number of the exception text
     - line - the line containing the exception text
    """
    __slots__ = ( "loc","msg","pstr","parserElement" )

    # Performance tuning: we construct a lot of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc, msg, elem=None ):
        self.loc = loc
        self.msg = msg
        self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text

        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        elif aname in ("col", "column"):
            return col( self.loc, self.pstr )
        elif aname == "line":
            return line( self.loc, self.pstr )
        else:
            # call form of raise: valid on every Python version, unlike
            # the "raise Class, value" statement form
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           Returns the (stripped) line; if markerString is empty the line is
           returned without any marker inserted.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column], markerString, line_str[line_column:]])
        return line_str.strip()
144
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class

       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
153
class ParseFatalException(ParseBaseException):
    """User-throwable exception, to be raised when inconsistent parse content
       is found; stops all parsing immediately."""
158
class RecursiveGrammarException(Exception):
    """Raised by validate() when the grammar could be improperly recursive.

       parseElementTrace holds the list of parse elements passed to the
       constructor.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
166
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

    Internally the tokens are kept in a list, and named results in a dict
    that maps each name to a list of (value, position) tuples.
    """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__modal" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object; the
        # __doinit flag tells __init__ whether the storage still needs setup.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a lot of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__modal = modal
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not self.__name:
                self.__modal = self.__modal and modal
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = (toklist.copy(),-1)
                    else:
                        self[name] = (ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except TypeError:
                        self[name] = toklist

    def __getitem__( self, i ):
        # integer/slice -> positional token access; anything else -> named result
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if self.__modal:
                # modal: only the most recently stored value for the name
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        # A tuple is stored as-is as (value, position); a bare value is
        # stored with position 0.
        if isinstance(v,tuple):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = self

    def __delitem__( self, i ):
        del self.__toklist[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __iter__( self ): return iter( self.__toklist )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,v[-1][0]) for k,v in self.__tokdict.items()]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # Unknown results names yield "" rather than raising; names that are
        # listed in __slots__ yield None.
        if name not in self.__slots__:
            if name in self.__tokdict:
                if self.__modal:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)

            def addoffset(a):
                # Explicit branch instead of the "cond and x or y" idiom,
                # which returned the negative position unchanged whenever
                # offset was 0 (0 is falsy).
                if a < 0:
                    return offset
                return a + offset

            otherdictitems = [(k,(v[0],addoffset(v[1])) )
                              for (k,vlist) in other.__tokdict.items() for v in vlist]
            for k,v in otherdictitems:
                # __setitem__ also re-parents any nested ParseResults value
                self[k] = v
        self.__toklist += other.__toklist
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        parts = []
        for tok in self.__toklist:
            if isinstance(tok, ParseResults):
                parts.append( _ustr(tok) )
            else:
                parts.append( repr(tok) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults are converted to nested lists; other tokens
           are returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__modal = self.__modal
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
                else:
                    out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                out += [ nl, nextLevelIndent, "<", resTag, ">", _ustr(res), "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object `sub` is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            return self.__parent.__lookup(self)
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None
402
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
       The first column is number 1.
    """
    # rfind gives -1 when there is no earlier newline, which makes the
    # first line's columns come out 1-based as well
    previousNewline = strg.rfind("\n", 0, loc)
    return loc - previousNewline
408
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
       The first line is number 1.
    """
    newlinesBefore = strg.count("\n", 0, loc)
    return 1 + newlinesBefore
414
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() returns -1 when there is no following newline; index 0 is a
    # valid hit (string starts with "\n"), so the test must be >= 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
424
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the
       expression, the location, and its (line,column) position."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print("Match %s at loc %s %s" % ( expr, loc, position ))
427
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
       expression and the matched tokens as a list."""
    matched = toks.asList()
    print("Matched %s -> %s" % ( expr, matched ))
430
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the exception."""
    print("Exception raised: %s" % ( exc, ))
433
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and ignores them."""
    return None
437
class ParserElement(object):
    """Abstract base level parser element class."""
    DEFAULT_WHITE_CHARS = " \n\t\r"

    def setDefaultWhitespaceChars( chars ):
        """Overrides the default whitespace chars
        """
        ParserElement.DEFAULT_WHITE_CHARS = chars
    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)

    def __init__( self, savelist=False ):
        self.parseAction = None
        # self.name is deliberately not defined here; subclasses set it
        self.strRepr = None
        self.resultsName = None
        self.saveAsList = savelist
        self.skipWhitespace = True
        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        self.mayReturnEmpty = False
        self.keepTabs = False
        self.ignoreExprs = []
        self.debug = False
        self.streamlined = False
        self.mayIndexError = True
        self.errmsg = ""
        self.modalResults = True
        # (start, success, exception) debug callbacks
        self.debugActions = ( None, None, None )
        self.re = None

    def copy( self ):
        """Make a copy of this ParserElement.  Useful for defining different parse actions
           for the same parsing pattern, using copies of the original parse element.

           Note that the copy's whitespace characters are reset to the current
           class-wide default."""
        cpy = copy.copy( self )
        # copy.copy is shallow: give the copy its own list so that calling
        # ignore() on one element does not silently change the other
        cpy.ignoreExprs = self.ignoreExprs[:]
        cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        return cpy

    def setName( self, name ):
        """Define name for this expression, for use in debugging."""
        self.name = name
        self.errmsg = "Expected " + self.name
        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Define name for referencing matching tokens as a nested attribute
           of the returned parse results.
           NOTE: this returns a *copy* of the original ParserElement object;
           this is so that the client can define a basic element, such as an
           integer, and reference it in multiple places with different names.
        """
        newself = self.copy()
        newself.resultsName = name
        newself.modalResults = not listAllMatches
        return newself

    def setParseAction( self, fn ):
        """Define action to perform when successfully matching parse element definition.
           Parse action fn is a callable method with the arguments (s, loc, toks) where:
            - s   = the original string being parsed
            - loc = the location of the matching substring
            - toks = a list of the matched tokens, packaged as a ParseResults object
           If the function fn modifies the tokens, it can return them as the return
           value from fn, and the modified list of tokens will replace the original.
           Otherwise, fn does not need to return any value.
        """
        self.parseAction = fn
        return self

    def skipIgnorables( self, instring, loc ):
        # keep sweeping over all ignorable expressions until a full pass
        # finds nothing more to skip
        exprsFound = True
        while exprsFound:
            exprsFound = False
            for e in self.ignoreExprs:
                try:
                    while 1:
                        loc,dummy = e.parse( instring, loc )
                        exprsFound = True
                except ParseException:
                    pass
        return loc

    def preParse( self, instring, loc ):
        if self.ignoreExprs:
            loc = self.skipIgnorables( instring, loc )

        if self.skipWhitespace:
            wt = self.whiteChars
            instrlen = len(instring)
            while loc < instrlen and instring[loc] in wt:
                loc += 1

        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        return loc, []

    def postParse( self, instring, loc, tokenlist ):
        return tokenlist

    def _applyParseAction( self, instring, tokensStart, retTokens ):
        """Run the parse action; if it returns a value, wrap that value as the new ParseResults."""
        tokens = self.parseAction( instring, tokensStart, retTokens )
        if tokens is None:
            return retTokens
        if isinstance(tokens,tuple):
            # for a tuple return value only element [1] is used as the tokens
            tokens = tokens[1]
        return ParseResults( tokens,
                             self.resultsName,
                             asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                             modal=self.modalResults )

    def parse( self, instring, loc, doActions=True, callPreParse=True ):
        """Match this element at loc; returns (new loc, ParseResults).

           An IndexError raised by parseImpl is converted to a ParseException
           located at the end of the input string.
        """
        debugging = ( self.debug ) #and doActions )

        if debugging:
            if self.debugActions[0]:
                self.debugActions[0]( instring, loc, self )
            if callPreParse:
                loc = self.preParse( instring, loc )
            tokensStart = loc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, loc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseException:
                # sys.exc_info() instead of "except X, err" so the same
                # source is valid on old and new interpreters
                err = sys.exc_info()[1]
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            if callPreParse:
                loc = self.preParse( instring, loc )
            tokensStart = loc
            # only pay for the try block when an IndexError is possible
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, loc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, loc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and doActions:
            if debugging:
                try:
                    retTokens = self._applyParseAction( instring, tokensStart, retTokens )
                except ParseException:
                    err = sys.exc_info()[1]
                    if self.debugActions[2]:
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                retTokens = self._applyParseAction( instring, tokensStart, retTokens )

        if debugging:
            if self.debugActions[1]:
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens

    def tryParse( self, instring, loc ):
        return self.parse( instring, loc, doActions=False )[0]

    def parseString( self, instring ):
        """Execute the parse expression with the given string.
           This is the main interface to the client code, once the complete
           expression has been built.
        """
        if not self.streamlined:
            self.streamline()
            self.saveAsList = True
        for e in self.ignoreExprs:
            e.streamline()
        if self.keepTabs:
            loc, tokens = self.parse( instring, 0 )
        else:
            loc, tokens = self.parse( instring.expandtabs(), 0 )
        return tokens

    def scanString( self, instring ):
        """Scan the input string for expression matches.  Each match will return the matching tokens, start location, and end location."""
        if not self.streamlined:
            self.streamline()
        for e in self.ignoreExprs:
            e.streamline()

        if not self.keepTabs:
            instring = instring.expandtabs()
        instrlen = len(instring)
        loc = 0
        preparseFn = self.preParse
        parseFn = self.parse
        while loc < instrlen:
            try:
                loc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, loc, callPreParse=False )
            except ParseException:
                loc += 1
            else:
                yield tokens, loc, nextLoc
                if nextLoc > loc:
                    loc = nextLoc
                else:
                    # zero-width match: step forward so the scan cannot
                    # report the same position forever
                    loc += 1

    def transformString( self, instring ):
        """Extension to scanString, to modify matching text with modified tokens that may
           be returned from a parse action.  To use transformString, define a grammar and
           attach a parse action to it that modifies the returned token list.
           Invoking transformString() on a target string will then scan for matches,
           and replace the matched text patterns according to the logic in the parse
           action.  transformString() returns the resulting transformed string."""
        out = []
        lastE = 0
        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
        # keep string locs straight between transformString and scanString
        self.keepTabs = True
        for t,s,e in self.scanString( instring ):
            out.append( instring[lastE:s] )
            if t:
                if isinstance(t,ParseResults):
                    out += t.asList()
                elif isinstance(t,list):
                    out += t
                else:
                    out.append(t)
            lastE = e
        out.append(instring[lastE:])
        return "".join(out)

    def searchString( self, instring ):
        """Another extension to scanString, simplifying the access to the tokens found
           to match the given parse expression.
        """
        return [ t[0] for t,s,e in self.scanString( instring ) ]

    def _coerceOperand( self, other ):
        """Promote a string operand to a Literal; warn about any other non-ParserElement operand."""
        if isinstance( other, basestring ):
            other = Literal( other )
        if not isinstance( other, ParserElement ):
            # stacklevel=3: helper -> operator method -> user's expression
            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                    SyntaxWarning, stacklevel=3)
        return other

    def __add__(self, other ):
        """Implementation of + operator - returns And"""
        other = self._coerceOperand( other )
        return And( [ self, other ] )

    def __radd__(self, other ):
        """Implementation of + operator when the left operand is not a ParserElement"""
        other = self._coerceOperand( other )
        if not isinstance( other, ParserElement ):
            # "other + self" would re-enter this method without end
            return NotImplemented
        return other + self

    def __or__(self, other ):
        """Implementation of | operator - returns MatchFirst"""
        other = self._coerceOperand( other )
        return MatchFirst( [ self, other ] )

    def __ror__(self, other ):
        """Implementation of | operator when the left operand is not a ParserElement"""
        other = self._coerceOperand( other )
        if not isinstance( other, ParserElement ):
            return NotImplemented
        return other | self

    def __xor__(self, other ):
        """Implementation of ^ operator - returns Or"""
        other = self._coerceOperand( other )
        return Or( [ self, other ] )

    def __rxor__(self, other ):
        """Implementation of ^ operator when the left operand is not a ParserElement"""
        other = self._coerceOperand( other )
        if not isinstance( other, ParserElement ):
            return NotImplemented
        return other ^ self

    def __and__(self, other ):
        """Implementation of & operator - returns Each"""
        other = self._coerceOperand( other )
        return Each( [ self, other ] )

    def __rand__(self, other ):
        """Implementation of & operator when the left operand is not a ParserElement"""
        other = self._coerceOperand( other )
        if not isinstance( other, ParserElement ):
            return NotImplemented
        return other & self

    def __invert__( self ):
        """Implementation of ~ operator - returns NotAny"""
        return NotAny( self )

    def suppress( self ):
        """Suppresses the output of this ParserElement; useful to keep punctuation from
           cluttering up returned output.
        """
        return Suppress( self )

    def leaveWhitespace( self ):
        """Disables the skipping of whitespace before matching the characters in the
           ParserElement's defined pattern.  This is normally only used internally by
           the pyparsing module, but may be needed in some whitespace-sensitive grammars.
        """
        self.skipWhitespace = False
        return self

    def setWhitespaceChars( self, chars ):
        """Overrides the default whitespace chars for this element (and
           re-enables whitespace skipping).  Returns self, like the other setters.
        """
        self.skipWhitespace = True
        self.whiteChars = chars
        return self

    def parseWithTabs( self ):
        """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
           Must be called before parseString when the input grammar contains elements that
           match <TAB> characters."""
        self.keepTabs = True
        return self

    def ignore( self, other ):
        """Define expression to be ignored (e.g., comments) while doing pattern
           matching; may be called repeatedly, to define multiple comment or other
           ignorable patterns.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                self.ignoreExprs.append( other )
        else:
            self.ignoreExprs.append( Suppress( other ) )
        return self

    def setDebugActions( self, startAction, successAction, exceptionAction ):
        """Enable display of debugging messages while doing pattern matching.
           Any action passed as None (or otherwise false) is replaced by the
           corresponding default action."""
        self.debugActions = (startAction or _defaultStartDebugAction,
                             successAction or _defaultSuccessDebugAction,
                             exceptionAction or _defaultExceptionDebugAction)
        self.debug = True
        return self

    def setDebug( self, flag=True ):
        """Enable display of debugging messages while doing pattern matching.
           Pass flag=False to turn debugging off again."""
        if flag:
            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
        else:
            self.debug = False
        return self

    def __str__( self ):
        return self.name

    def __repr__( self ):
        return _ustr(self)

    def streamline( self ):
        self.streamlined = True
        self.strRepr = None
        return self

    def checkRecursion( self, parseElementList ):
        pass

    def validate( self, validateTrace=None ):
        """Check defined expressions for valid structure, check for infinite recursive definitions."""
        # validateTrace is accepted for interface compatibility but is not
        # used by this base implementation
        self.checkRecursion( [] )

    def parseFile( self, file_or_filename ):
        """Execute the parse expression on the given file or filename.
           If a filename is specified (instead of a file object),
           the entire file is opened, read, and closed before parsing.
        """
        try:
            file_contents = file_or_filename.read()
        except AttributeError:
            f = open(file_or_filename, "rb")
            try:
                file_contents = f.read()
            finally:
                # close even if the read fails
                f.close()
        return self.parseString(file_contents)
839
840
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )
        # one exception instance is created up front and stored on the token
        self.myException = ParseException("",0,"",self)

    def setName(self, name):
        """Define the debugging name and keep the stored exception's message in step with it."""
        named = super(Token,self).setName(name)
        message = "Expected " + self.name
        self.errmsg = message
        named.myException.msg = message
        return named
852
853
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # parseImpl is inherited unchanged from ParserElement and returns
        # an empty token list without reading the input
        self.mayIndexError = False
        self.mayReturnEmpty = True
861
862
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        message = "Unmatchable token"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl( self, instring, loc, doActions=True ):
        # always fails: update the stored exception with the current
        # position and raise it
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
878
879
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # an empty literal is turned into an Empty instance in place
            self.__class__ = Empty
        quoted = '"%s"' % self.match
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayReturnEmpty = False
        self.myException.msg = self.errmsg
        self.mayIndexError = False

    # Performance tuning: this routine gets called a lot
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        if instring[loc] == self.firstMatchChar:
            if self.matchLen==1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        # no match: update the stored exception and raise it
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
911
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        # Resolve the default at call time, so that a prior call to
        # setDefaultKeywordChars() is honoured; a default bound in the
        # signature would be frozen when the class body is executed.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # A keyword matches only when the text matches AND neither the
        # character after it nor the character before it is an identifier
        # character.
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        # no match: update the stored exception and raise it
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Copy this Keyword; the copy's identifier characters are reset to the current default."""
        c = super(Keyword,self).copy()
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
972
973
class CaselessLiteral(Literal):
    """Literal that ignores letter case when matching.

    The returned token is always the string given to the constructor,
    never the spelling found in the input text.
    """

    def __init__(self, matchString):
        # the base class stores the upper-cased form as self.match
        super(CaselessLiteral, self).__init__(matchString.upper())
        # keep the caller's original spelling for the returned token
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice against the stored match string."""
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return end, self.returnString
995
class CaselessKeyword(Keyword):
    """Keyword that ignores letter case when matching.

    Unlike Keyword's own caseless branch, this parseImpl only checks the
    character following the keyword, not the one preceding it.
    """

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        # fixed: the argument was misspelled "identCars", which raised
        # NameError whenever a CaselessKeyword was constructed
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, keyword string) or raise the cached exception."""
        end = loc + self.matchLen
        if ((instring[loc:end].upper() == self.caselessmatch) and
                (loc >= len(instring) - self.matchLen or
                 instring[end].upper() not in self.identChars)):
            return end, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1009
class Word(Token):
    """Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0):
        """Store the character sets and length limits.

        max=0 means "no maximum"; exact > 0 overrides both min and max.
        When no length limit is given and neither set contains a space, an
        equivalent regular expression is compiled and used by parseImpl.
        """
        super(Word, self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = sys.maxint

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False

        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # fixed: this branch emits the initial characters as a literal,
                # so it is only valid for a single initial character.  The
                # original tested len(bodyCharsOrig), which turned e.g.
                # Word("abc", "x") into the regex "abc[x]*".
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: fall back to the character-by-character scan
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, matched word) or raise the cached exception."""
        if self.re:
            # fast path: let the compiled regex do the scan
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, a word that keeps going past it is a mismatch
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__(self):
        """Return the base-class string if available, else a cached "W:(...)" form."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # base __str__ failed; fall through to the generated representation
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1125
1126
class Regex(Token):
    """Token that matches text using a regular expression understood by
    Python's re module.
    """

    def __init__(self, pattern, flags=0):
        """Compile pattern with flags via re.compile(), both passed as-is.

        An empty pattern only produces a SyntaxWarning.  A pattern that fails
        to compile produces a SyntaxWarning and the original exception is
        re-raised.
        """
        super(Regex, self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except Exception:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the regex at loc.

        Returns (end of match, ParseResults) where the results hold the whole
        matched text plus one named entry per named group of the pattern.
        Raises the cached exception when the regex does not match.
        """
        found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        for groupName, groupValue in found.groupdict().items():
            tokens[groupName] = groupValue
        return found.end(), tokens

    def __str__(self):
        """Return the base-class string if available, else a cached "Re:(...)" form."""
        try:
            return super(Regex, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)
        return self.strRepr
1182
1183
class QuotedString(Token):
    """Token that matches a string enclosed in quoting characters."""

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True):
        """
           Defined with the following parameters:
           - quoteChar - string of one or more characters delimiting the quoted text;
             surrounding whitespace is stripped, and an empty result raises SyntaxError
           - escChar - character used to escape quotes, typically backslash (default=None)
           - escQuote - special sequence standing for an embedded quote (such as SQL's ""
             for an embedded ") (default=None)
           - multiline - whether the quoted text may span several lines (default=False)
           - unquoteResults - whether the delimiters are removed and escapes resolved in
             the returned text (default=True)
        """
        super(QuotedString, self).__init__()

        # whitespace cannot take part in a quote delimiter
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Opening delimiter followed by the start of the body group.  The body
        # may hold any character except the first quote character and the
        # escape character (and, for single-line strings, line breaks).
        delimiter = re.escape(self.quoteChar)
        firstQuoteInSet = _escapeRegexRangeChars(self.quoteChar[0])
        if escChar is not None:
            escInSet = _escapeRegexRangeChars(escChar) or ''
        else:
            escInSet = ''
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s([^%s%s]' % (delimiter, firstQuoteInSet, escInSet)
        else:
            self.flags = 0
            self.pattern = r'%s([^%s\n\r%s]' % (delimiter, firstQuoteInSet, escInSet)

        if self.quoteCharLen > 1:
            # allow proper prefixes of a multi-character delimiter inside the
            # body as long as the delimiter is not completed; longest first
            partials = []
            for i in range(self.quoteCharLen - 1, 0, -1):
                partials.append("%s[^%s]" % (re.escape(self.quoteChar[:i]),
                                             _escapeRegexRangeChars(self.quoteChar[i])))
            self.pattern += '|(' + ')|('.join(partials) + ')'
        if escQuote:
            self.pattern += (r'|(%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(%s.)' % re.escape(escChar))
            # used by parseImpl to drop the escape character from the result
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        # close the body group and require the closing delimiter
        self.pattern += (r')*%s' % delimiter)

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except Exception:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return (end of match, quoted text) or raise the cached exception.

        With unquoteResults set, the delimiters are removed, escChar-escaped
        characters are replaced by the bare character and escQuote sequences
        by the quote delimiter.
        """
        found = None
        # cheap first-character test before running the regex
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        text = found.group()
        if self.unquoteResults:
            # strip off quotes
            text = text[self.quoteCharLen:-self.quoteCharLen]

            if isinstance(text, basestring):
                # replace escaped characters
                if self.escChar:
                    text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

                # replace escaped quotes
                if self.escQuote:
                    text = text.replace(self.escQuote, self.quoteChar)

        return found.end(), text

    def __str__(self):
        """Return the base-class string if available, else a cached description."""
        try:
            return super(QuotedString, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, delimited by %s characters" % self.quoteChar
        return self.strRepr
1287
1288
class CharsNotIn(Token):
    """Token that matches a run of characters none of which is in a given set.

    Defined with a string of the disallowed characters and optional
    minimum, maximum and/or exact lengths.  Leading whitespace is not
    skipped by this token.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        # max=0 means unlimited; exact > 0 overrides both limits
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = sys.maxint

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, matched run) or raise the cached exception."""
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        loc += 1
        limit = min(begin + self.maxLen, len(instring))
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]

    def __str__(self):
        """Return the base-class string if available, else a cached "!W:(...)" form."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                # abbreviate long sets to their first four characters
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown
        return self.strRepr
1354
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is
    ignored by pyparsing grammars; this class is for grammars in which some
    whitespace is significant.  Define with a string containing the whitespace
    characters to be matched; default is " \\t\\r\\n".  Also takes optional min,
    max, and exact arguments, as defined for the Word class."""
    # display names used to build the token's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # characters this token must match may not be skipped as leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.whiteChars = "".join(skippable)
        #~ self.leaveWhitespace()
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

        # max=0 means unlimited; exact > 0 overrides both limits
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = sys.maxint

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, matched whitespace) or raise the cached exception."""
        if instring[loc] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        loc += 1
        limit = min(begin + self.maxLen, len(instring))
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]
1411
1412
class PositionToken(Token):
    """Base class for the position-testing tokens defined below it.

    Names the instance after its concrete class and marks it as able to
    return an empty result.
    """

    def __init__(self):
        super(PositionToken, self).__init__()
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
1418
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""

    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column
        is reached or a non-space character is found."""
        if col(loc, instring) == self.col:
            return loc
        if self.ignoreExprs:
            loc = self.skipIgnorables(instring, loc)
        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return (location of target column, text skipped to get there).

        Raises ParseException when loc is already past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
1441
class LineStart(PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""

    def __init__(self):
        super(LineStart, self).__init__()
        # newlines are significant here, so only blanks and tabs are skipped
        self.whiteChars = " \t"
        self.errmsg = "Expected start of line"
        self.myException.msg = self.errmsg

    def preParse(self, instring, loc):
        """Run the inherited pre-parse, then step over one newline if present."""
        loc = super(LineStart, self).preParse(instring, loc)
        if instring[loc] == "\n":
            return loc + 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at position 0, or at a position
        inside the string that directly follows a newline."""
        atLineStart = (loc == 0) or (loc < len(instring) and instring[loc - 1] == "\n")
        if atLineStart:
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1464
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        super(LineEnd, self).__init__()
        # newlines are significant here, so only blanks and tabs are skipped
        self.whiteChars = " \t"
        self.errmsg = "Expected end of line"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a newline and return it; at end of input succeed with an
        empty token list; otherwise raise the cached exception."""
        if loc >= len(instring):
            return loc, []
        if instring[loc] == "\n":
            return loc + 1, "\n"
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1485
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at position 0, or at the position
        that pre-parsing from position 0 arrives at (i.e. everything before
        loc was skippable)."""
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
1503
class StringEnd(PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list when no input remains at loc."""
        if loc >= len(instring):
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1519
1520
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement that holds a list of
    sub-expressions (self.exprs) to be combined by concrete subclasses."""

    def __init__(self, exprs, savelist=False):
        """Accept a list of expressions, a single string (wrapped in a
        Literal) or a single expression."""
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            self.exprs = exprs
        else:
            if isinstance(exprs, basestring):
                exprs = Literal(exprs)
            self.exprs = [exprs]

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add other to the sub-expressions, drop the cached string form
        and return self."""
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on shallow copies so the caller's expressions are left untouched
        copies = []
        for e in self.exprs:
            copies.append(copy.copy(e))
        self.exprs = copies
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on every sub-expression.

        A Suppress instance already present in self.ignoreExprs is not
        registered a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """Return the base-class string if available, else a cached
        "ClassName:(exprs)" form."""
        try:
            return super(ParseExpression, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline self and all sub-expressions, then flatten nesting.

        Collapses nested And's of the form And( And( And( a,b), c), d) to
        And( a,b,c,d ), but only if there are no parse actions, resultsNames
        or debugging on the nested And's (likewise for Or's and MatchFirst's).
        Only applied when this expression has exactly two sub-expressions.
        """
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        ownClass = self.__class__

        def canMerge(sub):
            # same kind of expression, carrying nothing that flattening would lose
            return (isinstance(sub, ownClass) and
                    sub.parseAction is None and
                    sub.resultsName is None and
                    not sub.debug)

        if len(self.exprs) == 2:
            head = self.exprs[0]
            if canMerge(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self.strRepr = None

            # re-read the last element: the list may just have been rebuilt
            tail = self.exprs[-1]
            if canMerge(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate to the base-class implementation and return its result."""
        #~ ret.saveAsList = True
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every sub-expression, passing on the trace extended by
        self, then run the recursion check on self."""
        trace = validateTrace[:] + [self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion([])
1609
1610
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    def __init__(self, exprs, savelist=True):
        super(And, self).__init__(exprs, savelist)
        # the sequence can be empty only if every element can be empty
        everyPartMayBeEmpty = True
        for e in exprs:
            if not e.mayReturnEmpty:
                everyPartMayBeEmpty = False
                break
        self.mayReturnEmpty = everyPartMayBeEmpty
        # whitespace handling is taken from the first expression
        leader = exprs[0]
        self.skipWhitespace = leader.skipWhitespace
        self.whiteChars = leader.whiteChars

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each sub-expression in turn, each starting where the previous
        one ended, and return (final location, accumulated tokens)."""
        loc, collected = self.exprs[0].parse(instring, loc, doActions)
        for e in self.exprs[1:]:
            loc, tokens = e.parse(instring, loc, doActions)
            # keep results that carry either tokens or named entries
            if tokens or tokens.keys():
                collected += tokens
        return loc, collected

    def __iadd__(self, other):
        """Append other (a string is wrapped in a Literal) and return self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other) #And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        """Run the recursion check on the sub-expressions, stopping after the
        first one that cannot return empty."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        """Return the assigned name if any, else a cached "{a b c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ".join(parts) + "}"
        return self.strRepr
1654
1655
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """

    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # the alternation can be empty as soon as one alternative can be
        anyPartMayBeEmpty = False
        for e in exprs:
            if e.mayReturnEmpty:
                anyPartMayBeEmpty = True
                break
        self.mayReturnEmpty = anyPartMayBeEmpty

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative with tryParse, then parse for real with the
        one whose match ended furthest into the input (the first such one on
        ties).

        When nothing matches, raise the failure that occurred furthest into
        the input; an IndexError counts as a failure at end of input.
        """
        furthestErrLoc = -1
        furthestErr = None
        bestEnd = -1
        bestExpr = None
        for e in self.exprs:
            try:
                endLoc = e.tryParse(instring, loc)
            except ParseException:
                # fetched via sys.exc_info() so the same source is valid on
                # every Python version
                err = sys.exc_info()[1]
                if err.loc > furthestErrLoc:
                    furthestErr = err
                    furthestErrLoc = err.loc
            except IndexError:
                if len(instring) > furthestErrLoc:
                    furthestErr = ParseException(instring, len(instring), e.errmsg, self)
                    furthestErrLoc = len(instring)
            else:
                if endLoc > bestEnd:
                    bestEnd = endLoc
                    bestExpr = e

        if bestEnd >= 0:
            return bestExpr.parse(instring, loc, doActions)
        if self.exprs:
            raise furthestErr
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Append other (a string is wrapped in a Literal) and return self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other) #Or( [ self, other ] )

    def __str__(self):
        """Return the assigned name if any, else a cached "{a ^ b}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"
        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
1714
1715
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """

    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        if not exprs:
            # no alternatives given: flagged as possibly empty
            self.mayReturnEmpty = True
            return
        anyPartMayBeEmpty = False
        for e in exprs:
            if e.mayReturnEmpty:
                anyPartMayBeEmpty = True
                break
        self.mayReturnEmpty = anyPartMayBeEmpty

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        When nothing matches, raise the failure that occurred furthest into
        the input; an IndexError counts as a failure at end of input.
        """
        furthestErrLoc = -1
        furthestErr = None
        for e in self.exprs:
            try:
                return e.parse(instring, loc, doActions)
            except ParseException:
                # fetched via sys.exc_info() so the same source is valid on
                # every Python version
                err = sys.exc_info()[1]
                if err.loc > furthestErrLoc:
                    furthestErr = err
                    furthestErrLoc = err.loc
            except IndexError:
                if len(instring) > furthestErrLoc:
                    furthestErr = ParseException(instring, len(instring), e.errmsg, self)
                    furthestErrLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if self.exprs:
            raise furthestErr
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Append other (a string is wrapped in a Literal) and return self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other) #MatchFirst( [ self, other ] )

    def __str__(self):
        """Return the assigned name if any, else a cached "{a | b}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"
        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
1772
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """

    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        everyPartMayBeEmpty = True
        for e in exprs:
            if not e.mayReturnEmpty:
                everyPartMayBeEmpty = False
                break
        self.mayReturnEmpty = everyPartMayBeEmpty
        self.skipWhitespace = True
        # Sort the expressions by how often they may occur.  For the Optional,
        # ZeroOrMore and OneOrMore wrappers the wrapped expression is stored.
        self.optionals = [ e.expr for e in exprs if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired

    def parseImpl(self, instring, loc, doActions=True):
        """Find an order in which the expressions match, then parse in it.

        A first phase uses tryParse to discover the match order; it repeats
        passes over the remaining candidates until a pass matches nothing.
        If required expressions are still unmatched a ParseException is
        raised.  A second phase parses for real in the discovered order and
        merges the results; values under a name that occurs more than once
        are combined rather than overwritten.
        """
        scanLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        while True:
            candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    scanLoc = e.tryParse(instring, scanLoc)
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(e)
                    # once-only expressions are taken out of later passes
                    if e in pendingRequired:
                        pendingRequired.remove(e)
                    elif e in pendingOptional:
                        pendingOptional.remove(e)
            if failures == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join([str(e) for e in pendingRequired])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        parsed = []
        for e in matchOrder:
            loc, results = e.parse(instring, loc, doActions)
            parsed.append(results)

        finalResults = ParseResults([])
        for r in parsed:
            # names already present: build the combined value before merging
            combined = {}
            for k in r.keys():
                if k in finalResults.keys():
                    merged = ParseResults(finalResults[k])
                    merged += ParseResults(r[k])
                    combined[k] = merged
            finalResults += ParseResults(r)
            for k, v in combined.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__(self):
        """Return the assigned name if any, else a cached "{a & b}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " & ".join(parts) + "}"
        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every sub-expression."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
1851
1852
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement that wraps a single expression
    (self.expr, possibly None) and forwards operations to it."""

    def __init__(self, expr, savelist=False):
        """Wrap expr; a string is converted to a Literal first."""
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is None:
            return
        # take over the wrapped expression's parsing characteristics
        self.mayIndexError = expr.mayIndexError
        self.skipWhitespace = expr.skipWhitespace
        self.whiteChars = expr.whiteChars

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise ParseException if none is set."""
        if self.expr is None:
            raise ParseException(instring, loc, "", self)
        return self.expr.parse(instring, loc, doActions)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a copy of the wrapped expression."""
        self.skipWhitespace = False
        # work on a shallow copy so the caller's expression is left untouched
        self.expr = copy.copy(self.expr)
        if self.expr is not None:
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on the wrapped expression.

        A Suppress instance already present in self.ignoreExprs is not
        registered a second time.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline self and the wrapped expression; return self."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if self is already on the trail,
        otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        trail = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(trail)

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression, passing on the trace extended by
        self, then run the recursion check on self."""
        trace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        """Return the base-class string if available, else a cached
        "ClassName:(expr)" form (None when there is no wrapped expression
        and nothing is cached)."""
        try:
            return super(ParseElementEnhance, self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
1919
1920
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.  FollowedBy only
       checks that the expression matches at the current position; the
       parsing position is not advanced and the returned token list is
       always empty."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a successful lookahead consumes nothing
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # tryParse raises if the expression does not match here
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
1933
1934
class NotAny(ParseElementEnhance):
    """Negative lookahead: succeeds only when the given parse expression
       does not match at the current position.  NotAny does not advance the
       parsing position and does not skip over leading whitespace.  It always
       returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unexpected token, "+_ustr(self.expr)
        # one exception object is built here and re-used on every failure
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent - that is a match for NotAny
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
1970
1971
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition: matches the given expression zero or more times."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        tokens = []
        try:
            loc, tokens = self.expr.parse( instring, loc, doActions )
            skipping = ( len(self.ignoreExprs) > 0 )
            # keep matching until the contained expression fails
            while True:
                if skipping:
                    loc = self.skipIgnorables( instring, loc )
                loc, more = self.expr.parse( instring, loc, doActions )
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            # a failed repetition simply ends the loop
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class, then mark the returned element so its results are saved as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2007
2008
class OneOrMore(ParseElementEnhance):
    """Repetition: matches the given expression one or more times."""
    def parseImpl( self, instring, loc, doActions=True ):
        # the first match is mandatory, so its exception is allowed to propagate
        loc, tokens = self.expr.parse( instring, loc, doActions )
        try:
            skipping = ( len(self.ignoreExprs) > 0 )
            while True:
                if skipping:
                    loc = self.skipIgnorables( instring, loc )
                loc, more = self.expr.parse( instring, loc, doActions )
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            # a failed repetition simply ends the loop
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class, then mark the returned element so its results are saved as a list."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2040
2041
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       If the expression is not found, a default value (when one was
       supplied) is returned as the only token; otherwise no tokens are
       returned.
    """
    def __init__( self, exprs, default=None ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.mayReturnEmpty = True
        self.defaultValue = default

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr.parse( instring, loc, doActions )
        except (ParseException,IndexError):
            # no match: loc is left untouched
            tokens = []
            if self.defaultValue is not None:
                tokens = [ self.defaultValue ]

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
2071
2072
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # work on a copy so the ignore expression is not added to the caller's object
            self.expr = copy.copy( self.expr )
            self.expr.ignore(ignore)
        self.includeMatch = include
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "No match found for "+_ustr(self.expr)
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        startLoc = loc
        target = self.expr
        end = len(instring)
        while loc < end:
            try:
                loc = target.skipIgnorables( instring, loc )
                # probe only: no parse actions, no pre-parse step
                target.parse( instring, loc, doActions=False, callPreParse=False )
                skipped = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [ skipped ]
                loc,mat = target.parse(instring,loc)
                if mat:
                    return loc, [ skipped, mat ]
                return loc, [ skipped ]
            except (ParseException,IndexError):
                # no match here, try one character further along
                loc += 1

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2113
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward to not overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign the real expression; copies its mayReturnEmpty flag,
           clears the cached string form and returns self."""
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only (the contained
           expression is not touched).  Returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; the streamlined flag is
           set first so that a recursive grammar does not loop.  Returns self."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless this element is already
           in the trace, then run the recursion check."""
        # the default list is only ever copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the name if one is set, else "Forward: " followed by the
           string form of the contained expression ("None" if unassigned)."""
        if hasattr(self,"name"):
            return self.name

        # Temporarily switch to _ForwardNoRecurse so that a recursive reference
        # back to this object renders as "..." instead of recursing forever.
        # The original class is remembered and restored afterwards, so that
        # instances of Forward subclasses are not demoted to plain Forward.
        savedClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = savedClass
        return "Forward: "+retString
2168
class _ForwardNoRecurse(Forward):
    """Stand-in class assigned to a Forward while its string form is being built; always renders as "..."."""
    def __str__( self ):
        placeholder = "..."
        return placeholder
2172
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but deliberately not forwarded to the base class
        super(TokenConverter,self).__init__( expr )
2177
2178
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.  Deprecated: construction emits a DeprecationWarning."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        return [ string.upper(tok) for tok in tokenlist ]
2188
2189
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.adjacent = adjacent
        self.joinString = joinString
        # suppress whitespace-stripping in contained parse expressions...
        if adjacent:
            self.leaveWhitespace()
        # ...but re-enable it on the Combine itself
        self.skipWhitespace = True

    def ignore( self, other ):
        # an adjacent Combine keeps the ignore expression to itself, bypassing
        # ParseElementEnhance.ignore, which would forward it to the contents
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        joined = "".join(tokenlist._asStringList(self.joinString))
        # start from a copy and empty its list part before adding the joined token
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks
2220
class Group(TokenConverter):
    """Converter that returns the matched tokens wrapped in a list - handy for keeping the tokens of ZeroOrMore and OneOrMore expressions together."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        grouped = [ tokenlist ]
        return grouped
2229
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for i,tok in enumerate(tokenlist):
            # the key is the stripped string form of the entry's first token
            ikey = _ustr(tok[0]).strip()
            if len(tok)==1:
                # key with no value
                value = ""
            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
                # key with one plain value
                value = tok[1]
            else:
                # everything after the key becomes the value
                rest = tok.copy()
                del rest[0]
                if len(rest)!= 1 or (isinstance(rest,ParseResults) and rest.keys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[ikey] = (value,i)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
2258
2259
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        nothing = []
        return nothing

    def suppress( self ):
        # already suppressed - no need to wrap again
        return self
2267
2268	#
2269	# global helpers
2270	#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = ( expr + ZeroOrMore( Suppress( delim ) + expr ) )
    # both forms are named "<expr><delim>..."
    return listExpr.setName(_ustr(expr)+_ustr(delim)+"...")
2283
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters \\ ^ - ] in s, spell newline and tab
       as the two-character sequences \\n and \\t, and return the result
       passed through _ustr."""
    for special in r"\^-]":
        s = s.replace(special,"\\"+special)
    escaped = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(escaped)
2291
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list (or tuple) of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       Raises TypeError (after issuing a SyntaxWarning) if strs is neither a
       string nor a list/tuple.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # work on a private list so the caller's sequence is not modified
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # fail explicitly instead of hitting an unbound 'symbols' further down
        raise TypeError("Invalid argument to oneOf, expected string or list")

    # Drop duplicates and move any symbol that is masked by an earlier, shorter
    # prefix in front of that prefix, so the longer alternative is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no change made for position i, move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class is enough
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
2350
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Builds Dict( ZeroOrMore( Group( key + value ) ) ) so the
       tokens are nested in the proper order.  The key pattern can include delimiting markers
       or punctuation, as long as they are suppressed, thereby leaving the significant key
       text.  The value pattern can include named results, so that the Dict results can
       include named token fields.
    """
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
2360
_bslash = "\\"
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the "[...]" range expressions accepted by srange()
# a backslash followed by one punctuation character yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# '\0x' + hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# '\0' + octal digits, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = "[" + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a grouped range (ParseResults holding the two end characters) expands to every
# character between the ends, inclusive; anything else is returned unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
2380
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.  If the input cannot be processed,
       an empty string is returned.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
    """
    try:
        body = _reBracketExpr.parseString(s).body
        pieces = [ _expanded(part) for part in body ]
        return "".join(pieces)
    except:
        # any failure while parsing or expanding yields an empty set
        return ""
2401
def replaceWith(replStr):
    """Helper that builds a parse action which ignores its arguments and always
       returns [replStr].  Especially useful when used with transformString().
    """
    def _replFunc(*_unused):
        replacement = [replStr]
        return replacement
    return _replFunc
2409
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the
       quotation marks) from the first token of a parsed quoted string.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
2416
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.
       Returns a new list holding the upper-cased form of every token in t.
    """
    # call the method on each token (rather than the unbound str.upper) so
    # that unicode tokens are handled as well as plain strings
    return [ tok.upper() for tok in t ]
2420
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.
       Returns a new list holding the lower-cased form of every token in t.
    """
    # call the method on each token (rather than the unbound str.lower) so
    # that unicode tokens are handled as well as plain strings
    return [ tok.lower() for tok in t ]
2424
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       When xml is true the tag name is matched case-sensitively and attribute
       values must be double-quoted strings; otherwise the tag name is matched
       caselessly, attribute names are converted to lower case, and attribute
       values may be quoted strings or a run of printable characters other
       than '>'.

       Returns the tuple (openTag, closeTag).
    """
    tagAttrName = Word(alphanums)
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + Keyword(tagStr) + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + Keyword(tagStr,caseless=True) + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                        Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine("</" + Keyword(tagStr,caseless=not xml) + ">")

    # results names are "start"/"end" + the tag name with ':' treated as a
    # word break, each word title-cased and the words run together
    nameSuffix = "".join(tagStr.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
2446
def makeHTMLTags(tagStr):
    """Helper returning the (opening, closing) tag expressions for an HTML tag name; calls _makeTags with xml set to False."""
    htmlTags = _makeTags( tagStr, xml=False )
    return htmlTags
2450
def makeXMLTags(tagStr):
    """Helper returning the (opening, closing) tag expressions for an XML tag name; calls _makeTags with xml set to True."""
    xmlTags = _makeTags( tagStr, xml=True )
    return xmlTags
2454
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xfe]")

_escapedChar = Regex(r"\\.")
# quoted strings: any run of ordinary characters, doubled quotes, or backslash escapes
dblQuotedString = Regex(r'"([^"\n\r\\]|("")|(\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'([^'\n\r\\]|('')|(\\.))*'").setName("string enclosed in single quotes")
quotedString = Regex(r'''("([^"\n\r\\]|("")|(\\.))*")|('([^'\n\r\\]|('')|(\\.))*')''').setName("quotedString using single or double quotes")

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# (the *? quantifiers are non-greedy so that a comment ends at the first terminator)
cStyleComment = Regex(r"\/\*[\s\S]*?\*\/").setName("C style comment")
htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/.*").setName("// comment")
cppStyleComment = Regex(r"(\/\*[\s\S]*?\*\/)|(\/\/.*)").setName("C++ style comment")
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one item of a comma separated list: words optionally joined by blanks/tabs,
# where the blanks are not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
2475
2476
2477	if __name__ == "__main__":
2478
2479	def test( teststring ):
2480	print teststring,"->",
2481	try:
2482	tokens = simpleSQL.parseString( teststring )
2483	tokenlist = tokens.asList()
2484	print tokenlist
2485	print "tokens = ", tokens
2486	print "tokens.columns =", tokens.columns
2487	print "tokens.tables =", tokens.tables
2488	print tokens.asXML("SQL",True)
2489	except ParseException, err:
2490	print err.line
2491	print " "*(err.column-1) + "^"
2492	print err
2493	print
2494
2495	selectToken = CaselessLiteral( "select" )
2496	fromToken = CaselessLiteral( "from" )
2497
2498	ident = Word( alphas, alphanums + "_$" )
2499	columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
2500	columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
2501	tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
2502	tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
2503	simpleSQL = ( selectToken + \
2504	( '*' \| columnNameList ).setResultsName( "columns" ) + \
2505	fromToken + \
2506	tableNameList.setResultsName( "tables" ) )
2507
2508	test( "SELECT * from XYZZY, ABC" )
2509	test( "select * from SYS.XYZZY" )
2510	test( "Select A from Sys.dual" )
2511	test( "Select AA,BB,CC from Sys.dual" )
2512	test( "Select A, B, C from Sys.dual" )
2513	test( "Select A, B, C from Sys.dual" )
2514	test( "Xelect A, B, C from Sys.dual" )
2515	test( "Select A, B, C frox Sys.dual" )
2516	test( "Select" )
2517	test( "Select ^^^ frox Sys.dual" )
2518	test( "Select A, B, C from Sys.dual, Table2 " )

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/pyparsing.py @ 3

異なるフォーマットでダウンロード: