root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx_extras/pyparsing.py

リビジョン 3, 144.1 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1# module pyparsing.py
2#
3# Copyright (c) 2003-2008  Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24#from __future__ import generators
25
# Module documentation, bound explicitly to __doc__.  The text below is a
# runtime string and is kept exactly as written.
__doc__ = \
"""
pyparsing module - Classes and methods to define and execute parsing grammars

The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.

Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::

    from pyparsing import Word, alphas

    # define grammar of a greeting
    greet = Word( alphas ) + "," + Word( alphas ) + "!"

    hello = "Hello, World!"
    print hello, "->", greet.parseString( hello )

The program outputs the following::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.

The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
object with named attributes.

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
 - quoted strings
 - embedded comments
"""

# Release metadata for this copy of the module: version string, build
# timestamp and author contact.
__version__ = "1.5.0"
__versionTime__ = "28 May 2008 10:05"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65import string
66from weakref import ref as wkref
67import copy,sys
68import warnings
69import re
70import sre_constants
71import xml.sax.saxutils
72#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
73
# Public API of the module: the names bound by "from pyparsing import *".
# Class names come first, followed by helper functions and predefined
# expressions.  NOTE(review): most of these names are defined further down
# the file - confirm every entry still exists when editing this list.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock',
]
93
94
95"""
96Detect if we are running version 3.X and make appropriate changes
97Robert A. Clark
98"""
99if sys.version_info[0] > 2:
100    _PY3K = True
101    _MAX_INT = sys.maxsize
102    basestring = str
103else:
104    _PY3K = False
105    _MAX_INT = sys.maxint
106
if _PY3K:
    # str is already Unicode-aware on Python 3.
    _ustr = str
else:
    def _ustr(obj):
        """Unicode-tolerant stand-in for str(obj).

           str(obj) is tried first, so objects that convert cleanly behave
           exactly as they would with plain str().  If that raises
           UnicodeEncodeError, unicode(obj) is returned instead.
        """
        try:
            converted = str(obj)
        except UnicodeEncodeError:
            # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
            # say "The return value must be a string object"; a unicode
            # object is handed back here on the assumption that it counts.
            # Alternatives considered but not used: encoding with the default
            # encoding and either 'backslashreplace_errors' (escape codes) or
            # 'replace' (question marks) for the unprintable characters.
            converted = unicode(obj)
        return converted
133
134def _str2dict(strg):
135    return dict( [(c,0) for c in strg] )
136    #~ return set( [c for c in strg] )
137
138class _Constants(object):
139    pass
140
# Character-class strings.  Python 3 only offers the ascii_* names; Python 2
# uses string.lowercase / string.uppercase.
if _PY3K:
    alphas     = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas     = string.lowercase + string.uppercase
nums       = string.digits
hexnums    = nums + "ABCDEFabcdef"
alphanums  = alphas + nums
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join( c for c in string.printable if c not in string.whitespace )
150
class ParseBaseException(Exception):
    """Common base class of all exceptions raised while parsing."""
    __slots__ = ( "loc", "msg", "pstr", "parserElement" )

    # Performance tuning: a *lot* of these get constructed, so the
    # constructor is kept as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Record the parsed string, the failure location, the message and
           the parser element involved.  When msg is omitted, the first
           argument is taken to be the message and pstr is set to ""."""
        self.loc = loc
        self.parserElement = elem
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""

    def __getattr__( self, aname ):
        """Compute location attributes on demand; supported names are:
            - lineno - the line number of the exception text
            - col, column - the column number of the exception text
            - line - the line containing the exception text
           Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the (stripped) input line on which the exception occurred,
           with markerString spliced in at the exception column.  An empty
           markerString leaves the line unmarked.
        """
        text = self.line
        cut = self.column - 1
        if markerString:
            text = text[:cut] + markerString + text[cut:]
        return text.strip()
196
class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match the input.

       Attributes available by name:
        - lineno - the line number of the exception text
        - col - the column number of the exception text
        - line - the line containing the exception text
    """
205
class ParseFatalException(ParseBaseException):
    """User-raisable exception for inconsistent parse content; it stops
       all parsing immediately."""
210
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an ErrorStop
       signals that parsing must stop at once because a syntax error that
       cannot be backtracked over has been found."""
    def __init__(self, pe):
        # take string, location, message and element from the wrapped exception
        details = ( pe.pstr, pe.loc, pe.msg, pe.parserElement )
        super(ParseSyntaxException, self).__init__( *details )
218
219#~ class ReparseException(ParseBaseException):
220    #~ """Experimental class - parse actions can raise this exception to cause
221       #~ pyparsing to reparse the input string:
222        #~ - with a modified input string, and/or
223        #~ - with a modified start location
224       #~ Set the values of the ReparseException in the constructor, and raise the
225       #~ exception in a parse action to cause pyparsing to use the new string/location.
226       #~ Setting the values as None causes no change to be made.
227       #~ """
228    #~ def __init_( self, newstring, restartLoc ):
229        #~ self.newParseText = newstring
230        #~ self.reparseLoc = restartLoc
231
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        """Keep the sequence of parse elements that led to the recursion."""
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: with a bare "%s" % trace a tuple-valued
        # trace would be unpacked as the format arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % ( self.parseElementTrace, )
239
240class _ParseResultsWithOffset(object):
241    def __init__(self,p1,p2):
242        self.tup = (p1,p2)
243    def __getitem__(self,i):
244        return self.tup[i]
245    def __repr__(self):
246        return repr(self.tup)
247
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by results name as a key (results["<resultsName>"])
       - by attribute (results.<resultsName>)
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        """Return toklist itself when it already is an instance of cls;
           otherwise allocate a new object flagged for first-time setup."""
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Set up the token list and, when name is given, register the tokens
           under that results name.

           - toklist - a list of tokens (copied) or a single token
           - name - optional results name; an int name is converted to a string
           - asList - if true, the named value is stored as a ParseResults;
             otherwise the first token (or toklist itself) is stored
           - modal - if false, name is recorded as an accumulating name, so
             lookups return every value stored under it instead of the last
        """
        # __doinit is only true for objects freshly created by __new__; an
        # existing ParseResults passed back through the constructor keeps
        # its current contents
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        # this line is related to debugging the asXML bug
        #~ asList = False

        if name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Index by int/slice into the token list, or by results name into
           the named results (last value only, unless the name accumulates)."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Store v at list position k (int key) or append it to the values
           recorded for results name k.  A ParseResults value gets a weak
           reference back to this object as its parent."""
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        """Delete tokens by int/slice (shifting the recorded positions of
           named results accordingly), or delete a results name."""
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Insert insStr into the token list at position index, moving the
           recorded position of every named result located after index one
           place to the right."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                # compare against the insertion index (this used to reference
                # an undefined name and raised NameError)
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results: an unknown results name
           yields "" rather than raising AttributeError."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Append other's tokens and named results to this object, shifting
           the positions of other's named results past the current tokens."""
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # a negative position maps to the current end of self; an
                # explicit test is needed because the and/or idiom left the
                # negative value untouched whenever offset was 0
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        del other
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        """Return the tokens flattened into a list of strings, with sep
           placed between the entries of this level when it is non-empty."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; the tokens themselves are
           returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        """Return the results name under which the object sub is stored
           (identity comparison), or None if it is not a named result."""
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so materialise them
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = sorted( self.items() )
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    #~ out.append('\n')
                    out.append( v.dump(indent,depth+1) )
                    #~ out.append('\n')
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        #~ out.append('\n')
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        """Return the picklable state: the token list plus a tuple of the
           named results, the parent (dereferenced, or None), the accumulating
           names and the results name."""
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        """Restore the state produced by __getstate__, re-wrapping the parent
           in a weak reference."""
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None
597
598
def col (loc,strg):
    """Return the 1-based column of position loc within strg, treating
   newlines as line separators.  A loc that sits on a newline character
   reports column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the preceding newline (rfind gives -1 on the first line)
    return loc - strg.rfind("\n", 0, loc)
610
def lineno(loc,strg):
    """Return the 1-based line number of position loc within strg, treating
   newlines as line separators.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one line per newline found before loc, plus the line loc itself is on
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
622
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text excludes the surrounding newline characters.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "not found" as -1; index 0 is a genuine hit (a string
    # that starts with a newline), so the comparison has to include 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
632
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug hook: print the expression about to be tried together
       with its location and the (line,column) of that location."""
    pieces = [ "Match ", _ustr(expr), " at loc ", _ustr(loc),
               "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ) ]
    print ("".join(pieces))
635
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug hook: print the expression that matched and the list
       form of the tokens it produced."""
    pieces = [ "Matched ", _ustr(expr), " -> ", str(toks.asList()) ]
    print ("".join(pieces))
638
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug hook: print the exception raised by a failed match."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
641
def nullDebugAction(*args):
    """No-op debug action: accepts any arguments and does nothing, so that
       debugging output is suppressed during parsing."""
    return None
645
646class ParserElement(object):
647    """Abstract base level parser element class."""
648    DEFAULT_WHITE_CHARS = " \n\t\r"
649
650    def setDefaultWhitespaceChars( chars ):
651        """Overrides the default whitespace chars
652        """
653        ParserElement.DEFAULT_WHITE_CHARS = chars
654    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
655
656    def __init__( self, savelist=False ):
657        self.parseAction = list()
658        self.failAction = None
659        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
660        self.strRepr = None
661        self.resultsName = None
662        self.saveAsList = savelist
663        self.skipWhitespace = True
664        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
665        self.copyDefaultWhiteChars = True
666        self.mayReturnEmpty = False # used when checking for left-recursion
667        self.keepTabs = False
668        self.ignoreExprs = list()
669        self.debug = False
670        self.streamlined = False
671        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
672        self.errmsg = ""
673        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
674        self.debugActions = ( None, None, None ) #custom debug actions
675        self.re = None
676        self.callPreparse = True # used to avoid redundant calls to preParse
677        self.callDuringTry = False
678
679    def copy( self ):
680        """Make a copy of this ParserElement.  Useful for defining different parse actions
681           for the same parsing pattern, using copies of the original parse element."""
682        cpy = copy.copy( self )
683        cpy.parseAction = self.parseAction[:]
684        cpy.ignoreExprs = self.ignoreExprs[:]
685        if self.copyDefaultWhiteChars:
686            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
687        return cpy
688
689    def setName( self, name ):
690        """Define name for this expression, for use in debugging."""
691        self.name = name
692        self.errmsg = "Expected " + self.name
693        if hasattr(self,"exception"):
694            self.exception.msg = self.errmsg
695        return self
696
697    def setResultsName( self, name, listAllMatches=False ):
698        """Define name for referencing matching tokens as a nested attribute
699           of the returned parse results.
700           NOTE: this returns a *copy* of the original ParserElement object;
701           this is so that the client can define a basic element, such as an
702           integer, and reference it in multiple places with different names.
703        """
704        newself = self.copy()
705        newself.resultsName = name
706        newself.modalResults = not listAllMatches
707        return newself
708
709    def setBreak(self,breakFlag = True):
710        """Method to invoke the Python pdb debugger when this element is
711           about to be parsed. Set breakFlag to True to enable, False to
712           disable.
713        """
714        if breakFlag:
715            _parseMethod = self._parse
716            def breaker(instring, loc, doActions=True, callPreParse=True):
717                import pdb
718                pdb.set_trace()
719                _parseMethod( instring, loc, doActions, callPreParse )
720            breaker._originalParseMethod = _parseMethod
721            self._parse = breaker
722        else:
723            if hasattr(self._parse,"_originalParseMethod"):
724                self._parse = self._parse._originalParseMethod
725        return self
726
    def _normalizeParseActionArgs( f ):
        """Internal method used to decorate parse actions that take fewer than 3 arguments,
           so that all parse actions can be called as f(s,l,t).

           The argument count is read from the code object of f (or of its
           __init__ when f is a class, or of its __call__ when f is some other
           callable object), less one when a bound-self attribute is detected.
           f is returned untouched when its code object has the STAR_ARGS flag
           set or when the computed count is exactly 3; in every other case a
           small wrapper tmp(s,l,t) is returned that forwards only the arguments
           f declares, and that copies f's __name__, __doc__ and __dict__ where
           those can be copied."""
        # bit tested against co_flags to detect a signature that takes *args
        STAR_ARGS = 4

        try:
            restore = None
            if isinstance(f,type):
                # f is a class: inspect its __init__, and put the class back
                # into f once the arguments have been counted
                restore = f
                f = f.__init__
            if not _PY3K:
                codeObj = f.func_code
            else:
                # NOTE(review): "code" looks like a typo for "__code__" - if the
                # attribute does not exist, the AttributeError raised here sends
                # control to the callable-object fallback below - confirm
                codeObj = f.code
            if codeObj.co_flags & STAR_ARGS:
                # NOTE(review): when f was a class, this returns f.__init__
                # rather than the class, since restore has not been applied yet
                return f
            numargs = codeObj.co_argcount
            # do not count the implicit self argument of a method
            if not _PY3K:
                if hasattr(f,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f,"__self__"):
                    numargs -= 1
            if restore:
                f = restore
        except AttributeError:
            # f had no code object attribute of its own
            try:
                if not _PY3K:
                    call_im_func_code = f.__call__.im_func.func_code
                else:
                    call_im_func_code = f.__code__

                # not a function, must be a callable object, get info from the
                # im_func binding of its bound __call__ method
                if call_im_func_code.co_flags & STAR_ARGS:
                    return f
                numargs = call_im_func_code.co_argcount
                if not _PY3K:
                    if hasattr(f.__call__,"im_self"):
                        numargs -= 1
                else:
                    if hasattr(f.__call__,"__self__"):
                        # NOTE(review): subtracting 0 leaves numargs unchanged;
                        # the parallel branches subtract 1 - confirm intent
                        numargs -= 0
            except AttributeError:
                if not _PY3K:
                    call_func_code = f.__call__.func_code
                else:
                    call_func_code = f.__call__.__code__
                # not a bound method, get info directly from __call__ method
                if call_func_code.co_flags & STAR_ARGS:
                    return f
                numargs = call_func_code.co_argcount
                if not _PY3K:
                    if hasattr(f.__call__,"im_self"):
                        numargs -= 1
                else:
                    if hasattr(f.__call__,"__self__"):
                        numargs -= 1


        #~ print ("adding function %s with %d args" % (f.func_name,numargs))
        if numargs == 3:
            # already has the full (s,l,t) signature
            return f
        else:
            # NOTE(review): the wrapper defined for numargs > 3 never survives -
            # the if/elif/else chain that follows is a separate statement, and
            # its else branch also runs when numargs > 3, rebinding tmp to the
            # zero-argument wrapper.  Possibly "elif numargs == 2" was intended.
            if numargs > 3:
                def tmp(s,l,t):
                    return f(f.__call__.__self__, s,l,t)
            if numargs == 2:
                def tmp(s,l,t):
                    return f(l,t)
            elif numargs == 1:
                def tmp(s,l,t):
                    return f(t)
            else: #~ numargs == 0:
                def tmp(s,l,t):
                    return f()
            # make the wrapper look like the wrapped callable; each copy is
            # best-effort and silently skipped when it is not possible
            try:
                tmp.__name__ = f.__name__
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__doc__ = f.__doc__
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__dict__.update(f.__dict__)
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            return tmp
    _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
820
821    def setParseAction( self, *fns, **kwargs ):
822        """Define action to perform when successfully matching parse element definition.
823           Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
824           fn(loc,toks), fn(toks), or just fn(), where:
825            - s   = the original string being parsed (see note below)
826            - loc = the location of the matching substring
827            - toks = a list of the matched tokens, packaged as a ParseResults object
828           If the functions in fns modify the tokens, they can return them as the return
829           value from fn, and the modified list of tokens will replace the original.
830           Otherwise, fn does not need to return any value.
831
832           Note: the default parsing behavior is to expand tabs in the input string
833           before starting the parsing process.  See L{I{parseString}<parseString>} for more information
834           on parsing strings containing <TAB>s, and suggested methods to maintain a
835           consistent view of the parsed string, the parse location, and line and column
836           positions within the parsed string.
837           """
838        self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
839        self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
840        return self
841
842    def addParseAction( self, *fns, **kwargs ):
843        """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
844        self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
845        self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
846        return self
847
848    def setFailAction( self, fn ):
849        """Define action to perform if parsing fails at this expression.
850           Fail acton fn is a callable function that takes the arguments
851           fn(s,loc,expr,err) where:
852            - s = string being parsed
853            - loc = location where expression match was attempted and failed
854            - expr = the parse expression that failed
855            - err = the exception thrown
856           The function returns no value.  It may throw ParseFatalException
857           if it is desired to stop parsing immediately."""
858        self.failAction = fn
859        return self
860
861    def _skipIgnorables( self, instring, loc ):
862        exprsFound = True
863        while exprsFound:
864            exprsFound = False
865            for e in self.ignoreExprs:
866                try:
867                    while 1:
868                        loc,dummy = e._parse( instring, loc )
869                        exprsFound = True
870                except ParseException:
871                    pass
872        return loc
873
874    def preParse( self, instring, loc ):
875        if self.ignoreExprs:
876            loc = self._skipIgnorables( instring, loc )
877
878        if self.skipWhitespace:
879            wt = self.whiteChars
880            instrlen = len(instring)
881            while loc < instrlen and instring[loc] in wt:
882                loc += 1
883
884        return loc
885
886    def parseImpl( self, instring, loc, doActions=True ):
887        return loc, []
888
889    def postParse( self, instring, loc, tokenlist ):
890        return tokenlist
891
892    #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Match this expression against instring starting at loc, without memoizing.

           Steps, in order: optional preParse (only when both callPreParse and
           self.callPreparse are true), parseImpl, postParse, packaging of the
           tokens as a ParseResults, then the parse actions (run when doActions
           or self.callDuringTry is true).  Returns a (loc, retTokens) tuple,
           where loc is the location returned by parseImpl.

           An IndexError escaping parseImpl is converted into a ParseException
           located at len(instring).  When self.debug or self.failAction is set,
           the debug actions and the fail action are called around the match
           attempt, and the original exception is re-raised afterwards."""
        debugging = ( self.debug ) #and doActions )

        if debugging or self.failAction:
            # instrumented path: report the attempt, and report/handle a failure
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            # note: this is the location before preParse skipping, and it is
            # the location later passed to the parse, debug and fail actions
            tokensStart = loc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException, err:
                #~ print ("Exception raised:", err)
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            # plain path: no debug or fail callbacks to notify
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = loc
            # the IndexError guard is only set up when the element is flagged
            # mayIndexError, or when loc is already at or past the end of input
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            # each parse action sees the current results; a return value other
            # than None replaces them, re-packaged as a new ParseResults
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException, err:
                    # report an exception raised by a parse action, then let it propagate
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )

        if debugging:
            # report the successful match
            #~ print ("Matched",self,"->",retTokens.asList())
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
964
965    def tryParse( self, instring, loc ):
966        try:
967            return self._parse( instring, loc, doActions=False )[0]
968        except ParseFatalException:
969            raise ParseException( instring, loc, self.errmsg, self)
970
971    # this method gets repeatedly called during backtracking with the same arguments -
972    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
973    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
974        lookup = (self,instring,loc,callPreParse,doActions)
975        if lookup in ParserElement._exprArgCache:
976            value = ParserElement._exprArgCache[ lookup ]
977            if isinstance(value,Exception):
978                raise value
979            return value
980        else:
981            try:
982                value = self._parseNoCache( instring, loc, doActions, callPreParse )
983                ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
984                return value
985            except ParseBaseException, pe:
986                ParserElement._exprArgCache[ lookup ] = pe
987                raise
988
989    _parse = _parseNoCache
990
991    # argument cache for optimizing repeated calls when backtracking through recursive expressions
992    _exprArgCache = {}
993    def resetCache():
994        ParserElement._exprArgCache.clear()
995    resetCache = staticmethod(resetCache)
996
997    _packratEnabled = False
998    def enablePackrat():
999        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1000           Repeated parse attempts at the same string location (which happens
1001           often in many complex grammars) can immediately return a cached value,
1002           instead of re-executing parsing/validating code.  Memoizing is done of
1003           both valid results and parsing exceptions.
1004
1005           This speedup may break existing programs that use parse actions that
1006           have side-effects.  For this reason, packrat parsing is disabled when
1007           you first import pyparsing.  To activate the packrat feature, your
1008           program must call the class method ParserElement.enablePackrat().  If
1009           your program uses psyco to "compile as you go", you must call
1010           enablePackrat before calling psyco.full().  If you do not do this,
1011           Python will crash.  For best results, call enablePackrat() immediately
1012           after importing pyparsing.
1013        """
1014        if not ParserElement._packratEnabled:
1015            ParserElement._packratEnabled = True
1016            ParserElement._parse = ParserElement._parseCache
1017    enablePackrat = staticmethod(enablePackrat)
1018
1019    def parseString( self, instring, parseAll=False ):
1020        """Execute the parse expression with the given string.
1021           This is the main interface to the client code, once the complete
1022           expression has been built.
1023
1024           If you want the grammar to require that the entire input string be
1025           successfully parsed, then set parseAll to True (equivalent to ending
1026           the grammar with StringEnd()).
1027
1028           Note: parseString implicitly calls expandtabs() on the input string,
1029           in order to report proper column numbers in parse actions.
1030           If the input string contains tabs and
1031           the grammar uses parse actions that use the loc argument to index into the
1032           string being parsed, you can ensure you have a consistent view of the input
1033           string by:
1034            - calling parseWithTabs on your grammar before calling parseString
1035              (see L{I{parseWithTabs}<parseWithTabs>})
1036            - define your parse action using the full (s,loc,toks) signature, and
1037              reference the input string using the parse action's s argument
1038            - explictly expand the tabs in your input string before calling
1039              parseString
1040        """
1041        ParserElement.resetCache()
1042        if not self.streamlined:
1043            self.streamline()
1044            #~ self.saveAsList = True
1045        for e in self.ignoreExprs:
1046            e.streamline()
1047        if not self.keepTabs:
1048            instring = instring.expandtabs()
1049        loc, tokens = self._parse( instring, 0 )
1050        if parseAll:
1051            StringEnd()._parse( instring, loc )
1052        return tokens
1053
1054    def scanString( self, instring, maxMatches=_MAX_INT ):
1055        """Scan the input string for expression matches.  Each match will return the
1056           matching tokens, start location, and end location.  May be called with optional
1057           maxMatches argument, to clip scanning after 'n' matches are found.
1058
1059           Note that the start and end locations are reported relative to the string
1060           being parsed.  See L{I{parseString}<parseString>} for more information on parsing
1061           strings with embedded tabs."""
1062        if not self.streamlined:
1063            self.streamline()
1064        for e in self.ignoreExprs:
1065            e.streamline()
1066
1067        if not self.keepTabs:
1068            instring = _ustr(instring).expandtabs()
1069        instrlen = len(instring)
1070        loc = 0
1071        preparseFn = self.preParse
1072        parseFn = self._parse
1073        ParserElement.resetCache()
1074        matches = 0
1075        while loc <= instrlen and matches < maxMatches:
1076            try:
1077                preloc = preparseFn( instring, loc )
1078                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1079            except ParseException:
1080                loc = preloc+1
1081            else:
1082                matches += 1
1083                yield tokens, preloc, nextLoc
1084                loc = nextLoc
1085
1086    def transformString( self, instring ):
1087        """Extension to scanString, to modify matching text with modified tokens that may
1088           be returned from a parse action.  To use transformString, define a grammar and
1089           attach a parse action to it that modifies the returned token list.
1090           Invoking transformString() on a target string will then scan for matches,
1091           and replace the matched text patterns according to the logic in the parse
1092           action.  transformString() returns the resulting transformed string."""
1093        out = []
1094        lastE = 0
1095        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
1096        # keep string locs straight between transformString and scanString
1097        self.keepTabs = True
1098        for t,s,e in self.scanString( instring ):
1099            out.append( instring[lastE:s] )
1100            if t:
1101                if isinstance(t,ParseResults):
1102                    out += t.asList()
1103                elif isinstance(t,list):
1104                    out += t
1105                else:
1106                    out.append(t)
1107            lastE = e
1108        out.append(instring[lastE:])
1109        return "".join(map(_ustr,out))
1110
1111    def searchString( self, instring, maxMatches=_MAX_INT ):
1112        """Another extension to scanString, simplifying the access to the tokens found
1113           to match the given parse expression.  May be called with optional
1114           maxMatches argument, to clip searching after 'n' matches are found.
1115        """
1116        return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1117
1118    def __add__(self, other ):
1119        """Implementation of + operator - returns And"""
1120        if isinstance( other, basestring ):
1121            other = Literal( other )
1122        if not isinstance( other, ParserElement ):
1123            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1124                    SyntaxWarning, stacklevel=2)
1125            return None
1126        return And( [ self, other ] )
1127
1128    def __radd__(self, other ):
1129        """Implementation of + operator when left operand is not a ParserElement"""
1130        if isinstance( other, basestring ):
1131            other = Literal( other )
1132        if not isinstance( other, ParserElement ):
1133            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1134                    SyntaxWarning, stacklevel=2)
1135            return None
1136        return other + self
1137
1138    def __sub__(self, other):
1139        """Implementation of - operator, returns And with error stop"""
1140        if isinstance( other, basestring ):
1141            other = Literal( other )
1142        if not isinstance( other, ParserElement ):
1143            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1144                    SyntaxWarning, stacklevel=2)
1145            return None
1146        return And( [ self, And._ErrorStop(), other ] )
1147
1148    def __rsub__(self, other ):
1149        """Implementation of - operator when left operand is not a ParserElement"""
1150        if isinstance( other, basestring ):
1151            other = Literal( other )
1152        if not isinstance( other, ParserElement ):
1153            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1154                    SyntaxWarning, stacklevel=2)
1155            return None
1156        return other - self
1157
1158    def __mul__(self,other):
1159        if isinstance(other,int):
1160            minElements, optElements = other,0
1161        elif isinstance(other,tuple):
1162            if len(other)==0:
1163                other = (None,None)
1164            elif len(other)==1:
1165                other = (other[0],None)
1166            if len(other)==2:
1167                if other[0] is None:
1168                    other = (0, other[1])
1169                if isinstance(other[0],int) and other[1] is None:
1170                    if other[0] == 0:
1171                        return ZeroOrMore(self)
1172                    if other[0] == 1:
1173                        return OneOrMore(self)
1174                    else:
1175                        return self*other[0] + ZeroOrMore(self)
1176                elif isinstance(other[0],int) and isinstance(other[1],int):
1177                    minElements, optElements = other
1178                    optElements -= minElements
1179                else:
1180                    raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1181            else:
1182                raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
1183        else:
1184            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1185
1186        if minElements < 0:
1187            raise ValueError("cannot multiply ParserElement by negative value")
1188        if optElements < 0:
1189            raise ValueError("second tuple value must be greater or equal to first tuple value")
1190        if minElements == optElements == 0:
1191            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1192
1193        if (optElements):
1194            def makeOptionalList(n):
1195                if n>1:
1196                    return Optional(self + makeOptionalList(n-1))
1197                else:
1198                    return Optional(self)
1199            if minElements:
1200                if minElements == 1:
1201                    ret = self + makeOptionalList(optElements)
1202                else:
1203                    ret = And([self]*minElements) + makeOptionalList(optElements)
1204            else:
1205                ret = makeOptionalList(optElements)
1206        else:
1207            if minElements == 1:
1208                ret = self
1209            else:
1210                ret = And([self]*minElements)
1211        return ret
1212
1213    def __rmul__(self, other):
1214        return self.__mul__(other)
1215
1216    def __or__(self, other ):
1217        """Implementation of | operator - returns MatchFirst"""
1218        if isinstance( other, basestring ):
1219            other = Literal( other )
1220        if not isinstance( other, ParserElement ):
1221            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1222                    SyntaxWarning, stacklevel=2)
1223            return None
1224        return MatchFirst( [ self, other ] )
1225
1226    def __ror__(self, other ):
1227        """Implementation of | operator when left operand is not a ParserElement"""
1228        if isinstance( other, basestring ):
1229            other = Literal( other )
1230        if not isinstance( other, ParserElement ):
1231            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1232                    SyntaxWarning, stacklevel=2)
1233            return None
1234        return other | self
1235
1236    def __xor__(self, other ):
1237        """Implementation of ^ operator - returns Or"""
1238        if isinstance( other, basestring ):
1239            other = Literal( other )
1240        if not isinstance( other, ParserElement ):
1241            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1242                    SyntaxWarning, stacklevel=2)
1243            return None
1244        return Or( [ self, other ] )
1245
1246    def __rxor__(self, other ):
1247        """Implementation of ^ operator when left operand is not a ParserElement"""
1248        if isinstance( other, basestring ):
1249            other = Literal( other )
1250        if not isinstance( other, ParserElement ):
1251            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1252                    SyntaxWarning, stacklevel=2)
1253            return None
1254        return other ^ self
1255
1256    def __and__(self, other ):
1257        """Implementation of & operator - returns Each"""
1258        if isinstance( other, basestring ):
1259            other = Literal( other )
1260        if not isinstance( other, ParserElement ):
1261            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1262                    SyntaxWarning, stacklevel=2)
1263            return None
1264        return Each( [ self, other ] )
1265
1266    def __rand__(self, other ):
1267        """Implementation of & operator when left operand is not a ParserElement"""
1268        if isinstance( other, basestring ):
1269            other = Literal( other )
1270        if not isinstance( other, ParserElement ):
1271            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1272                    SyntaxWarning, stacklevel=2)
1273            return None
1274        return other & self
1275
1276    def __invert__( self ):
1277        """Implementation of ~ operator - returns NotAny"""
1278        return NotAny( self )
1279
1280    def __call__(self, name):
1281        """Shortcut for setResultsName, with listAllMatches=default::
1282             userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1283           could be written as::
1284             userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1285           """
1286        return self.setResultsName(name)
1287
1288    def suppress( self ):
1289        """Suppresses the output of this ParserElement; useful to keep punctuation from
1290           cluttering up returned output.
1291        """
1292        return Suppress( self )
1293
1294    def leaveWhitespace( self ):
1295        """Disables the skipping of whitespace before matching the characters in the
1296           ParserElement's defined pattern.  This is normally only used internally by
1297           the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1298        """
1299        self.skipWhitespace = False
1300        return self
1301
1302    def setWhitespaceChars( self, chars ):
1303        """Overrides the default whitespace chars
1304        """
1305        self.skipWhitespace = True
1306        self.whiteChars = chars
1307        self.copyDefaultWhiteChars = False
1308        return self
1309
1310    def parseWithTabs( self ):
1311        """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1312           Must be called before parseString when the input grammar contains elements that
1313           match <TAB> characters."""
1314        self.keepTabs = True
1315        return self
1316
1317    def ignore( self, other ):
1318        """Define expression to be ignored (e.g., comments) while doing pattern
1319           matching; may be called repeatedly, to define multiple comment or other
1320           ignorable patterns.
1321        """
1322        if isinstance( other, Suppress ):
1323            if other not in self.ignoreExprs:
1324                self.ignoreExprs.append( other )
1325        else:
1326            self.ignoreExprs.append( Suppress( other ) )
1327        return self
1328
1329    def setDebugActions( self, startAction, successAction, exceptionAction ):
1330        """Enable display of debugging messages while doing pattern matching."""
1331        self.debugActions = (startAction or _defaultStartDebugAction,
1332                             successAction or _defaultSuccessDebugAction,
1333                             exceptionAction or _defaultExceptionDebugAction)
1334        self.debug = True
1335        return self
1336
1337    def setDebug( self, flag=True ):
1338        """Enable display of debugging messages while doing pattern matching.
1339           Set flag to True to enable, False to disable."""
1340        if flag:
1341            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1342        else:
1343            self.debug = False
1344        return self
1345
1346    def __str__( self ):
1347        return self.name
1348
1349    def __repr__( self ):
1350        return _ustr(self)
1351
1352    def streamline( self ):
1353        self.streamlined = True
1354        self.strRepr = None
1355        return self
1356
1357    def checkRecursion( self, parseElementList ):
1358        pass
1359
1360    def validate( self, validateTrace=[] ):
1361        """Check defined expressions for valid structure, check for infinite recursive definitions."""
1362        self.checkRecursion( [] )
1363
1364    def parseFile( self, file_or_filename ):
1365        """Execute the parse expression on the given file or filename.
1366           If a filename is specified (instead of a file object),
1367           the entire file is opened, read, and closed before parsing.
1368        """
1369        try:
1370            file_contents = file_or_filename.read()
1371        except AttributeError:
1372            f = open(file_or_filename, "rb")
1373            file_contents = f.read()
1374            f.close()
1375        return self.parseString(file_contents)
1376
1377    def getException(self):
1378        return ParseException("",0,self.errmsg,self)
1379
1380    def __getattr__(self,aname):
1381        if aname == "myException":
1382            self.myException = ret = self.getException();
1383            return ret;
1384        else:
1385            raise AttributeError("no such attribute " + aname)
1386
1387    def __eq__(self,other):
1388        if isinstance(other, basestring):
1389            try:
1390                (self + StringEnd()).parseString(_ustr(other))
1391                return True
1392            except ParseBaseException:
1393                return False
1394        else:
1395            return super(ParserElement,self)==other
1396
1397    def __hash__(self):
1398        return hash(id(self))
1399
1400    def __req__(self,other):
1401        return self == other
1402
1403
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__(self):
        """Initialize the base ParserElement with savelist=False."""
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name through the base class, then rebuild errmsg as
           "Expected " followed by the element's name.  Returns what the base
           class setName returned."""
        named = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return named
1415
1416
class Empty(Token):
    """An empty token, will always match."""
    def __init__(self):
        """Set up the name and flags; no parseImpl is defined in this class."""
        super(Empty, self).__init__()
        self.name = "Empty"
        # an empty match is the expected outcome, and no indexing is involved
        self.mayReturnEmpty = True
        self.mayIndexError = False
1424
1425
class NoMatch(Token):
    """A token that will never match."""
    def __init__(self):
        """Set up the name, the flags and the error message."""
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise this element's myException, after updating it
           with the current location and input string."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1441
1442
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__(self, matchString):
        """Store the string to match, its length and its first character.
           An empty matchString triggers a SyntaxWarning and switches the class
           of this instance to Empty; the attribute assignments that follow are
           still carried out in that case."""
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at loc.  Returns the location just past the match
           together with the matched string; otherwise raises this element's
           myException, updated with the current location and input string."""
        if instring[loc] == self.firstMatchChar:
            # a one-character literal is fully matched by the test above
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
1475
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character (and must not be immediately
       preceded by one).  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to the current value of Keyword.DEFAULT_KEYWORD_CHARS (initially all
       alphanumerics + "_" and "$"); caseless allows case-insensitive matching,
       default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        # Resolve the default when the Keyword is built, not when the class is
        # defined; otherwise setDefaultKeywordChars() has no effect on Keywords
        # created after it is called.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that it is neither preceded nor
           followed by an identifier character.  Returns (new location, keyword
           string); raises this element's exception on failure."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy of this Keyword that keeps this instance's identChars.

        Previously the copy's identChars was overwritten with the raw
        DEFAULT_KEYWORD_CHARS string, silently discarding any custom
        identifier characters given to the constructor.
        """
        c = super(Keyword,self).copy()
        c.identChars = self.identChars
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars used by Keywords created from now on
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1536
1537
class CaselessLiteral(Literal):
    """Token that matches a given string without regard to letter case.
       Note: the token returned is always the string as it was given to the
       constructor, NOT the text as it appears in the input.
    """
    def __init__( self, matchString ):
        # the inherited match string is stored upper-cased for comparison
        super(CaselessLiteral, self).__init__( matchString.upper() )
        # remember the defining literal exactly as written
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (new location, defining string) or raise this element's exception."""
        if instring[ loc:loc+self.matchLen ].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc + self.matchLen, self.returnString
1559
class CaselessKeyword(Keyword):
    """Keyword that is matched without regard to letter case.

       Equivalent to Keyword(matchString, identChars, caseless=True).  Matching
       is inherited from Keyword.parseImpl, so the keyword must be neither
       followed nor preceded by an identifier character.  (A former local
       parseImpl override skipped the preceding-character test and could match
       in the middle of an identifier.)

       identChars defaults to the current value of Keyword.DEFAULT_KEYWORD_CHARS.
    """
    def __init__( self, matchString, identChars=None ):
        # Resolve the default at call time so that a prior call to
        # Keyword.setDefaultKeywordChars() is honoured.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
1573
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: when there are no length constraints and no spaces in the
        # character sets, try to express the word as a compiled regular expression.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # The initial set is emitted as an escaped *literal* here, which is
                # only correct when it holds exactly one character.  (The test used
                # to be on bodyCharsOrig, so Word("ab","c") became the regex
                # "ab[c]*" and required the literal text "ab".)
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # pattern could not be compiled; fall back to the character loop
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc.  Returns (new location, matched text); raises this
           element's exception when no acceptable word starts at loc."""
        # NOTE(review): when the fast path above is not taken, self.re is not
        # assigned in this class - presumably the base class initialises it to a
        # false value; confirm against ParserElement.__init__.
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond the maximum length or the end of the input
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past max is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # keyword mode: body characters may not touch the word on either side
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string form if that works, else a cached
           "W:(...)" summary of the character sets (each abbreviated to 4 characters)."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # base-class form unavailable; build our own representation below
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1700
1701
class Regex(Token):
    """Token that matches text using a regular expression, written in the
       syntax understood by Python's built-in re module.
    """
    def __init__( self, pattern, flags=0):
        """pattern and flags are handed unchanged to re.compile(); see the re module documentation for what is accepted."""
        super(Regex,self).__init__()

        if not len(pattern):
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            # tell the caller which pattern was bad, then let the error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (new location, ParseResults); named groups of the regex become named results."""
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        named = found.groupdict()
        for key in named:
            tokens[key] = named[key]
        return found.end(), tokens

    def __str__( self ):
        """Return the base-class string form if that works, else a cached "Re:(...)" form."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        cached = self.strRepr
        if cached is None:
            cached = "Re:(%s)" % repr(self.pattern)
            self.strRepr = cached

        return cached
1757
1758
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
       The match is performed by a regular expression assembled in __init__.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after issuing a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace has been stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Start of the pattern: the opening quote, then a non-capturing group whose
        # first alternative is one character that is not the first end-quote
        # character and not the escape character (nor CR/LF unless multiline).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        # Multi-character end quote: also accept each proper prefix of the end
        # quote when it is followed by a character that does not continue it
        # (longest prefix first).
        if len(self.endQuoteChar) > 1:
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        # the escaped-quote sequence is a further alternative inside the body
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        # the escape character followed by any one character is another alternative;
        # escCharReplacePattern is used in parseImpl to drop the escape character
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        # close the repeated body group and require the end quote
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.  Returns (new location, text), where text
           has its quotes and escapes removed when unquoteResults is true; raises
           this element's exception if no quoted string starts at loc."""
        # compare the first character before running the regex at all
        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base-class string form if that works, else a cached description naming both quote strings."""
        try:
            return super(QuotedString,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1873
1874
class CharsNotIn(Token):
    """Token that matches a run of characters none of which belongs to a given set.
       Constructed from a string holding every disallowed character, plus an
       optional minimum, maximum, and/or exact length.  min defaults to 1 (values
       below 1 are rejected); max and exact default to 0, which means no maximum
       and no exact length requirement.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        # an exact length, when given, takes the place of both bounds
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (new location, matched text) or raise this element's exception."""
        banned = self.notChars
        if instring[loc] in banned:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        loc += 1
        # stop at the maximum length or at the end of the input, whichever comes first
        limit = min( begin+self.maxLen, len(instring) )
        while loc < limit and instring[loc] not in banned:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]

    def __str__( self ):
        """Return the base-class string form if that works, else a cached "!W:(...)" form."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            # show at most the first four excluded characters
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
1945
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display names used to build the element's name from its characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters that are to be matched must no longer be skipped as whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        # an exact length, when given, takes the place of both bounds
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (new location, matched whitespace) or raise this element's exception."""
        if instring[ loc ] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        begin = loc
        loc += 1
        limit = min( begin + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]
2002
2003
class _PositionToken(Token):
    """Common base for the position-testing tokens; the element is named after its concrete class."""
    def __init__( self ):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
2010
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; handy for scraping tabular reports."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column (or non-whitespace) is reached."""
        if col(loc,instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (location of the target column, text skipped to get there);
           raises ParseException when loc is already past the target column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
2033
class LineStart(_PositionToken):
    """Succeeds when the current position is at the beginning of a line of the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return loc+1 when the base-class preParse position holds a newline, otherwise loc unchanged."""
        skipped_to = super(LineStart,self).preParse(instring,loc)
        if instring[skipped_to] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) at a line start; otherwise raise this element's exception."""
        # at a line start when: very first position, or only skippable text
        # precedes loc, or the previous character is a newline
        at_line_start = ( loc == 0 or
                          loc == self.preParse( instring, 0 ) or
                          instring[loc-1] == "\n" )
        if at_line_start:
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2058
class LineEnd(_PositionToken):
    """Succeeds when the current position is at the end of a line of the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a newline at loc, or accept the end of the input; anything else raises."""
        size = len(instring)
        if loc < size and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == size:
            # end of input counts as end of line, with no token text
            return loc + 1, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2084
class StringStart(_PositionToken):
    """Succeeds when the current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) at the start of the text; otherwise raise this element's exception."""
        # a non-zero loc still qualifies when everything before it is
        # skipped by preParse (whitespace and ignorables)
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2102
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        #self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return an empty match at or beyond the end of instring; raise this
           element's exception when unparsed text remains at loc.

           Exactly at the end the returned location is loc+1; beyond the end
           it is loc unchanged.
        """
        instrlen = len(instring)
        if loc < instrlen:
            #~ raise ParseException( instring, loc, "Expected end of text" )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen is the only case left; the former trailing "else"
        # branch after the <, == and > tests could never execute
        return loc, []
2126
class WordStart(_PositionToken):
    """Succeeds when the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) at a word start; otherwise raise this element's exception."""
        # position 0 always qualifies; elsewhere the previous character must be
        # a non-word character and the current one a word character
        if loc == 0 or (instring[loc-1] not in self.wordChars and
                        instring[loc] in self.wordChars):
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2148
class WordEnd(_PositionToken):
    """Succeeds when the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return (loc, []) at a word end; otherwise raise this element's exception."""
        size = len(instring)
        # at or past the end of the input (or with empty input) there is nothing to test
        if not (size > 0 and loc < size):
            return loc, []
        # current character must be a non-word character and the previous one a word character
        if instring[loc] not in self.wordChars and instring[loc-1] in self.wordChars:
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2173
2174
class ParseExpression(ParserElement):
    """Abstract ParserElement subclass that holds a list of contained
       expressions (self.exprs), for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        super(ParseExpression,self).__init__(savelist)
        # a list is used as given; a string becomes a single Literal;
        # anything else is wrapped in a one-element list
        if isinstance( exprs, list ):
            members = exprs
        elif isinstance( exprs, basestring ):
            members = [ Literal( exprs ) ]
        else:
            members = [ exprs ]
        self.exprs = members
        self.callPreparse = False

    def __getitem__( self, i ):
        """Index into the contained expressions."""
        return self.exprs[i]

    def append( self, other ):
        """Add another contained expression, drop the cached string form, and return self."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this expression, and on fresh copies of
           every contained expression (which replace the originals in self.exprs)."""
        self.skipWhitespace = False
        duplicates = []
        for e in self.exprs:
            duplicates.append( e.copy() )
        self.exprs = duplicates
        for e in duplicates:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and pass the newest entry of
           self.ignoreExprs on to every contained expression.  A Suppress that is
           already registered is not added a second time.  Returns self."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string form if that works, else a cached "ClassName:(exprs)" form."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this expression and its contents, flattening directly nested
           expressions of the same class where that is safe.  Returns self."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            own_class = self.__class__

            def mergeable( candidate ):
                # same class, and nothing attached that flattening would lose
                return ( isinstance( candidate, own_class ) and
                         not candidate.parseAction and
                         candidate.resultsName is None and
                         not candidate.debug )

            head = self.exprs[0]
            if mergeable( head ):
                self.exprs = head.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError  |= head.mayIndexError

            # the last element is examined after the head may have been expanded
            tail = self.exprs[-1]
            if mergeable( tail ):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression with this element added to the trace, then check this element for recursion."""
        trail = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trail)
        self.checkRecursion( [] )
2267
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        # Marker element: "constructing" it always hands back the single
        # shared Empty instance created just below, so parseImpl can
        # recognise the marker by identity.
        def __new__(cls, *args, **kwargs):
            return And._ErrorStop.instance
    _ErrorStop.instance = Empty()
    _ErrorStop.instance.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        """Build a sequence expression from exprs.

        mayReturnEmpty is True only if every sub-expression may return
        empty.  Whitespace handling (whiteChars, skipWhitespace) is copied
        from the first sub-expression, when there is one.
        """
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Read the first element from the normalized self.exprs, not from the
        # raw argument: exprs may be a plain string or a single element, for
        # which exprs[0] is not the first sub-expression.  With no
        # sub-expressions the whitespace settings are left as the base class
        # initialised them.
        if self.exprs:
            leading = self.exprs[0]
            self.setWhitespaceChars(leading.whiteChars)
            self.skipWhitespace = leading.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each sub-expression in turn, accumulating their tokens.

        Returns (loc, resultlist).  Once the _ErrorStop marker has been
        passed, a ParseBaseException or IndexError raised by a later
        sub-expression is re-raised as ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if e is And._ErrorStop.instance:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseBaseException:
                    # sys.exc_info() instead of "except X, pe" keeps this
                    # valid on every Python version.
                    raise ParseSyntaxException(sys.exc_info()[1])
                except IndexError:
                    raise ParseSyntaxException(
                        ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Implement '+=': append other (strings become Literals) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)  # And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        """Run checkRecursion on the sub-expressions, in order, stopping
        after the first one that cannot return empty."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        """Return self.name if set, else the cached "{a b c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr
2333
2334
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """
    def __init__(self, exprs, savelist=False):
        """Build the alternation; it may return empty if any alternative may."""
        super(Or, self).__init__(exprs, savelist)
        self.mayReturnEmpty = False
        for alternative in self.exprs:
            if alternative.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and parse with the one that reaches furthest.

        Each alternative is first probed with tryParse.  If none matches,
        the failure with the greatest location is raised, or a generic
        ParseException when no failure was recorded.
        """
        furthestFailLoc = -1
        furthestMatchLoc = -1
        furthestFailure = None
        for alternative in self.exprs:
            try:
                endLoc = alternative.tryParse(instring, loc)
            except ParseException:
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # Running off the end of the input counts as failing there.
                if len(instring) > furthestFailLoc:
                    furthestFailure = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthestFailLoc = len(instring)
            else:
                if endLoc > furthestMatchLoc:
                    furthestMatchLoc = endLoc
                    winner = alternative

        if furthestMatchLoc < 0:
            if furthestFailure is not None:
                raise furthestFailure
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        return winner._parse(instring, loc, doActions)

    def __ixor__(self, other):
        """Implement '^=': append other (strings become Literals) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)  # Or( [ self, other ] )

    def __str__(self):
        """Return self.name if set, else the cached "{a ^ b ^ c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            pieces = [_ustr(alternative) for alternative in self.exprs]
            self.strRepr = "{" + " ^ ".join(pieces) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run checkRecursion on every alternative."""
        subRecCheckList = parseElementList[:] + [self]
        for alternative in self.exprs:
            alternative.checkRecursion(subRecCheckList)
2389
2390    def checkRecursion( self, parseElementList ):
2391        subRecCheckList = parseElementList[:] + [ self ]
2392        for e in self.exprs:
2393            e.checkRecursion( subRecCheckList )
2394
2395
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__(self, exprs, savelist=False):
        """Build the alternation.

        With a false-valued exprs argument the expression may return empty;
        otherwise it may return empty if any alternative may.
        """
        super(MatchFirst, self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        if exprs:
            self.mayReturnEmpty = False
            for alternative in self.exprs:
                if alternative.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        If none matches, the failure that got furthest into the input is
        raised, or a generic ParseException when no failure was recorded.
        """
        furthestFailLoc = -1
        furthestFailure = None
        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseException:
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # Running off the end of the input counts as failing there.
                if len(instring) > furthestFailLoc:
                    furthestFailure = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthestFailLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestFailure is not None:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Implement '|=': append other (strings become Literals) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)  # MatchFirst( [ self, other ] )

    def __str__(self):
        """Return self.name if set, else the cached "{a | b | c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            pieces = [_ustr(alternative) for alternative in self.exprs]
            self.strRepr = "{" + " | ".join(pieces) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run checkRecursion on every alternative."""
        subRecCheckList = parseElementList[:] + [self]
        for alternative in self.exprs:
            alternative.checkRecursion(subRecCheckList)
2453
2454
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__(self, exprs, savelist=True):
        """Build the expression; it may return empty only if every member may."""
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for member in self.exprs:
            if not member.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # The member groups are computed lazily on the first parseImpl call.
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the members in whatever order they appear in the input.

        A first pass uses tryParse to discover the order in which members
        match; a second pass parses them for real in that order and merges
        their results.  Raises ParseException if a required member never
        matched.
        """
        if self.initExprGroups:
            # One-time split of the members by their wrapper class.
            members = self.exprs
            self.optionals = [m.expr for m in members if isinstance(m, Optional)]
            self.multioptionals = [m.expr for m in members if isinstance(m, ZeroOrMore)]
            self.multirequired = [m.expr for m in members if isinstance(m, OneOrMore)]
            self.required = [m for m in members
                             if not isinstance(m, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # Keep sweeping over the candidates until a whole sweep matches nothing.
        while True:
            candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            failures = 0
            for candidate in candidates:
                try:
                    probeLoc = candidate.tryParse(instring, probeLoc)
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(candidate)
                    if candidate in pendingRequired:
                        pendingRequired.remove(candidate)
                    elif candidate in pendingOptional:
                        pendingOptional.remove(candidate)
            if failures == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join([_ustr(m) for m in pendingRequired])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [m for m in self.exprs
                       if isinstance(m, Optional) and m.expr in pendingOptional]

        resultlist = []
        for matched in matchOrder:
            loc, results = matched._parse(instring, loc, doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for partial in resultlist:
            # Names present on both sides get the two values concatenated.
            merged = {}
            for key in partial.keys():
                if key in finalResults.keys():
                    combined = ParseResults(finalResults[key])
                    combined += ParseResults(partial[key])
                    merged[key] = combined
            finalResults += ParseResults(partial)
            for key, value in merged.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__(self):
        """Return self.name if set, else the cached "{a & b & c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            pieces = [_ustr(member) for member in self.exprs]
            self.strRepr = "{" + " & ".join(pieces) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run checkRecursion on every member."""
        subRecCheckList = parseElementList[:] + [self]
        for member in self.exprs:
            member.checkRecursion(subRecCheckList)
2539
2540
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__(self, expr, savelist=False):
        """Wrap expr (a string is turned into a Literal; None is allowed).

        When expr is not None, its index-error/empty flags, whitespace
        settings, saveAsList, callPreparse and ignore expressions are copied
        onto this element.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Parse with the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a copy of the wrapped
        expression, which replaces the original.  Returns self."""
        self.skipWhitespace = False
        # The None check has to come before the copy: the old order called
        # self.expr.copy() first and failed with AttributeError when no
        # expression had been set.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on the wrapped expression.

        A Suppress instance already present in self.ignoreExprs is not
        registered again.  Returns self.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseElementEnhance, self).ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the wrapped expression; return self."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if self is already in
        parseElementList; otherwise continue the check in the wrapped
        expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression, then run the recursion check.

        validateTrace is only copied, never modified.
        """
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return a display string for this element.

        The base-class __str__ is tried first; if it raises, the cached
        "ClassName:(expr)" form is built and returned.
        """
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # Only ordinary errors trigger the fallback; a bare "except:"
            # would also swallow KeyboardInterrupt and SystemExit.
            pass

        if self.strRepr is None:
            # Also built when self.expr is None, so that __str__ never
            # returns None (which would make str() raise TypeError).
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
2611
2612
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        # A successful lookahead yields no tokens.
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Probe the wrapped expression at loc; return loc unchanged with no tokens.

        Any exception raised by the probe propagates to the caller.
        """
        lookahead = self.expr
        lookahead.tryParse(instring, loc)
        return loc, []
2625
2626
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)
        # NOTE(review): self.myException is not assigned here; presumably it
        # is supplied by the base class - confirm.

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens if the wrapped expression does NOT match at loc.

        If it does match, self.myException is updated with the current
        location and input string and raised.
        """
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # The unwanted expression is absent: that is the success case.
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        """Return self.name if set, else the cached "~{expr}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2662
2663
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression as many times as possible.

        Repetition ends at the first ParseException or IndexError, which is
        absorbed; the location and tokens gathered so far are returned (an
        empty list when nothing matched).
        """
        tokens = []
        repeated = self.expr
        try:
            loc, tokens = repeated._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                if skipIgnorables:
                    nextLoc = self._skipIgnorables(instring, loc)
                else:
                    nextLoc = loc
                loc, moreTokens = repeated._parse(instring, nextLoc, doActions)
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        """Return self.name if set, else the cached "[expr]..." form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and force saveAsList on
        the element it returns."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2701
2702
2703class OneOrMore(ParseElementEnhance):
2704    """Repetition of one or more of the given expression."""
2705    def parseImpl( self, instring, loc, doActions=True ):
2706        # must be at least one
2707        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
2708        try:
2709            hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
2710            while 1:
2711                if hasIgnoreExprs:
2712                    preloc = self._skipIgnorables( instring, loc )
2713                else:
2714                    preloc = loc
2715                loc, tmptokens = self.expr._parse( instring, preloc, doActions )
2716                if tmptokens or tmptokens.keys():
2717                    tokens += tmptokens
2718        except (ParseException,IndexError):
2719            pass
2720
2721        return loc, tokens
2722
2723    def __str__( self ):
2724        if hasattr(self,"name"):
2725            return self.name
2726
2727        if self.strRepr is None:
2728            self.strRepr = "{" + _ustr(self.expr) + "}..."
2729
2730        return self.strRepr
2731
2732    def setResultsName( self, name, listAllMatches=False ):
2733        ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
2734        ret.saveAsList = True
2735        return ret
2736
2737class _NullToken(object):
2738    def __bool__(self):
2739        return False
2740    __nonzero__ = __bool__
2741    def __str__(self):
2742        return ""
2743
# Sentinel used as the default for Optional's "default" argument, so that
# "no default given" can be told apart from any value a caller might pass.
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression if it matches, else fall back.

        On ParseException or IndexError the location is left unchanged and
        the tokens are: empty when no default was given; otherwise the
        default value, additionally stored under the wrapped expression's
        results name when it has one.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        """Return self.name if set, else the cached "[expr]" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
2777
2778
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__(self, other, include=False, ignore=None):
        super(SkipTo, self).__init__(other)
        if ignore is not None:
            # Attach the ignore expression to a copy of the target.
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one position at a time until the target expression matches.

        Returns the skipped text as the only token; with includeMatch set,
        the target is parsed as well and, when it yields tokens, they are
        returned together with the skipped text.  If the end of the input
        is passed without a match, self.myException is raised.
        """
        startLoc = loc
        instrlen = len(instring)
        target = self.expr
        while loc <= instrlen:
            try:
                loc = target._skipIgnorables(instring, loc)
                # Probe without parse actions first.
                target._parse(instring, loc, doActions=False, callPreParse=False)
                if not self.includeMatch:
                    return loc, [instring[startLoc:loc]]

                skipText = instring[startLoc:loc]
                loc, matched = target._parse(instring, loc, doActions, callPreParse=False)
                if matched:
                    skipRes = ParseResults(skipText)
                    skipRes += matched
                    return loc, [skipRes]
                return loc, [skipText]
            except (ParseException, IndexError):
                loc += 1

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2822
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Install other (a string becomes a Literal) as the wrapped expression.

        The expression's flags, whitespace settings, saveAsList and ignore
        expressions are copied onto this Forward.  Returns None.
        """
        if isinstance(other, basestring):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        target = self.expr
        self.mayIndexError = target.mayIndexError
        self.mayReturnEmpty = target.mayReturnEmpty
        self.setWhitespaceChars(target.whiteChars)
        self.skipWhitespace = target.skipWhitespace
        self.saveAsList = target.saveAsList
        self.ignoreExprs.extend(target.ignoreExprs)
        return None

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; return self."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the wrapped expression once; return self.

        The streamlined flag is set before descending into the expression.
        """
        if self.streamlined:
            return self
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression unless self is already in
        validateTrace, then run the recursion check.  validateTrace is only
        copied, never modified."""
        if self not in validateTrace:
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        """Return self.name if set, else "Forward: " plus the wrapped
        expression's string ("None" when nothing is assigned)."""
        if hasattr(self, "name"):
            return self.name

        # While the wrapped expression is rendered, this object is switched
        # to _ForwardNoRecurse, whose __str__ returns "..."; the class is
        # restored afterwards even if rendering raises.
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                retString = "None"
            else:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = Forward
        return "Forward: " + retString

    def copy(self):
        """Return a copy of this Forward.

        With an expression assigned, the base-class copy is used; otherwise
        a new Forward that wraps this one is returned.
        """
        if self.expr is None:
            wrapper = Forward()
            wrapper << self
            return wrapper
        return super(Forward, self).copy()
2893
class _ForwardNoRecurse(Forward):
    """Forward variant whose string form is always the placeholder "..."."""
    def __str__(self):
        placeholder = "..."
        return placeholder
2897
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded to the base class;
        # saveAsList is then set to False.
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
2903
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""
    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        # stacklevel=2 attributes the warning to the code creating the Upcase.
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a new list holding the upper-cased form of every token."""
        return [string.upper(token) for token in tokenlist]
2913
2914
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore(self, other):
        """Register an ignorable expression; return self.

        In adjacent mode ParserElement.ignore is called directly, bypassing
        the intermediate classes' overrides; otherwise the normal inherited
        ignore() is used.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with one concatenated string token.

        The result is wrapped in a list when this element has a results name
        and the combined results carry named entries.
        """
        combined = tokenlist.copy()
        del combined[:]
        joinedText = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joinedText], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
2945
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens inside a one-element list."""
        grouped = [tokenlist]
        return grouped
2954
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a keyed entry to tokenlist for every non-empty token group.

        The first item of each group is the key (integer keys are converted
        to stripped strings).  The value is "" for a one-item group, the
        second item for a plain two-item group, and otherwise the remainder
        of the group (unwrapped when it is a single item without named
        entries).  The token list is wrapped in a list when this element has
        a results name.
        """
        for position, group in enumerate(tokenlist):
            if len(group) == 0:
                continue

            key = group[0]
            if isinstance(key, int):
                key = _ustr(group[0]).strip()

            if len(group) == 1:
                value = ""
            elif len(group) == 2 and not isinstance(group[1], ParseResults):
                value = group[1]
            else:
                remainder = group.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
2987
2988
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse(self, instring, loc, tokenlist):
        """Discard the matched tokens by returning a new empty list."""
        return list()

    def suppress(self):
        """Return self unchanged."""
        return self
2996
2997
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action on the first successful call only.

        Once the action has completed a call, every later call raises
        ParseException until reset() is used.  If the action itself raises,
        it is not marked as called.
        """
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
3011
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Returns a wrapper that writes an "entering" line to sys.stderr before
    calling the parse action, and a "leaving" line afterwards showing either
    the return value or the exception raised (which is then re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        # __name__ rather than the Python-2-function-only func_name, with a
        # fallback so callables without a name can still be traced.
        thisFunc = getattr(f, "__name__", repr(f))
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # An extra leading argument is taken to be the owning instance.
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (thisFunc, ret))
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3033
3034#
3035# global helpers
3036#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions.

       Parameters:
        - expr - expression for a single list element
        - delim - delimiter between elements (default=",")
        - combine - (default=False) - if True, the whole list is wrapped in
          Combine and returned as a single token string with the delimiters
          included; otherwise the elements are returned as separate tokens and
          the delimiters are suppressed

       The resulting expression is named "expr [delim expr]...".
    """
    exprStr = _ustr(expr)
    dlName = exprStr + " [" + _ustr(delim) + " " + exprStr + "]..."
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
3050
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched expr tokens are returned grouped as a list; the leading
       count token itself is dropped from the results.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # once the count is known, define the array as exactly that many
        # repetitions of expr (or an empty group for a count of zero)
        n = int(t[0])
        if n:
            arrayExpr << Group(And([expr]*n))
        else:
            arrayExpr << Group(empty)
        return []

    countExpr = Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countExpr + arrayExpr )
3064
3065def _flatten(L):
3066    if type(L) is not list: return [L]
3067    if L == []: return L
3068    return _flatten(L[0]) + _flatten(L[1:])
3069
def matchPreviousLiteral(expr):
    """Helper to define an expression that repeats, literally, the tokens
       matched by a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because the repeat is matched as a
       literal, it will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater each time expr matches, from what it matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them as a sequence of literals
            tflat = _flatten(t.asList())
            rep << And( [ Literal(tt) for tt in tflat ] )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3095
def matchPreviousExpr(expr):
    """Helper to define an expression that must produce the same tokens as a
       previous match of expr.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because the repeat is parsed with a
       copy of expr and the resulting tokens are then compared, this will
       *not* match the leading "1:1" in "1:10" ("1" is compared with "10").
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make the repeater reject anything else
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3121
def _escapeRegexRangeChars(s):
    """Escape s for use inside a regular expression '[...]' character set.

       Backslash, '^', '-' and ']' get a leading backslash, and newline and
       tab are rewritten as the two-character sequences \\n and \\t.
    """
    # backslash must be handled first, so the backslashes added for the
    # other characters are not themselves doubled
    replacements = [ (c, "\\"+c) for c in r"\^-]" ]
    replacements.append( ("\n", r"\n") )
    replacements.append( ("\t", r"\t") )
    for old,new in replacements:
        s = s.replace(old,new)
    return _ustr(s)
3129
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list or tuple of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       Any other type of argument for strs produces a SyntaxWarning and is
       then treated like an empty list of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a real list: symbols is edited in place below, and
        # slicing a tuple would only give another (immutable) tuple
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        symbols = []

    # Remove duplicates and reorder so that no symbol is preceded by one of
    # its own prefixes (which would otherwise mask the longer symbol).  After
    # every change the scan for position i starts over; i only advances when
    # a full pass over the remaining symbols finds nothing to change.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                # cur is a prefix of other: move other in front of cur
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character set will do
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3188
def dictOf( key, value ):
    """Helper to define a dictionary from a key pattern and a value pattern.

       Builds Dict( ZeroOrMore( Group( key + value ) ) ), so the caller does not
       have to remember the proper nesting order of Dict, ZeroOrMore and Group.
       The key pattern can include delimiting markers or punctuation, as long
       as they are suppressed, thereby leaving the significant key text.  The
       value pattern can include named results, so that the Dict results can
       include named token fields.
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
3198
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Grammar for the '[...]' range strings accepted by srange().
# a backslash followed by one of the listed punctuation characters; yields just that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# despite the name, both the backslash and ']' are excluded here
_printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ])
# '\0x' followed by hex digits; yields the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# a backslash followed by '0' and further octal digits; yields the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# 'x-y' range: a group holding the two end characters (the dash is dropped)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# Expand a parsed range group into the string of all characters from its first
# to its last end character (inclusive); any other part is returned unchanged.
# NOTE(review): because of the and/or form, a range that expands to the empty
# string (end character before start character) returns the group itself.
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3215
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the string cannot be parsed or expanded, an empty string is returned
       instead of raising an exception.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed range string yields "" - but interpreter
        # exits and keyboard interrupts are no longer swallowed here
        return ""
3236
def matchOnlyAtCol(n):
    """Helper to create a parse action that only accepts a match starting at
       column n of the input text; at any other column the returned parse
       action raises ParseException.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3245
def replaceWith(replStr):
    """Helper to create a parse action that ignores its arguments and always
       returns the given value as the only token.  Especially useful when
       used with transformString().
    """
    def _replFunc(*args):
        # a fresh one-element list on every call; the arguments are not used
        return [ replStr ]
    return _replFunc
3253
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the
       quotation marks) from the first token of a parsed quoted string.
       To use, add this parse action to a quoted string expression using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3260
def upcaseTokens(s,l,t):
    """Helper parse action that returns every token, converted to a string
       with _ustr, in upper case."""
    return [ _ustr(tok).upper() for tok in t ]
3264
def downcaseTokens(s,l,t):
    """Helper parse action that returns every token, converted to a string
       with _ustr, in lower case."""
    return [ _ustr(tok).lower() for tok in t ]
3268
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve the original parsed text, overriding
       the results of any nested parse actions.  The tokens in t are replaced,
       in place, by the slice of the input string running from startLoc to the
       end location reported by getTokensEndLoc()."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    # empty the token list, then refill it with the untouched source text
    del t[:]
    t += ParseResults(originalText)
    return t
3279
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       The call stack is searched for the nearest frame of a function named
       "_parseNoCache", and the value of its local variable "loc" is returned.
       Raises ParseFatalException if no such frame is found."""
    import inspect
    fstack = inspect.stack()
    try:
        # skip this function and its direct caller, then look upwards (through
        # any intervening argument normalizers) for the parsing routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the references to the frame records
        del fstack
3295
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       Parameters:
        - tagStr - tag name as a string, or an expression matching the tag name
        - xml - True to build XML-style tags, False to build HTML-style tags

       Returns the tuple (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        # a plain tag name is matched caselessly for HTML only
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute must have a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: attribute names are lower-cased, and the value is optional and
        # may be quoted with either quote character or not quoted at all
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) +
                         Optional( Suppress("=") + tagAttrValue ) )
    # "empty" result: True when the tag ends with "/", False otherwise
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(tagAttr)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results are named "start<Name>" / "end<Name>", where <Name> is the tag
    # name title-cased, with any ':' separators removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3323
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the tuple (openTag, closeTag)."""
    return _makeTags( tagStr, xml=False )
3327
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the tuple (openTag, closeTag)."""
    return _makeTags( tagStr, xml=True )
3331
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.
       If any positional arguments are given, the keyword arguments are ignored.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.

       The returned parse action raises ParseException if a required attribute is
       missing from the tokens, or is present with a different value.
       """
    # positional name-value pairs take precedence over keyword arguments
    attrs = [ (k,v) for k,v in (args or attrDict.items()) ]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actualValue = tokens[attrName]
                if actualValue != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actualValue, attrValue))
    return pa
# unique marker meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3362
# associativity markers for the operator definitions passed to
# operatorPrecedence; each is a distinct object
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3366
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element (operand)
          of the nested expression grammar; a parenthesized occurrence of the
          complete expression is accepted wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError if numTerms is not 1, 2 or 3, if a 3-term definition
       does not supply exactly two operator expressions, or if rightLeftAssoc
       is neither opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    # lowest level: an operand, or the whole expression wrapped in parentheses
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)
        # In every case below the FollowedBy lookahead must succeed before the
        # Group is attempted, so a lone lower-level expression falls through
        # to the plain lastExpr alternative instead of being grouped.
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: operands simply follow one another
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            # right-associative forms recurse into thisExpr (this same level)
            # for the operands to the right of the operator
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or else anything from the
        # level below; it then becomes the operand level for the next entry
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    # the complete expression is the level built from the last opList entry
    ret << lastExpr
    return ret
3437
# Quoted string expressions.  Between the quotes, each pattern accepts any mix of:
#  - a character other than the quote character, newline, carriage return or backslash
#  - a doubled quote character ("" or '')
#  - \x followed by hex digits
#  - a backslash followed by any single character (other than newline)
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u', combined into a single token
unicodeString = Combine(_L('u') + quotedString.copy())
3442
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.  In that case opener and closer must both
       be strings, otherwise ValueError is raised.  ValueError is also raised
       if opener and closer compare equal.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        # default content: runs of characters that are neither delimiter
        # characters nor default whitespace
        excludedChars = opener+closer+ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        if ignoreExpr is not None:
            # advance one character at a time, stopping where ignoreExpr would match
            content = Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(excludedChars,exact=1))
                        ).setParseAction(stripToken)
        else:
            content = empty + CharsNotIn(excludedChars).setParseAction(stripToken)
    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3482
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr is modified: a backslash followed by a
       line end is added to the expressions it ignores.
    """
    def checkPeerIndent(s,l,t):
        # statement must start at exactly the current indentation column
        if l >= len(s): return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        if curCol < expectedCol:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # block must start deeper than the current level; record the new level
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # after the block, the column must be back at (or left of) the enclosing level
        if l >= len(s): return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        blockBody = Optional(NL) + FollowedBy(blockStatementExpr) + INDENT + statements + UNDENT
    else:
        blockBody = Optional(NL) + statements
    smExpr = Group( blockBody )
    blockStatementExpr.ignore("\\" + LineEnd())
    return smExpr
3534
# characters with codes 0xc0-0xff, leaving out 0xd7 and 0xf7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# characters with codes 0xa1-0xbf, plus 0xd7 and 0xf7
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing HTML tag expressions for a tag name matching the given Word
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# matches '&' + entity name + ';' as one token; the name is available as the "entity" result
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
# replacement characters, in the same order as the entity names; &quot; stands
# for the double quote character (it was previously mapped to a single quote)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action for commonHTMLEntity: returns the replacement character, or None
# (tokens left unchanged) when the entity name is not in the map
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
3542
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# /* ... */ comment, ending at the first "*/"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... --> comment, ending at the first "-->"; may span lines
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to (not including) the end of the line, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace()
# // comment; a backslash directly before the newline continues it onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ comment or a // comment
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
# '#' through the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one list item: words of non-comma printables, keeping embedded spaces/tabs
# that are not directly followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# comma-delimited items, each a quoted string or a plain item; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
3558
3559
3560if __name__ == "__main__":
3561
3562    def test( teststring ):
3563        try:
3564            tokens = simpleSQL.parseString( teststring )
3565            tokenlist = tokens.asList()
3566            print (teststring + "->"   + str(tokenlist))
3567            print ("tokens = "         + str(tokens))
3568            print ("tokens.columns = " + str(tokens.columns))
3569            print ("tokens.tables = "  + str(tokens.tables))
3570            print (tokens.asXML("SQL",True))
3571        except ParseBaseException,err:
3572            print (teststring + "->")
3573            print (err.line)
3574            print (" "*(err.column-1) + "^")
3575            print (err)
3576        print()
3577
3578    selectToken    = CaselessLiteral( "select" )
3579    fromToken      = CaselessLiteral( "from" )
3580
3581    ident          = Word( alphas, alphanums + "_$" )
3582    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3583    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
3584    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3585    tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
3586    simpleSQL      = ( selectToken + \
3587                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
3588                     fromToken + \
3589                     tableNameList.setResultsName( "tables" ) )
3590
3591    test( "SELECT * from XYZZY, ABC" )
3592    test( "select * from SYS.XYZZY" )
3593    test( "Select A from Sys.dual" )
3594    test( "Select AA,BB,CC from Sys.dual" )
3595    test( "Select A, B, C from Sys.dual" )
3596    test( "Select A, B, C from Sys.dual" )
3597    test( "Xelect A, B, C from Sys.dual" )
3598    test( "Select A, B, C frox Sys.dual" )
3599    test( "Select" )
3600    test( "Select ^^^ frox Sys.dual" )
3601    test( "Select A, B, C from Sys.dual, Table2   " )
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。