root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/pyparsing.py

リビジョン 3, 97.0 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1# module pyparsing.py
2#
3# Copyright (c) 2003-2006  Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24#  Todo:
25#  - add pprint() - pretty-print output of defined BNF
26#
27#from __future__ import generators
28
29__doc__ = \
30"""
31pyparsing module - Classes and methods to define and execute parsing grammars
32
33The pyparsing module is an alternative approach to creating and executing simple grammars,
34vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
35don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
36provides a library of classes that you use to construct the grammar directly in Python.
37
38Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
39
40    from pyparsing import Word, alphas
41   
42    # define grammar of a greeting
43    greet = Word( alphas ) + "," + Word( alphas ) + "!"
44   
45    hello = "Hello, World!"
46    print hello, "->", greet.parseString( hello )
47
48The program outputs the following::
49
50    Hello, World! -> ['Hello', ',', 'World', '!']
51
52The Python representation of the grammar is quite readable, owing to the self-explanatory
53class names, and the use of '+', '|' and '^' operators.
54
55The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
56object with named attributes.
57
58The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
60 - quoted strings
61 - embedded comments
62"""
63__version__ = "1.4.1"
64__versionTime__ = "05 February 2006 12:24"
65__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
66
67import string
68import copy,sys
69import warnings
70import re
71#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
72
73def _ustr(obj):
74    """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
75       str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
76       then < returns the unicode object | encodes it with the default encoding | ... >.
77    """
78    try:
79        # If this works, then _ustr(obj) has the same behaviour as str(obj), so
80        # it won't break any existing code.
81        return str(obj)
82       
83    except UnicodeEncodeError, e:
84        # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
85        # state that "The return value must be a string object". However, does a
86        # unicode object (being a subclass of basestring) count as a "string
87        # object"?
88        # If so, then return a unicode object:
89        return unicode(obj)
90        # Else encode it... but how? There are many choices... :)
91        # Replace unprintables with escape codes?
92        #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
93        # Replace unprintables with question marks?
94        #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
95        # ...
96
97def _str2dict(strg):
98    return dict( [(c,0) for c in strg] )
99
# Character sets used to build Word expressions.
# string.lowercase / string.uppercase only exist on Python 2; when they are
# missing fall back to the ASCII-only constants so the module still imports.
alphas     = ( getattr( string, "lowercase", string.ascii_lowercase ) +
               getattr( string, "uppercase", string.ascii_uppercase ) )
nums       = string.digits
hexnums    = nums + "ABCDEFabcdef"
alphanums  = alphas + nums
104
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

       Stored attributes:
        - pstr - the string being parsed
        - loc - the character offset at which the exception was raised
        - msg - the error message
        - parserElement - the element passed as elem (None by default)
       Computed attributes (see __getattr__): lineno, col/column, line.
    """
    __slots__ = ( "loc","msg","pstr","parserElement" )
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc, msg, elem=None ):
        self.loc = loc
        self.msg = msg
        self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        # call form of raise is valid on both Python 2 and Python 3
        raise AttributeError( aname )

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
           An empty markerString returns the line unmarked.  The result is
           stripped of leading and trailing whitespace.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column], markerString, line_str[line_column:]])
        return line_str.strip()
144
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class

       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
153   
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately

       It derives from ParseBaseException rather than ParseException, so
       handlers written as "except ParseException" do not intercept it.
    """
158   
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # trace of parse elements that was passed in by the raiser
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap in a 1-tuple so a tuple-valued trace is formatted as a single
        # value instead of being unpacked as multiple % arguments
        return "RecursiveGrammarException: %s" % ( self.parseElementTrace, )
166
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by results name, as a key (results["<resultsName>"])
       - by attribute (results.<resultsName>)

       Named results are kept in an internal dict mapping each name to a list
       of (value, position) tuples; in "modal" mode only the last value stored
       under a name is returned, otherwise all of them are.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__modal" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object; the
        # __doinit flag then tells __init__ not to reset its contents.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__modal = modal
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not self.__name:
                self.__modal = self.__modal and modal
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = (toklist.copy(),-1)
                    else:
                        self[name] = (ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except TypeError:
                        # toklist is not indexable - store it as-is
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer or slice: index into the token list.  Anything else is
           treated as a results name (KeyError if the name is unknown)."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if self.__modal:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Append a named result under k.  A tuple v is stored as given (the
           code elsewhere uses (value, position)); any other v is stored as
           (v, 0)."""
        if isinstance(v,tuple):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = self

    def __delitem__( self, i ):
        # only the token list is touched; named results are left unchanged
        del self.__toklist[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __iter__( self ): return iter( self.__toklist )
    def keys( self ):
        """Returns all named result keys."""
        return list( self.__tokdict.keys() )

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,v[-1][0]) for k,v in self.__tokdict.items()]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results.  An unknown results name
           yields "" rather than raising AttributeError."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if self.__modal:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __iadd__( self, other ):
        """Append other's tokens to this object and merge in its named
           results, shifting their stored positions by the current length."""
        if other.__tokdict:
            offset = len(self.__toklist)
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in other.__tokdict.items() for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = self
        self.__toklist += other.__toklist
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append( _ustr(i) )
            else:
                parts.append( repr(i) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        """Flatten the tokens into a list of strings, inserting sep between
           top-level items when sep is non-empty.  Nested results are
           flattened without a separator."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; the tokens themselves are
           returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object.  The token list and
           the name dict are copied shallowly."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__modal = self.__modal
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
           Unnamed items are tagged ITEM, or omitted when namedItemsOnly is set.
           Token text is inserted as-is (no XML escaping is applied here)."""
        nl = "\n"
        out = []
        # map stored position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
                else:
                    out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                out += [ nl, nextLevelIndent, "<", resTag, ">", _ustr(res), "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)


    def __lookup(self,sub):
        # find the results name under which the object sub (by identity) is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            return self.__parent.__lookup(self)
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            # exactly one token and one name: that name describes this object
            return list(self.__tokdict.keys())[0]
        else:
            return None
402
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
       The first column is number 1.
    """
    # rfind gives -1 when no newline precedes loc, which makes the first
    # line's columns start at 1 as well
    lastNewline = strg.rfind("\n", 0, loc)
    return loc - lastNewline
408
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
       The first line is number 1.
    """
    newlinesBefore = strg.count("\n", 0, loc)
    return newlinesBefore + 1
414
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() returns -1 for "not found"; index 0 is a valid hit (a string that
    # begins with a newline), so the test must be >= 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
424
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run when a match attempt starts: prints the
       expression, the location, and its (line,column) position."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print("Match %s at loc %s %s" % ( expr, loc, position ))
427
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
       expression and the matched tokens as a list."""
    print("Matched %s -> %s" % ( expr, toks.asList() ))
430   
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the exception."""
    print("Exception raised: %s" % ( exc, ))
433
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and returns None."""
    return None
437
438class ParserElement(object):
439    """Abstract base level parser element class."""
440    DEFAULT_WHITE_CHARS = " \n\t\r"
441
442    def setDefaultWhitespaceChars( chars ):
443        """Overrides the default whitespace chars
444        """
445        ParserElement.DEFAULT_WHITE_CHARS = chars
446    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
447   
448    def __init__( self, savelist=False ):
449        self.parseAction = None
450        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
451        self.strRepr = None
452        self.resultsName = None
453        self.saveAsList = savelist
454        self.skipWhitespace = True
455        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
456        self.mayReturnEmpty = False
457        self.keepTabs = False
458        self.ignoreExprs = []
459        self.debug = False
460        self.streamlined = False
461        self.mayIndexError = True
462        self.errmsg = ""
463        self.modalResults = True
464        self.debugActions = ( None, None, None )
465        self.re = None
466
467    def copy( self ):
468        """Make a copy of this ParserElement.  Useful for defining different parse actions
469           for the same parsing pattern, using copies of the original parse element."""
470        cpy = copy.copy( self )
471        cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
472        return cpy
473
474    def setName( self, name ):
475        """Define name for this expression, for use in debugging."""
476        self.name = name
477        self.errmsg = "Expected " + self.name
478        return self
479
480    def setResultsName( self, name, listAllMatches=False ):
481        """Define name for referencing matching tokens as a nested attribute
482           of the returned parse results.
483           NOTE: this returns a *copy* of the original ParserElement object;
484           this is so that the client can define a basic element, such as an
485           integer, and reference it in multiple places with different names.
486        """
487        newself = self.copy()
488        newself.resultsName = name
489        newself.modalResults = not listAllMatches
490        return newself
491
492    def setParseAction( self, fn ):
493        """Define action to perform when successfully matching parse element definition.
494           Parse action fn is a callable method with the arguments (s, loc, toks) where:
495            - s   = the original string being parsed
496            - loc = the location of the matching substring
497            - toks = a list of the matched tokens, packaged as a ParseResults object
498           If the function fn modifies the tokens, it can return them as the return
499           value from fn, and the modified list of tokens will replace the original.
500           Otherwise, fn does not need to return any value.
501        """
502        self.parseAction = fn
503        return self
504
505    def skipIgnorables( self, instring, loc ):
506        exprsFound = True
507        while exprsFound:
508            exprsFound = False
509            for e in self.ignoreExprs:
510                try:
511                    while 1:
512                        loc,dummy = e.parse( instring, loc )
513                        exprsFound = True
514                except ParseException:
515                    pass
516        return loc
517
518    def preParse( self, instring, loc ):
519        if self.ignoreExprs:
520            loc = self.skipIgnorables( instring, loc )
521       
522        if self.skipWhitespace:
523            wt = self.whiteChars
524            instrlen = len(instring)
525            while loc < instrlen and instring[loc] in wt:
526                loc += 1
527               
528        return loc
529
530    def parseImpl( self, instring, loc, doActions=True ):
531        return loc, []
532
533    def postParse( self, instring, loc, tokenlist ):
534        return tokenlist
535
536    #~ @profile
537    def parse( self, instring, loc, doActions=True, callPreParse=True ):
538        debugging = ( self.debug ) #and doActions )
539
540        if debugging:
541            #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
542            if (self.debugActions[0] ):
543                self.debugActions[0]( instring, loc, self )
544            if callPreParse:
545                loc = self.preParse( instring, loc )
546            tokensStart = loc
547            try:
548                try:
549                    loc,tokens = self.parseImpl( instring, loc, doActions )
550                except IndexError:
551                    raise ParseException, ( instring, len(instring), self.errmsg, self )
552            except ParseException, err:
553                #~ print "Exception raised:", err
554                if (self.debugActions[2] ):
555                    self.debugActions[2]( instring, tokensStart, self, err )
556                raise
557        else:
558            if callPreParse:
559                loc = self.preParse( instring, loc )
560            tokensStart = loc
561            if self.mayIndexError or loc >= len(instring):
562                try:
563                    loc,tokens = self.parseImpl( instring, loc, doActions )
564                except IndexError:
565                    raise ParseException, ( instring, len(instring), self.errmsg, self )
566            else:
567                loc,tokens = self.parseImpl( instring, loc, doActions )
568       
569        tokens = self.postParse( instring, loc, tokens )
570
571        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
572        if self.parseAction and doActions:
573            if debugging:
574                try:
575                    tokens = self.parseAction( instring, tokensStart, retTokens )
576                    if tokens is not None:
577                        if isinstance(tokens,tuple):
578                            tokens = tokens[1]
579                        retTokens = ParseResults( tokens,
580                                                  self.resultsName,
581                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
582                                                  modal=self.modalResults )
583                except ParseException, err:
584                    #~ print "Exception raised in user parse action:", err
585                    if (self.debugActions[2] ):
586                        self.debugActions[2]( instring, tokensStart, self, err )
587                    raise
588            else:
589                tokens = self.parseAction( instring, tokensStart, retTokens )
590                if tokens is not None:
591                    if isinstance(tokens,tuple):
592                        tokens = tokens[1]
593                    retTokens = ParseResults( tokens,
594                                              self.resultsName,
595                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
596                                              modal=self.modalResults )
597
598        if debugging:
599            #~ print "Matched",self,"->",retTokens.asList()
600            if (self.debugActions[1] ):
601                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
602
603        return loc, retTokens
604
605    def tryParse( self, instring, loc ):
606        return self.parse( instring, loc, doActions=False )[0]
607
608    def parseString( self, instring ):
609        """Execute the parse expression with the given string.
610           This is the main interface to the client code, once the complete
611           expression has been built.
612        """
613        if not self.streamlined:
614            self.streamline()
615            self.saveAsList = True
616        for e in self.ignoreExprs:
617            e.streamline()
618        if self.keepTabs:
619            loc, tokens = self.parse( instring, 0 )
620        else:
621            loc, tokens = self.parse( instring.expandtabs(), 0 )
622        return tokens
623
624    def scanString( self, instring ):
625        """Scan the input string for expression matches.  Each match will return the matching tokens, start location, and end location."""
626        if not self.streamlined:
627            self.streamline()
628        for e in self.ignoreExprs:
629            e.streamline()
630       
631        if not self.keepTabs:
632            instring = instring.expandtabs()
633        instrlen = len(instring)
634        loc = 0
635        preparseFn = self.preParse
636        parseFn = self.parse
637        while loc < instrlen:
638            try:
639                loc = preparseFn( instring, loc )
640                nextLoc,tokens = parseFn( instring, loc, callPreParse=False )
641            except ParseException:
642                loc += 1
643            else:
644                yield tokens, loc, nextLoc
645                loc = nextLoc
646       
647    def transformString( self, instring ):
648        """Extension to scanString, to modify matching text with modified tokens that may
649           be returned from a parse action.  To use transformString, define a grammar and
650           attach a parse action to it that modifies the returned token list. 
651           Invoking transformString() on a target string will then scan for matches,
652           and replace the matched text patterns according to the logic in the parse
653           action.  transformString() returns the resulting transformed string."""
654        out = []
655        lastE = 0
656        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
657        # keep string locs straight between transformString and scanString
658        self.keepTabs = True
659        for t,s,e in self.scanString( instring ):
660            out.append( instring[lastE:s] )
661            if t:
662                if isinstance(t,ParseResults):
663                    out += t.asList()
664                elif isinstance(t,list):
665                    out += t
666                else:
667                    out.append(t)
668            lastE = e
669        out.append(instring[lastE:])
670        return "".join(out)
671
672    def searchString( self, instring ):
673        """Another extension to scanString, simplifying the access to the tokens found
674           to match the given parse expression.
675        """
676        return [ t[0] for t,s,e in self.scanString( instring ) ]
677           
678    def __add__(self, other ):
679        """Implementation of + operator - returns And"""
680        if isinstance( other, basestring ):
681            other = Literal( other )
682        if not isinstance( other, ParserElement ):
683            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
684                    SyntaxWarning, stacklevel=2)
685        return And( [ self, other ] )
686
687    def __radd__(self, other ):
688        """Implementation of += operator"""
689        if isinstance( other, basestring ):
690            other = Literal( other )
691        if not isinstance( other, ParserElement ):
692            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
693                    SyntaxWarning, stacklevel=2)
694        return other + self
695
696    def __or__(self, other ):
697        """Implementation of | operator - returns MatchFirst"""
698        if isinstance( other, basestring ):
699            other = Literal( other )
700        if not isinstance( other, ParserElement ):
701            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
702                    SyntaxWarning, stacklevel=2)
703        return MatchFirst( [ self, other ] )
704
705    def __ror__(self, other ):
706        """Implementation of |= operator"""
707        if isinstance( other, basestring ):
708            other = Literal( other )
709        if not isinstance( other, ParserElement ):
710            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
711                    SyntaxWarning, stacklevel=2)
712        return other | self
713
714    def __xor__(self, other ):
715        """Implementation of ^ operator - returns Or"""
716        if isinstance( other, basestring ):
717            other = Literal( other )
718        if not isinstance( other, ParserElement ):
719            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
720                    SyntaxWarning, stacklevel=2)
721        return Or( [ self, other ] )
722
723    def __rxor__(self, other ):
724        """Implementation of ^= operator"""
725        if isinstance( other, basestring ):
726            other = Literal( other )
727        if not isinstance( other, ParserElement ):
728            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
729                    SyntaxWarning, stacklevel=2)
730        return other ^ self
731
732    def __and__(self, other ):
733        """Implementation of & operator - returns Each"""
734        if isinstance( other, basestring ):
735            other = Literal( other )
736        if not isinstance( other, ParserElement ):
737            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
738                    SyntaxWarning, stacklevel=2)
739        return Each( [ self, other ] )
740
741    def __rand__(self, other ):
742        """Implementation of right-& operator"""
743        if isinstance( other, basestring ):
744            other = Literal( other )
745        if not isinstance( other, ParserElement ):
746            warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
747                    SyntaxWarning, stacklevel=2)
748        return other & self
749
750    def __invert__( self ):
751        """Implementation of ~ operator - returns NotAny"""
752        return NotAny( self )
753
754    def suppress( self ):
755        """Suppresses the output of this ParserElement; useful to keep punctuation from
756           cluttering up returned output.
757        """
758        return Suppress( self )
759
760    def leaveWhitespace( self ):
761        """Disables the skipping of whitespace before matching the characters in the
762           ParserElement's defined pattern.  This is normally only used internally by
763           the pyparsing module, but may be needed in some whitespace-sensitive grammars.
764        """
765        self.skipWhitespace = False
766        return self
767
768    def setWhitespaceChars( self, chars ):
769        """Overrides the default whitespace chars
770        """
771        self.skipWhitespace = True
772        self.whiteChars = chars
773       
774    def parseWithTabs( self ):
775        """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
776           Must be called before parseString when the input grammar contains elements that
777           match <TAB> characters."""
778        self.keepTabs = True
779        return self
780       
781    def ignore( self, other ):
782        """Define expression to be ignored (e.g., comments) while doing pattern
783           matching; may be called repeatedly, to define multiple comment or other
784           ignorable patterns.
785        """
786        if isinstance( other, Suppress ):
787            if other not in self.ignoreExprs:
788                self.ignoreExprs.append( other )
789        else:
790            self.ignoreExprs.append( Suppress( other ) )
791        return self
792
793    def setDebugActions( self, startAction, successAction, exceptionAction ):
794        """Enable display of debugging messages while doing pattern matching."""
795        self.debugActions = (startAction or _defaultStartDebugAction,
796                             successAction or _defaultSuccessDebugAction,
797                             exceptionAction or _defaultExceptionDebugAction)
798        self.debug = True
799        return self
800
801    def setDebug( self, flag=True ):
802        """Enable display of debugging messages while doing pattern matching."""
803        if flag:
804            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
805        else:
806            self.debug = False
807        return self
808
809    def __str__( self ):
810        return self.name
811
812    def __repr__( self ):
813        return _ustr(self)
814       
815    def streamline( self ):
816        self.streamlined = True
817        self.strRepr = None
818        return self
819       
820    def checkRecursion( self, parseElementList ):
821        pass
822       
823    def validate( self, validateTrace=[] ):
824        """Check defined expressions for valid structure, check for infinite recursive definitions."""
825        self.checkRecursion( [] )
826
827    def parseFile( self, file_or_filename ):
828        """Execute the parse expression on the given file or filename.
829           If a filename is specified (instead of a file object),
830           the entire file is opened, read, and closed before parsing.
831        """
832        try:
833            file_contents = file_or_filename.read()
834        except AttributeError:
835            f = open(file_or_filename, "rb")
836            file_contents = f.read()
837            f.close()
838        return self.parseString(file_contents)
839
840
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)
        # one exception object is created up front and kept on the instance;
        # the parseImpl methods in this module update and re-raise it
        self.myException = ParseException("", 0, "", self)

    def setName(self, name):
        """Names this token via the base class, then refreshes the
           "Expected <name>" error message (also on the stored exception).
           Returns whatever the base class setName returned.
        """
        named = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        named.myException.msg = self.errmsg
        return named
852
853
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
861
862
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Always fails: re-raises the stored exception after stamping it
           with the current location and input string."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
878
879
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # an empty literal is re-typed in place as an Empty token
            self.__class__ = Empty
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, matched string) when the literal is found
           at loc; otherwise re-raises the stored exception, stamped with the
           failing location and input string."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
911
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to Keyword.DEFAULT_KEYWORD_CHARS (initially all alphanumerics + "_"
       and "$"); caseless allows case-insensitive matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            # Look the default up at call time.  A default bound in the
            # signature is frozen when the class is defined, so a later
            # setDefaultKeywordChars() would not reach new keywords.
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, keyword) when the keyword is found at loc
           and is neither followed nor preceded by an identifier character;
           otherwise re-raises the stored exception, stamped with the failing
           location and input string."""
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch) and
                    (loc >= len(instring) - self.matchLen or
                     instring[loc + self.matchLen].upper() not in self.identChars) and
                    (loc == 0 or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                    (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                    (loc >= len(instring) - self.matchLen or
                     instring[loc + self.matchLen] not in self.identChars) and
                    (loc == 0 or instring[loc - 1] not in self.identChars)):
                return loc + self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Returns a copy of this keyword whose identChars is reset to the
           current Keyword.DEFAULT_KEYWORD_CHARS."""
        c = super(Keyword, self).copy()
        # NOTE(review): this discards any custom identChars and stores the raw
        # default string rather than the _str2dict() result built in __init__
        # - confirm that this is intended.
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
972
973
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the base Literal holds the upper-cased form used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, defining literal) when the upper-cased input
           at loc equals the upper-cased match string; otherwise re-raises the
           stored exception, stamped with the failing location and input."""
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return end, self.returnString
995
class CaselessKeyword(Keyword):
    """Keyword that is matched without regard to the case of letters.
       The matched result is always the keyword as given to the constructor,
       not the text found in the input.
    """
    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        # was "identCars": a NameError on every construction
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, keyword) when the upper-cased input at loc
           equals the upper-cased keyword and is not followed by an identifier
           character; otherwise re-raises the stored exception, stamped with
           the failing location and input string.

           Unlike the caseless branch of Keyword.parseImpl, the character
           before loc is not examined here.
        """
        if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch) and
                (loc >= len(instring) - self.matchLen or
                 instring[loc + self.matchLen].upper() not in self.identChars)):
            return loc + self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1009
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0):
        super(Word, self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            # no (or empty) body set given: the body uses the initial characters
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = sys.maxint

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False

        # Fast path: with default length limits and no space in either
        # character set, matching is delegated to a compiled regex.
        # NOTE(review): when this branch is skipped, self.re is not assigned
        # here; parseImpl reads it, so it is presumably initialised by the
        # base class - confirm.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A bare escaped literal can stand in for the initial character
                # class only when there is exactly ONE initial character.  The
                # old test looked at the body set instead, so Word("abc", "d")
                # compiled to "abc[d]*" and required the literal text "abc".
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # pattern could not be compiled: use the character-by-character scan
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, matched word) when a word starts at loc;
           otherwise re-raises the stored exception, stamped with the failing
           location and input string."""
        if self.re:
            result = self.re.match(instring, loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc, result.group()

        if not(instring[loc] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan past the maximum length or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that would continue past it is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__(self):
        """Returns the assigned name if the base class can supply one, else a
           generated (and cached) "W:(...)" description of the character sets."""
        try:
            return super(Word, self).__str__()
        except Exception:
            # base class could not supply a name: describe the character sets
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s) > 4:
                    return s[:4]+"..."
                else:
                    return s

            if (self.initCharsOrig != self.bodyCharsOrig):
                self.strRepr = "W:(%s,%s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1125
1126
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """

    def __init__(self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex, self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except Exception:
            # warn about the bad pattern, then let the original error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, ParseResults holding the matched text) when
           the regex matches at loc; named groups of the match are stored in
           the results under their group names.  On failure re-raises the
           stored exception, stamped with the failing location and input."""
        found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        named = found.groupdict()
        tokens = ParseResults(found.group())
        for key, value in named.items():
            tokens[key] = value
        return found.end(), tokens

    def __str__(self):
        """Returns the assigned name if the base class can supply one, else a
           generated (and cached) "Re:(...)" description of the pattern."""
        try:
            return super(Regex, self).__str__()
        except:
            pass

        description = self.strRepr
        if description is None:
            description = "Re:(%s)" % repr(self.pattern)
            self.strRepr = description
        return description
1182
1183
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True):
        """
           Defined with the following parameters:
           - quoteChar - string of one or more characters defining the quote delimiting string
           - escChar - character to escape quotes, typically backslash (default=None)
           - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
           - multiline - boolean indicating whether quotes can span multiple lines (default=False)
           - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        """
        super(QuotedString, self).__init__()

        # remove white space from quote char - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # The pattern is: opening quote, then a repeated group of alternatives
        # for the body, then the closing quote.  The first alternative is any
        # single character other than the first quote character and the escape
        # character (and, unless multiline, other than a line break).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            opening = r'%s([^%s%s]'
        else:
            self.flags = 0
            opening = r'%s([^%s\n\r%s]'
        self.pattern = opening % (
            re.escape(self.quoteChar),
            _escapeRegexRangeChars(self.quoteChar[0]),
            (escChar is not None and _escapeRegexRangeChars(escChar) or ''))

        if len(self.quoteChar) > 1:
            # multi-character quote: also accept each proper prefix of the
            # quote followed by a character that does not continue the quote
            partials = []
            for i in range(len(self.quoteChar) - 1, 0, -1):
                partials.append("%s[^%s]" % (re.escape(self.quoteChar[:i]),
                                             _escapeRegexRangeChars(self.quoteChar[i])))
            self.pattern += '|(' + ')|('.join(partials) + ')'
        if escQuote:
            self.pattern += (r'|(%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.quoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except Exception:
            # warn about the bad pattern, then let the original error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, string) when a quoted string starts at loc.
           With unquoteResults set, the delimiters are stripped and escaped
           characters / escaped quotes are replaced by what they stand for.
           On failure re-raises the stored exception, stamped with the failing
           location and input string."""
        found = None
        # cheap first-character test before running the regex
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        loc = found.end()
        text = found.group()

        if self.unquoteResults:
            # strip off quotes
            text = text[self.quoteCharLen:-self.quoteCharLen]

            if isinstance(text, basestring):
                # replace escaped characters
                if self.escChar:
                    text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

                # replace escaped quotes
                if self.escQuote:
                    text = text.replace(self.escQuote, self.quoteChar)

        return loc, text

    def __str__(self):
        """Returns the assigned name if the base class can supply one, else a
           generated (and cached) description naming the quote characters."""
        try:
            return super(QuotedString, self).__str__()
        except:
            pass

        description = self.strRepr
        if description is None:
            description = "quoted string, delimited by %s characters" % self.quoteChar
            self.strRepr = description
        return description
1287
1288
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        # an exact length overrides both bounds; max <= 0 means "no upper limit"
        if exact > 0:
            self.minLen = self.maxLen = exact
        elif max > 0:
            self.minLen, self.maxLen = min, max
        else:
            self.minLen, self.maxLen = min, sys.maxint

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, matched text) for the run of characters at
           loc that are not in notChars, limited to the maximum length.  Fails
           when the first character is disallowed or the run is shorter than
           the minimum, by re-raising the stored exception stamped with the
           failing location and input string."""
        forbidden = self.notChars
        if instring[loc] in forbidden:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] not in forbidden:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__(self):
        """Returns the assigned name if the base class can supply one, else a
           generated (and cached) "!W:(...)" description, abbreviated to the
           first four disallowed characters when there are more."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
1354
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # printable labels used to build the token's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # characters this token has to match are removed from whiteChars
        remaining = [c for c in self.whiteChars if c not in self.matchWhite]
        self.whiteChars = "".join(remaining)
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

        # an exact length overrides both bounds; max <= 0 means "no upper limit"
        if exact > 0:
            self.minLen = self.maxLen = exact
        elif max > 0:
            self.minLen, self.maxLen = min, max
        else:
            self.minLen, self.maxLen = min, sys.maxint

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (new location, matched whitespace) for the run of matchWhite
           characters at loc, limited to the maximum length.  Fails when the
           first character is not in matchWhite or the run is shorter than the
           minimum, by re-raising the stored exception stamped with the
           failing location and input string."""
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
1411
1412
class PositionToken(Token):
    """Base class for the position-based tokens below; named after its own
       class and flagged as possibly returning an empty match."""

    def __init__(self):
        super(PositionToken, self).__init__()
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
1418
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""

    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Returns loc advanced over ignorable expressions and whitespace,
           stopping as soon as the target column is reached."""
        if col(loc, instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self.skipIgnorables(instring, loc)
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (location of the target column, text skipped to get there);
           raises ParseException when loc is already past the target column."""
        thiscol = col(loc, instring)
        if thiscol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        newloc = loc + self.col - thiscol
        skipped = instring[loc:newloc]
        return newloc, skipped
1441
class LineStart(PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__(self):
        super(LineStart, self).__init__()
        # only blanks and tabs are skipped; newlines are significant here
        self.whiteChars = " \t"
        self.errmsg = "Expected start of line"
        self.myException.msg = self.errmsg

    def preParse(self, instring, loc):
        """Returns loc after the base class preParse, stepped over one newline
           if the resulting position holds one."""
        loc = super(LineStart, self).preParse(instring, loc)
        # The base preParse may stop exactly at the end of the input; indexing
        # there used to raise IndexError instead of letting parseImpl report
        # an ordinary parse failure.
        if loc < len(instring) and instring[loc] == "\n":
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (loc, []) when loc is 0 or directly follows a newline (and
           is inside the string); otherwise re-raises the stored exception,
           stamped with the failing location and input string."""
        if not(loc == 0 or (loc < len(instring) and instring[loc-1] == "\n")):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
1464
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        super(LineEnd, self).__init__()
        self.whiteChars = " \t"
        self.errmsg = "Expected end of line"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (loc, []) at or past the end of the input, or (loc+1, "\\n")
           when a newline is at loc; otherwise re-raises the stored exception,
           stamped with the failing location and input string."""
        if loc >= len(instring):
            return loc, []
        if instring[loc] == "\n":
            return loc + 1, "\n"
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1485
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (loc, []) when loc is 0, or when everything before loc is
           skipped by preParse starting from 0; otherwise re-raises the stored
           exception, stamped with the failing location and input string."""
        # see if entire string up to here is just whitespace and ignoreables
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
1503
class StringEnd(PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Returns (loc, []) when loc is at or past the end of the input;
           otherwise re-raises the stored exception, stamped with the failing
           location and input string."""
        if not loc < len(instring):
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1519
1520
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement that holds a list of contained
       expressions (self.exprs), for combining and post-processing parsed tokens."""

    def __init__(self, exprs, savelist=False):
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            # a list is adopted as-is (it is not copied)
            contained = exprs
        elif isinstance(exprs, basestring):
            # a bare string stands for a single Literal
            contained = [Literal(exprs)]
        else:
            # anything else is treated as one lone expression
            contained = [exprs]
        self.exprs = contained

    def __getitem__(self, i):
        """Index directly into the list of contained expressions."""
        return self.exprs[i]

    def append(self, other):
        """Add one more contained expression and return self."""
        self.exprs.append(other)
        # the cached string form no longer reflects the contents
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Turn off whitespace skipping on this element, replace every contained
           expression with a shallow copy, and invoke leaveWhitespace on each copy."""
        self.skipWhitespace = False
        duplicates = []
        for contained in self.exprs:
            duplicates.append(copy.copy(contained))
        self.exprs = duplicates
        for contained in self.exprs:
            contained.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as an ignorable expression on this element and pass the
           resulting entry on to every contained expression.  A Suppress instance
           that is already present in self.ignoreExprs is not registered again.
           Returns self."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for contained in self.exprs:
            # hand down the last entry of self.ignoreExprs (presumably the one
            # the base-class call just added)
            contained.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        try:
            return super(ParseExpression, self).__str__()
        except:
            # deliberately broad: any failure in the base class falls back to
            # the generated representation below
            pass

        if self.strRepr is None:
            className = self.__class__.__name__
            self.strRepr = "%s:(%s)" % (className, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline this element and its contained expressions, then flatten
           two-element nestings of the same class."""
        super(ParseExpression, self).streamline()

        for contained in self.exprs:
            contained.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) != 2:
            return self

        def absorbable(candidate):
            # same class as self, and nothing attached that flattening would lose
            return (isinstance(candidate, self.__class__) and
                    candidate.parseAction is None and
                    candidate.resultsName is None and
                    not candidate.debug)

        first = self.exprs[0]
        if absorbable(first):
            self.exprs = first.exprs[:] + [self.exprs[1]]
            self.strRepr = None

        last = self.exprs[-1]
        if absorbable(last):
            self.exprs = self.exprs[:-1] + last.exprs[:]
            self.strRepr = None

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate unchanged to the base-class setResultsName."""
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression, passing along a copy of the trace
           extended with self, then run the recursion check on this element."""
        trace = validateTrace[:] + [self]
        for contained in self.exprs:
            contained.validate(trace)
        self.checkRecursion([])
1609
1610
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # Work from self.exprs - the list normalized by ParseExpression.__init__ -
        # rather than the raw argument, so that a bare string or a single
        # expression (both accepted by the base class) is handled correctly
        # instead of being iterated/indexed directly.
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                # one element that must consume input makes the whole sequence do so
                self.mayReturnEmpty = False
                break
        # whitespace handling follows that of the first expression in the sequence
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.whiteChars = self.exprs[0].whiteChars

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in turn, each starting where the
           previous one stopped, and return the final location together with
           the accumulated tokens.  A failure in any expression propagates."""
        loc, resultlist = self.exprs[0].parse( instring, loc, doActions )
        for e in self.exprs[1:]:
            loc, exprtokens = e.parse( instring, loc, doActions )
            # add results that carry either tokens or named results
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Support 'expr += other': append other (a string becomes a Literal)
           to this And in place."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on contained expressions, stopping after the
           first one that cannot return empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
1654   
1655
1656class Or(ParseExpression):
1657    """Requires that at least one ParseExpression is found.
1658       If two expressions match, the expression that matches the longest string will be used.
1659       May be constructed using the '^' operator.
1660    """
1661    def __init__( self, exprs, savelist = False ):
1662        super(Or,self).__init__(exprs, savelist)
1663        self.mayReturnEmpty = False
1664        for e in exprs:
1665            if e.mayReturnEmpty:
1666                self.mayReturnEmpty = True
1667                break
1668   
1669    def parseImpl( self, instring, loc, doActions=True ):
1670        maxExcLoc = -1
1671        maxMatchLoc = -1
1672        for e in self.exprs:
1673            try:
1674                loc2 = e.tryParse( instring, loc )
1675            except ParseException, err:
1676                if err.loc > maxExcLoc:
1677                    maxException = err
1678                    maxExcLoc = err.loc
1679            except IndexError, err:
1680                if len(instring) > maxExcLoc:
1681                    maxException = ParseException(instring,len(instring),e.errmsg,self)
1682                    maxExcLoc = len(instring)
1683            else:
1684                if loc2 > maxMatchLoc:
1685                    maxMatchLoc = loc2
1686                    maxMatchExp = e
1687       
1688        if maxMatchLoc < 0:
1689            if self.exprs:
1690                raise maxException
1691            else:
1692                raise ParseException(instring, loc, "no defined alternatives to match", self)
1693
1694        return maxMatchExp.parse( instring, loc, doActions )
1695
1696    def __ixor__(self, other ):
1697        if isinstance( other, basestring ):
1698            other = Literal( other )
1699        return self.append( other ) #Or( [ self, other ] )
1700
1701    def __str__( self ):
1702        if hasattr(self,"name"):
1703            return self.name
1704           
1705        if self.strRepr is None:
1706            self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
1707       
1708        return self.strRepr
1709   
1710    def checkRecursion( self, parseElementList ):
1711        subRecCheckList = parseElementList[:] + [ self ]
1712        for e in self.exprs:
1713            e.checkRecursion( subRecCheckList )
1714
1715
1716class MatchFirst(ParseExpression):
1717    """Requires that at least one ParseExpression is found.
1718       If two expressions match, the first one listed is the one that will match.
1719       May be constructed using the '|' operator.
1720    """
1721    def __init__( self, exprs, savelist = False ):
1722        super(MatchFirst,self).__init__(exprs, savelist)
1723        if exprs:
1724            self.mayReturnEmpty = False
1725            for e in exprs:
1726                if e.mayReturnEmpty:
1727                    self.mayReturnEmpty = True
1728                    break
1729        else:
1730            self.mayReturnEmpty = True
1731   
1732    def parseImpl( self, instring, loc, doActions=True ):
1733        maxExcLoc = -1
1734        for e in self.exprs:
1735            try:
1736                ret = e.parse( instring, loc, doActions )
1737                return ret
1738            except ParseException, err:
1739                if err.loc > maxExcLoc:
1740                    maxException = err
1741                    maxExcLoc = err.loc
1742            except IndexError, err:
1743                if len(instring) > maxExcLoc:
1744                    maxException = ParseException(instring,len(instring),e.errmsg,self)
1745                    maxExcLoc = len(instring)
1746
1747        # only got here if no expression matched, raise exception for match that made it the furthest
1748        else:
1749            if self.exprs:
1750                raise maxException
1751            else:
1752                raise ParseException(instring, loc, "no defined alternatives to match", self)
1753
1754    def __ior__(self, other ):
1755        if isinstance( other, basestring ):
1756            other = Literal( other )
1757        return self.append( other ) #MatchFirst( [ self, other ] )
1758
1759    def __str__( self ):
1760        if hasattr(self,"name"):
1761            return self.name
1762           
1763        if self.strRepr is None:
1764            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
1765       
1766        return self.strRepr
1767   
1768    def checkRecursion( self, parseElementList ):
1769        subRecCheckList = parseElementList[:] + [ self ]
1770        for e in self.exprs:
1771            e.checkRecursion( subRecCheckList )
1772
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # Classify self.exprs - the list normalized by ParseExpression.__init__ -
        # rather than the raw argument, so that a bare string or a single
        # expression (both accepted by the base class) is handled correctly.
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # Optional/ZeroOrMore/OneOrMore wrappers are unwrapped to their inner
        # expression and sorted into categories; everything else is required.
        self.optionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        # a OneOrMore expression must be seen at least once, so it is also required
        self.required += self.multirequired

    def parseImpl( self, instring, loc, doActions=True ):
        """Determine with tryParse the order in which the expressions match,
           then parse them for real in that order and merge their results.
           Raises ParseException if any required expression never matched."""
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # trial pass: keep sweeping over the candidates until a whole sweep
        # produces no match at all
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # once-only expressions are dropped after their first match;
                    # the multi* lists are left intact so they can match again
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ str(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # real pass: parse again in the discovered order, honoring doActions
        resultlist = []
        for e in matchOrder:
            loc,results = e.parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # for a results name that is already present, build the combined
            # value first and store it back after the merge
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
1851
1852
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement that wraps a single contained
       expression (self.expr, which may be None), for combining and
       post-processing parsed tokens."""

    def __init__(self, expr, savelist=False):
        super(ParseElementEnhance, self).__init__(savelist)
        # a bare string stands for a Literal
        wrapped = expr
        if isinstance(wrapped, basestring):
            wrapped = Literal(wrapped)
        self.expr = wrapped
        self.strRepr = None
        if wrapped is not None:
            # take over these settings from the contained expression
            self.mayIndexError = wrapped.mayIndexError
            self.skipWhitespace = wrapped.skipWhitespace
            self.whiteChars = wrapped.whiteChars

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the contained expression; with no contained expression,
           raise a ParseException with an empty message."""
        if self.expr is None:
            raise ParseException(instring, loc, "", self)
        return self.expr.parse(instring, loc, doActions)

    def leaveWhitespace(self):
        """Turn off whitespace skipping here, replace the contained expression
           with a shallow copy, and invoke leaveWhitespace on that copy (if any)."""
        self.skipWhitespace = False
        duplicate = copy.copy(self.expr)
        self.expr = duplicate
        if duplicate is not None:
            duplicate.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as an ignorable expression on this element and pass the
           resulting entry on to the contained expression, if there is one.  A
           Suppress instance already present in self.ignoreExprs is not
           registered again.  Returns self."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            # hand down the last entry of self.ignoreExprs (presumably the one
            # the base-class call just added)
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element, then the contained expression if present."""
        super(ParseElementEnhance, self).streamline()
        inner = self.expr
        if inner is not None:
            inner.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if this element already appears in
           parseElementList; otherwise continue the check in the contained
           expression with this element added to the list."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        visited = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(visited)

    def validate(self, validateTrace=[]):
        """Validate the contained expression (if any) with a copy of the trace
           extended by self, then run the recursion check on this element."""
        trace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        try:
            return super(ParseElementEnhance, self).__str__()
        except:
            # deliberately broad: any failure in the base class falls back to
            # the generated representation below
            pass

        needsBuilding = self.strRepr is None and self.expr is not None
        if needsBuilding:
            className = self.__class__.__name__
            self.strRepr = "%s:(%s)" % (className, _ustr(self.expr))
        return self.strRepr
1919
1920
class FollowedBy(ParseElementEnhance):
    """Positive lookahead.  FollowedBy checks that the given parse expression
    matches at the current position but does *not* advance the parsing
    position within the input string.  FollowedBy always returns a null
    token list."""

    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Trial-parse the contained expression at loc; if that does not raise,
        return the unchanged loc with an empty token list."""
        # only the success or failure of the trial matters; its result is dropped
        self.expr.tryParse(instring, loc)
        return loc, []
1933
1934
class NotAny(ParseElementEnhance):
    """Negative lookahead.  NotAny verifies that the given parse expression
    does *not* match at the current position, and does *not* advance the
    parsing position within the input string.  Also, NotAny does *not* skip
    over leading whitespace.  NotAny always returns a null token list.
    May be constructed using the '~' operator."""

    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace() here: the setting must not be
        # propagated to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unexpected token, " + _ustr(self.expr)
        self.myException = ParseException("", 0, self.errmsg, self)

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) when the contained expression fails to match at loc;
        raise this element's exception when it does match."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the contained expression did not match - which is what we want
            return loc, []

        # it matched: report that through the exception stored on the element
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
1970
1971
class ZeroOrMore(ParseElementEnhance):
    """Matches the given expression as many times in a row as it will match,
       including not at all."""

    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the contained expression repeatedly from loc and return the
           location reached together with the accumulated tokens (an empty list
           when even the first attempt fails)."""
        collected = []
        try:
            loc, collected = self.expr.parse(instring, loc, doActions)
            skipBetween = len(self.ignoreExprs) > 0
            while True:
                if skipBetween:
                    # step over ignorable text before the next repetition
                    loc = self.skipIgnorables(instring, loc)
                loc, more = self.expr.parse(instring, loc, doActions)
                # add results that carry either tokens or named results
                if more or more.keys():
                    collected += more
        except (ParseException, IndexError):
            # the first failed attempt ends the repetition
            pass

        return loc, collected

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the returned
           element to save its results as a list."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2007   
2008
class OneOrMore(ParseElementEnhance):
    """Matches the given expression one or more times in a row."""

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the contained expression once (a failure here propagates), then
           keep parsing it for as long as it matches.  Returns the location
           reached together with the accumulated tokens."""
        # the first match is mandatory, so it sits outside the try block
        loc, collected = self.expr.parse(instring, loc, doActions)
        try:
            skipBetween = len(self.ignoreExprs) > 0
            while True:
                if skipBetween:
                    # step over ignorable text before the next repetition
                    loc = self.skipIgnorables(instring, loc)
                loc, more = self.expr.parse(instring, loc, doActions)
                # add results that carry either tokens or named results
                if more or more.keys():
                    collected += more
        except (ParseException, IndexError):
            # the first failed attempt ends the repetition
            pass

        return loc, collected

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the returned
           element to save its results as a list."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2040   
2041
class Optional(ParseElementEnhance):
    """Matches the given expression if it is present and succeeds without
       consuming anything if it is not.  A default value can be supplied; when
       it is not None it is returned as the sole token if the expression is
       not found.
    """

    def __init__(self, exprs, default=None):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the contained expression's result, or - when it fails - the
           unchanged loc with either [defaultValue] or an empty list."""
        try:
            loc, found = self.expr.parse(instring, loc, doActions)
        except (ParseException, IndexError):
            found = []
            if self.defaultValue is not None:
                found = [self.defaultValue]

        return loc, found

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
2071
2072
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # attach the ignore expression to a copy, leaving the caller's
            # target expression untouched
            self.expr = copy.copy( self.expr )
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.errmsg = "No match found for "+_ustr(self.expr)
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance through instring from loc until the target expression matches.

           Returns (loc, [skippedText]) with loc at the start of the match, or,
           when include was requested, loc after the match and the match's
           tokens appended (if there are any).  Raises this element's exception
           if the end of the string is reached without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        while loc < instrlen:
            try:
                loc = expr.skipIgnorables( instring, loc )
                # probe only: no parse actions, and the match must begin exactly at loc
                expr.parse( instring, loc, doActions=False, callPreParse=False )
                if self.includeMatch:
                    skipText = instring[startLoc:loc]
                    # Consume the match for real, honoring the caller's doActions
                    # so that parse actions do not fire when this element is
                    # itself only being tried with actions turned off.
                    loc,mat = expr.parse( instring, loc, doActions )
                    if mat:
                        return loc, [ skipText, mat ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ instring[startLoc:loc] ]
            except (ParseException,IndexError):
                # no match here - try again one character further on
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2113
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward to not overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Define the expression this Forward stands for and return self.
           A bare string is wrapped in a Literal, as the in-place operators of
           And, Or and MatchFirst do."""
        if isinstance( other, basestring ):
            other = Literal( other )
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only; the contained
           expression is left untouched."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; the streamlined flag is
           set before descending, so a grammar that refers back to this
           Forward does not recurse without end."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless this Forward is already in
           the trace, then run the recursion check on this element."""
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # While the contained expression is being rendered, this instance
        # temporarily becomes a _ForwardNoRecurse, so a recursive reference back
        # to it prints as "..." instead of recursing.  The instance's own class
        # is remembered and restored afterwards, so that subclasses of Forward
        # are not turned into plain Forward objects by being printed.
        originalClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = originalClass
        return "Forward: "+retString
2168
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ switches an instance to while its
       contained expression is being rendered; its string form is a fixed
       "..." placeholder."""

    def __str__(self):
        placeholder = "..."
        return placeholder
2172       
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        # savelist is accepted but is not passed on to the base class
        super(TokenConverter, self).__init__(expr)
2177
2178
class Upcase(TokenConverter):
    """Converter that upper-cases every matched token.  Deprecated: a warning
       is issued whenever an instance is created."""

    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        # stacklevel=2 attributes the warning to the code creating the Upcase
        warnings.warn(
            "Upcase class is deprecated, use upcaseTokens parse action instead",
            DeprecationWarning,
            stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a list holding the upper-cased form of each token."""
        return [string.upper(token) for token in tokenlist]
2188
2189
class Combine(TokenConverter):
    """Converter that joins all matched tokens into a single string.
       By default the matched pieces must also be contiguous in the input string;
       pass 'adjacent=False' to the constructor to allow gaps between them.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore(self, other):
        """Register an ignorable expression.  When adjacency is required,
           ParserElement.ignore is called directly instead of the inherited
           implementation.  Returns self."""
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Collapse tokenlist into a results object holding one joined string."""
        # start from a copy of the incoming results, emptied of its tokens
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
2220
class Group(TokenConverter):
    """Converter that returns the matched tokens wrapped in a list of their own -
       useful for keeping the tokens of ZeroOrMore and OneOrMore expressions together."""

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Return the token list nested inside a one-element list."""
        grouped = [tokenlist]
        return grouped
2229       
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and additionally
       makes each element reachable by key, the key being the first token of that element.
       Useful for tabular report scraping when the first column can be used as an item key.
    """

    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Store, for every element of tokenlist, a (value, index) pair under
           the element's stripped first token, then return the token list
           (wrapped in a list when this element has a results name)."""
        for position, entry in enumerate(tokenlist):
            key = _ustr(entry[0]).strip()
            size = len(entry)
            if size == 1:
                # a key with nothing after it maps to the empty string
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                # a key followed by one plain token maps straight to that token
                value = entry[1]
            else:
                # otherwise work on a copy of the element with the key removed
                remainder = entry.copy()
                del remainder[0]
                keepWhole = (len(remainder) != 1 or
                             (isinstance(remainder, ParseResults) and remainder.keys()))
                if keepWhole:
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = (value, position)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
2258
2259
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        """Return an empty list, whatever tokens were matched."""
        nothing = []
        return nothing

    def suppress(self):
        """Return self: this element already suppresses its results."""
        return self
2267
2268#
2269# global helpers
2270#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       With combine=False (the default) the result is expr followed by zero or more
       repetitions of a suppressed delim and expr, so only the list elements show up
       as tokens.  With combine=True the whole construct is wrapped in Combine and
       the delimiters are not suppressed.  Either way the returned expression is
       named after expr and delim followed by "...".
    """
    if combine:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    listName = _ustr(expr) + _ustr(delim) + "..."
    return listExpr.setName(listName)
2283
def _escapeRegexRangeChars(s):
    """Return s (passed through _ustr) with the characters backslash, ^, - and ]
       preceded by a backslash, and with newline and tab characters replaced by
       the two-character sequences \\n and \\t."""
    # the backslash comes first so that backslashes added by the later
    # replacements are not doubled again
    replacements = (
        ("\\", "\\\\"),
        ("^", "\\^"),
        ("-", "\\-"),
        ("]", "\\]"),
        ("\n", r"\n"),
        ("\t", r"\t"),
    )
    for plain, escaped in replacements:
        s = s.replace(plain, escaped)
    return _ustr(s)
2291   
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       If strs is neither a string nor a list, a SyntaxWarning is issued and a
       MatchFirst with no alternatives is returned.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,list):
        symbols = strs[:]
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # There are no symbols to work with.  Return an expression without
        # alternatives instead of falling through to the code below, which
        # would fail with a NameError on the unbound 'symbols'.
        return MatchFirst( [] )

    # Drop duplicates, and move any symbol that starts with an earlier symbol
    # (e.g. "<=" after "<") in front of it, so that the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing after position i conflicts with it - move on
            i += 1

    if not caseless and useRegex:
        #~ print strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
2350
def dictOf( key, value ):
    """Helper that builds a dictionary-style expression from a key pattern and a
       value pattern, wrapping them in Group, ZeroOrMore and Dict in the required
       order.  Delimiters or punctuation may appear in the key pattern provided
       they are suppressed, so that only the significant key text remains.  The
       value pattern may carry named results, which then show up as named token
       fields within the Dict results.
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
2360
# a single backslash character
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join( c for c in string.printable if c not in string.whitespace )
2363
# Convenience constants for positional expressions: one shared instance of
# each, labeled through setName() with the name it is bound to.
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
2370
# Expressions used by srange() to parse a regex-style "[...]" character set.

# backslash followed by one of the listed punctuation characters; yields the
# character after the backslash
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(
    lambda s,l,t: t[0][1] )
# printable characters other than backslash and ']'
_printables_less_backslash = printables.replace( "\\", "" ).replace( "]", "" )
# backslash + "0x" + hex digits; yields the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(
    lambda s,l,t: unichr(int(t[0],16)) )
# backslash + "0" + octal digits; yields the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(
    lambda s,l,t: unichr(int(t[0],8)) )
_singleChar = ( _escapedPunc
                | _escapedHexChar
                | _escapedOctChar
                | Word(_printables_less_backslash,exact=1) )
# two single characters separated by a dash, grouped as a pair
_charRange = Group( _singleChar + Suppress("-") + _singleChar )
_reBracketExpr = ( "["
                   + Optional("^").setResultsName("negate")
                   + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
                   + "]" )

def _expanded(p):
    """Expand a parsed (first, last) range into the string of every character
       from first through last; any other value is returned unchanged.  A range
       that expands to the empty string is also returned unchanged.
    """
    if isinstance(p,ParseResults):
        first, last = ord(p[0]), ord(p[1])
        chars = ''.join( [ unichr(code) for code in range(first,last+1) ] )
        if chars:
            return chars
    return p
2380       
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed definition yields an empty set, while
        # KeyboardInterrupt and SystemExit are allowed to propagate
        return ""
2401
def replaceWith(replStr):
    """Helper that builds a parse action which ignores whatever arguments it is
       given and always returns replStr as a one-element list.  Especially
       useful when used with transformString().
    """
    def _constant(*_ignored):
        return [replStr]
    return _constant
2409
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the
       quotation marks) from the first token of a parsed quoted string.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
2416
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.

       Returns a new list holding the upper-cased form of every token in t.
       Each token's own upper() method is used, so both plain and unicode
       strings are accepted.
    """
    return [ tok.upper() for tok in t ]
2420
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.

       Returns a new list holding the lower-cased form of every token in t.
       Each token's own lower() method is used, so both plain and unicode
       strings are accepted.
    """
    return [ tok.lower() for tok in t ]
2424
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       With xml true, the tag name is matched as given and attribute values must
       be double-quoted strings.  Otherwise the tag name is matched caselessly,
       attribute names are converted to lower case, and attribute values may be
       quoted strings or unquoted runs of printable characters other than '>'.

       Returns the pair (openTag, closeTag).  The results are named "start" and
       "end" followed by the title-cased tag name, with any ':' removed.
    """
    attrName = Word(alphanums)
    if xml:
        attrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagName = Keyword(tagStr)
    else:
        unquotedChars = printables.replace( ">", "" )
        attrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(unquotedChars)
        tagName = Keyword(tagStr,caseless=True)
        attrName = attrName.setParseAction(downcaseTokens)

    # name/value attribute pairs, collected as a Dict
    attributes = Dict(ZeroOrMore(Group( attrName + Suppress("=") + attrValue )))
    # optional trailing "/", reported under the results name "empty" as a boolean
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')

    openTag = Suppress("<") + tagName + attributes + emptyFlag + Suppress(">")
    closeTag = Combine("</" + Keyword(tagStr,caseless=not xml) + ">")

    resultSuffix = "".join(tagStr.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+resultSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+resultSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
2446
def makeHTMLTags(tagStr):
    """Helper returning the (opening, closing) tag expressions for the HTML tag named tagStr."""
    return _makeTags( tagStr, xml=False )
2450
def makeXMLTags(tagStr):
    """Helper returning the (opening, closing) tag expressions for the XML tag named tagStr."""
    return _makeTags( tagStr, xml=True )
2454
# characters 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xfe
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xfe]")

# a backslash followed by any single character
_escapedChar = Regex(r"\\.")

# Quoted strings: inside the quotes, accept any character other than the quote,
# a newline/carriage return or a backslash, or a doubled quote, or a
# backslash-escaped character.
dblQuotedString = Regex(
    r'"([^"\n\r\\]|("")|(\\.))*"'
).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'([^'\n\r\\]|('')|(\\.))*'"
).setName("string enclosed in single quotes")
quotedString = Regex(
    r'''("([^"\n\r\\]|("")|(\\.))*")|('([^'\n\r\\]|('')|(\\.))*')'''
).setName("quotedString using single or double quotes")
2461
# Comment structures are very common and easy to get wrong, so ready-made
# expressions for them are provided here.

# /* ... */ (shortest match, may span lines)
cStyleComment = Regex( r"\/\*[\s\S]*?\*\/" ).setName("C style comment")
# <!-- ... --> (shortest match, may span lines)
htmlComment = Regex( r"<!--[\s\S]*?-->" )
# everything up to the end of the current line, without skipping leading whitespace
restOfLine = Regex( r".*" ).leaveWhitespace()
# // through end of line
dblSlashComment = Regex( r"\/\/.*" ).setName("// comment")
# either of the two forms above: /* ... */ or // ...
cppStyleComment = Regex( r"(\/\*[\s\S]*?\*\/)|(\/\/.*)" ).setName("C++ style comment")
javaStyleComment = cppStyleComment
# '#' through end of line
pythonStyleComment = Regex( r"#.*" ).setName("Python style comment")
# every printable character except the comma
_noncomma = printables.replace( ",", "" )
# One item of a comma-separated list: one or more runs of non-comma characters,
# each optionally followed by spaces/tabs that are not directly followed by a
# comma or the end of the line; the pieces are combined into a single token.
_commasepitem = Combine(
    OneOrMore( Word(_noncomma) +
               Optional( Word(" \t") + ~Literal(",") + ~LineEnd() ) )
).streamline().setName("commaItem")
# list items may be quoted strings, plain items, or missing (reported as "")
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="" )
).setName("commaSeparatedList")
2475
2476
2477if __name__ == "__main__":
2478
2479    def test( teststring ):
2480        print teststring,"->",
2481        try:
2482            tokens = simpleSQL.parseString( teststring )
2483            tokenlist = tokens.asList()
2484            print tokenlist
2485            print "tokens = ",        tokens
2486            print "tokens.columns =", tokens.columns
2487            print "tokens.tables =",  tokens.tables
2488            print tokens.asXML("SQL",True)
2489        except ParseException, err:
2490            print err.line
2491            print " "*(err.column-1) + "^"
2492            print err
2493        print
2494
2495    selectToken    = CaselessLiteral( "select" )
2496    fromToken      = CaselessLiteral( "from" )
2497
2498    ident          = Word( alphas, alphanums + "_$" )
2499    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
2500    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
2501    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
2502    tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
2503    simpleSQL      = ( selectToken + \
2504                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
2505                     fromToken + \
2506                     tableNameList.setResultsName( "tables" ) )
2507   
2508    test( "SELECT * from XYZZY, ABC" )
2509    test( "select * from SYS.XYZZY" )
2510    test( "Select A from Sys.dual" )
2511    test( "Select AA,BB,CC from Sys.dual" )
2512    test( "Select A, B, C from Sys.dual" )
2513    test( "Select A, B, C from Sys.dual" )
2514    test( "Xelect A, B, C from Sys.dual" )
2515    test( "Select A, B, C frox Sys.dual" )
2516    test( "Select" )
2517    test( "Select ^^^ frox Sys.dual" )
2518    test( "Select A, B, C from Sys.dual, Table2   " )
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。