# $Id: SourceReader.py,v 1.15 2007/04/03 01:57:42 tavis_rudd Exp $ """SourceReader class for Cheetah's Parser and CodeGenerator Meta-Data ================================================================================ Author: Tavis Rudd License: This software is released for unlimited distribution under the terms of the MIT license. See the LICENSE file. Version: $Revision: 1.15 $ Start Date: 2001/09/19 Last Revision Date: $Date: 2007/04/03 01:57:42 $ """ __author__ = "Tavis Rudd " __revision__ = "$Revision: 1.15 $"[11:-2] import re import sys EOLre = re.compile(r'[ \f\t]*(?:\r\n|\r|\n)') EOLZre = re.compile(r'(?:\r\n|\r|\n|\Z)') ENCODINGsearch = re.compile("coding[=:]\s*([-\w.]+)").search class Error(Exception): pass class SourceReader: def __init__(self, src, filename=None, breakPoint=None, encoding=None): ## @@TR 2005-01-17: the following comes from a patch Terrel Shumway ## contributed to add unicode support to the reading of Cheetah source ## files with dynamically compiled templates. All the existing unit ## tests pass but, it needs more testing and some test cases of its ## own. My instinct is to move this up into the code that passes in the ## src string rather than leaving it here. As implemented here it ## forces all src strings to unicode, which IMO is not what we want. # if encoding is None: # # peek at the encoding in the first two lines # m = EOLZre.search(src) # pos = m.end() # if pos= self._BOLs[i] and pos <= self._EOLs[i]: return i def getRowCol(self, pos=None): if pos == None: pos = self._pos lineNum = self.lineNum(pos) BOL, EOL = self._BOLs[lineNum], self._EOLs[lineNum] return lineNum+1, pos-BOL+1 def getRowColLine(self, pos=None): if pos == None: pos = self._pos row, col = self.getRowCol(pos) return row, col, self.splitlines()[row-1] def getLine(self, pos): if pos == None: pos = self._pos lineNum = self.lineNum(pos) return self.splitlines()[lineNum] def pos(self): return self._pos def setPos(self, pos): self.checkPos(pos) self._pos = pos def validPos(self, pos): return pos <= self._breakPoint and pos >=0 def checkPos(self, pos): if not pos <= self._breakPoint: raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" + str(self._breakPoint-1) + ")" ) elif not pos >=0: raise Error("pos (" + str(pos) + ") is invalid: less than 0" ) def breakPoint(self): return self._breakPoint def setBreakPoint(self, pos): if pos > self._srcLen: raise Error("New breakpoint (" + str(pos) + ") is invalid: beyond the end of stream's source string (" + str(self._srcLen) + ")" ) elif not pos >= 0: raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" ) self._breakPoint = pos def setBookmark(self, name): self._bookmarks[name] = self._pos self._posTobookmarkMap[self._pos] = name def hasBookmark(self, name): return self._bookmarks.has_key(name) def gotoBookmark(self, name): if not self.hasBookmark(name): raise Error("Invalid bookmark (" + name + ") is invalid: does not exist") pos = self._bookmarks[name] if not self.validPos(pos): raise Error("Invalid bookmark (" + name + ', '+ str(pos) + ") is invalid: pos is out of range" ) self._pos = pos def atEnd(self): return self._pos >= self._breakPoint def atStart(self): return self._pos == 0 def peek(self, offset=0): self.checkPos(self._pos+offset) pos = self._pos + offset return self._src[pos] def getc(self): pos = self._pos if self.validPos(pos+1): self._pos += 1 return self._src[pos] def ungetc(self, c=None): if not self.atStart(): raise Error('Already at beginning of stream') self._pos -= 1 if not c==None: self._src[self._pos] = c def advance(self, offset=1): self.checkPos(self._pos + offset) self._pos += offset def rev(self, offset=1): self.checkPos(self._pos - offset) self._pos -= offset def read(self, offset): self.checkPos(self._pos + offset) start = self._pos self._pos += offset return self._src[start:self._pos] def readTo(self, to, start=None): self.checkPos(to) if start == None: start = self._pos self._pos = to return self._src[start:to] def readToEOL(self, start=None, gobble=True): EOLmatch = EOLZre.search(self.src(), self.pos()) if gobble: pos = EOLmatch.end() else: pos = EOLmatch.start() return self.readTo(to=pos, start=start) def find(self, it, pos=None): if pos == None: pos = self._pos return self._src.find(it, pos ) def startswith(self, it, pos=None): if self.find(it, pos) == self.pos(): return True else: return False def rfind(self, it, pos): if pos == None: pos = self._pos return self._src.rfind(it, pos) def findBOL(self, pos=None): if pos == None: pos = self._pos src = self.src() return max(src.rfind('\n',0,pos)+1, src.rfind('\r',0,pos)+1, 0) def findEOL(self, pos=None, gobble=False): if pos == None: pos = self._pos match = EOLZre.search(self.src(), pos) if gobble: return match.end() else: return match.start() def isLineClearToPos(self, pos=None): if pos == None: pos = self.pos() self.checkPos(pos) src = self.src() BOL = self.findBOL() return BOL == pos or src[BOL:pos].isspace() def matches(self, strOrRE): if isinstance(strOrRE, (str, unicode)): return self.startswith(strOrRE, pos=self.pos()) else: # assume an re object return strOrRE.match(self.src(), self.pos()) def matchWhiteSpace(self, WSchars=' \f\t'): return (not self.atEnd()) and self.peek() in WSchars def getWhiteSpace(self, max=None, WSchars=' \f\t'): if not self.matchWhiteSpace(WSchars): return '' start = self.pos() breakPoint = self.breakPoint() if max is not None: breakPoint = min(breakPoint, self.pos()+max) while self.pos() < breakPoint: self.advance() if not self.matchWhiteSpace(WSchars): break return self.src()[start:self.pos()] def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'): return self.atEnd() or not self.peek() in WSchars def getNonWhiteSpace(self, WSchars=' \f\t\n\r'): if not self.matchNonWhiteSpace(WSchars): return '' start = self.pos() while self.pos() < self.breakPoint(): self.advance() if not self.matchNonWhiteSpace(WSchars): break return self.src()[start:self.pos()]