[3] | 1 | # $Id: FileUtils.py,v 1.12 2005/11/02 22:26:07 tavis_rudd Exp $ |
---|
| 2 | """File utilities for Python: |
---|
| 3 | |
---|
| 4 | Meta-Data |
---|
| 5 | ================================================================================ |
---|
| 6 | Author: Tavis Rudd <tavis@damnsimple.com> |
---|
| 7 | License: This software is released for unlimited distribution under the |
---|
| 8 | terms of the MIT license. See the LICENSE file. |
---|
| 9 | Version: $Revision: 1.12 $ |
---|
| 10 | Start Date: 2001/09/26 |
---|
| 11 | Last Revision Date: $Date: 2005/11/02 22:26:07 $ |
---|
| 12 | """ |
---|
| 13 | __author__ = "Tavis Rudd <tavis@damnsimple.com>" |
---|
| 14 | __revision__ = "$Revision: 1.12 $"[11:-2] |
---|
| 15 | |
---|
| 16 | |
---|
| 17 | from glob import glob |
---|
| 18 | import os |
---|
| 19 | from os import listdir |
---|
| 20 | import os.path |
---|
| 21 | import re |
---|
| 22 | from types import StringType |
---|
| 23 | from tempfile import mktemp |
---|
| 24 | |
---|
def _escapeRegexChars(txt,
                      escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
    """Return 'txt' with each regex metacharacter preceded by a backslash.

    'escapeRE' is the compiled pattern that captures one character to be
    escaped.  It is bound as a default argument, so the pattern is compiled
    once when the function is defined rather than on every call.
    """
    ## the template emits a literal backslash followed by the captured char
    backslashThenChar = r'\\\1'
    escaped = escapeRE.sub(backslashThenChar, txt)
    return escaped
---|
| 28 | |
---|
def findFiles(*args, **kw):
    """Return the files collected by a FileFinder built from the arguments.

    Every positional and keyword argument is forwarded unchanged to the
    FileFinder constructor; see that class for the arguments it accepts.
    """
    finder = FileFinder(*args, **kw)
    return finder.files()
---|
| 36 | |
---|
def replaceStrInFiles(files, theStr, repl):
    """Replace each literal occurrence of 'theStr' with 'repl' in every file
    of the 'files' list.  Returns a dictionary with data about the matches
    found.

    This is like string.replace() on a multi-file basis: 'theStr' is escaped
    so that none of its characters act as regex metacharacters, and 'repl' is
    inserted verbatim.  If 'repl' is callable it is passed through and called
    with each match object, as FindAndReplace does for any non-string
    replacement.

    This function is a wrapper around the FindAndReplace class.  See its
    docstring for more details.
    """
    pattern = _escapeRegexChars(theStr)

    if callable(repl):
        replacement = repl
    else:
        ## Hand FindAndReplace a function instead of the raw string.  A string
        ## would be parsed for backreferences, and the escaped pattern has no
        ## groups, so any backslash-digit sequence in 'repl' would raise
        ## instead of being written out literally.
        def replacement(match, literal=repl):
            return literal

    return FindAndReplace(files, pattern, replacement).results()
---|
| 49 | |
---|
def replaceRegexInFiles(files, pattern, repl):
    """Apply a regex substitution to every file in the 'files' list.

    Each match of 'pattern' is replaced with 'repl', much as re.sub would do
    on a single string.  The return value is the results dictionary produced
    by the FindAndReplace instance that performs the work; see that class's
    docstring for details.
    """
    replacer = FindAndReplace(files, pattern, repl)
    return replacer.results()
---|
| 61 | |
---|
| 62 | |
---|
| 63 | ################################################## |
---|
| 64 | ## CLASSES |
---|
| 65 | |
---|
class FileFinder:

    """Collects the paths below a root directory that match glob patterns.

    The tree under 'rootPath' is walked when the instance is created.  Every
    visited directory is globbed once per pattern and the hits are
    accumulated in a list that files() returns.
    """

    def __init__(self, rootPath,
                 globPatterns=('*',),
                 ignoreBasenames=('CVS','.svn'),
                 ignoreDirs=(),
                 ):
        """Store the search settings and walk the tree immediately.

        'globPatterns' are applied inside each visited directory.
        Sub-directories whose base name is in 'ignoreBasenames', or whose
        joined path is in 'ignoreDirs', are not descended into.
        """
        self._files = []
        self._rootPath = rootPath
        self._globPatterns = globPatterns
        self._ignoreBasenames = ignoreBasenames
        self._ignoreDirs = ignoreDirs

        self.walkDirTree(rootPath)

    def walkDirTree(self, dir='.',

                    listdir=os.listdir,
                    isdir=os.path.isdir,
                    join=os.path.join,
                    ):
        """Visit 'dir' and every accepted directory beneath it.

        The walk is iterative: directories waiting to be visited are kept on
        a list that is popped from the end.  'listdir', 'isdir' and 'join'
        are the os functions bound as defaults.
        """
        visit = self.processDir
        accept = self.filterDir

        stack = [dir]
        while stack:
            current = stack.pop()

            ## collect the matches inside this directory
            visit(current)

            ## then queue each sub-directory that passes the filter
            for entry in listdir(current):
                candidate = join(current, entry)
                if isdir(candidate) and accept(entry, candidate):
                    stack.append(candidate)

    def filterDir(self, baseName, fullPath):
        """Hook deciding whether a directory is descended into.

        Returns False when 'baseName' is one of the ignored base names or
        'fullPath' is one of the ignored directories, True otherwise.
        """
        if baseName in self._ignoreBasenames:
            return False
        if fullPath in self._ignoreDirs:
            return False
        return True

    def processDir(self, dir, glob=glob):
        """Append the glob matches of every pattern inside 'dir' to the
        collected files."""
        collected = self._files
        for pattern in self._globPatterns:
            matches = glob(os.path.join(dir, pattern))
            collected.extend(matches)

    def files(self):
        """Return the list of paths collected by the walk."""
        return self._files
---|
| 126 | |
---|
class _GenSubberFunc:

    """Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
    groups, etc.) into a function that can be used to do the substitutions in
    the FindAndReplace class.

    The template is scanned left to right and split into chunks of Python
    source: a string literal for constant text, and an 'm.group(...)' call for
    each numbered backreference (backslash + digits) or group reference
    (backslash + g<name> or g<number>).  subberFunc() compiles those chunks
    into a function taking a match object and returning the joined text.

    Only these reference forms are recognised; other backslash escapes are
    copied into the output unchanged.
    """

    backrefRE = re.compile(r'\\([1-9][0-9]*)')
    ## Group names may contain digits after the first character, and the
    ## all-digit form refers to a group by number.
    groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z_0-9]*|[0-9]+)>')

    def __init__(self, replaceStr):
        """Parse 'replaceStr' immediately into code chunks."""
        self._src = replaceStr
        self._pos = 0
        self._codeChunks = []
        self.parse()

    def src(self):
        """Return the template string being parsed."""
        return self._src

    def pos(self):
        """Return the current scan position in the template."""
        return self._pos

    def setPos(self, pos):
        """Move the scan position to 'pos'."""
        self._pos = pos

    def atEnd(self):
        """Return True once the scan position has reached the template end."""
        return self._pos >= len(self._src)

    def advance(self, offset=1):
        """Move the scan position forward by 'offset' characters."""
        self._pos += offset

    def readTo(self, to, start=None):
        """Return the template text from 'start' (default: the current
        position) up to 'to', and leave the scan position at 'to'."""
        if start is None:
            start = self._pos
        self._pos = to
        if self.atEnd():
            return self._src[start:]
        else:
            return self._src[start:to]

    ## match and get methods

    def matchBackref(self):
        """Return a match object if a numbered backreference starts at the
        current position, else None."""
        return self.backrefRE.match(self.src(), self.pos())

    def getBackref(self):
        """Consume the backreference at the current position and return its
        group number as a string."""
        m = self.matchBackref()
        self.setPos(m.end())
        return m.group(1)

    def matchGroup(self):
        """Return a match object if a g<...> group reference starts at the
        current position, else None."""
        return self.groupRE.match(self.src(), self.pos())

    def getGroup(self):
        """Consume the group reference at the current position and return the
        text between the angle brackets."""
        m = self.matchGroup()
        self.setPos(m.end())
        return m.group(1)

    ## main parse loop and the eat methods

    def parse(self):
        """Split the whole template into code chunks."""
        while not self.atEnd():
            if self.matchBackref():
                self.eatBackref()
            elif self.matchGroup():
                self.eatGroup()
            else:
                self.eatStrConst()

    def eatStrConst(self):
        """Consume constant text up to the next reference (or the end) and
        add it as a string-literal chunk."""
        startPos = self.pos()
        while not self.atEnd():
            if self.matchBackref() or self.matchGroup():
                break
            self.advance()
        strConst = self.readTo(self.pos(), start=startPos)
        self.addChunk(repr(strConst))

    def eatBackref(self):
        """Add an 'm.group(N)' chunk for the backreference at the current
        position."""
        self.addChunk( 'm.group(' + self.getBackref() + ')' )

    def eatGroup(self):
        """Add an 'm.group(...)' chunk for the group reference at the current
        position."""
        ref = self.getGroup()
        if ref.isdigit():
            ## normalise through int() so leading zeros never reach the
            ## generated source as an invalid integer literal
            self.addChunk( 'm.group(' + str(int(ref)) + ')' )
        else:
            self.addChunk( 'm.group("' + ref + '")' )

    def addChunk(self, chunk):
        """Append one piece of generated source code."""
        self._codeChunks.append(chunk)

    ## code wrapping methods

    def codeBody(self):
        """Return the chunks joined as a comma-separated expression list."""
        return ', '.join(self._codeChunks)

    def code(self):
        """Return the source of a 'subber(m)' function that joins the
        chunks."""
        return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody())

    def subberFunc(self):
        """Compile the generated source and return the 'subber' function."""
        ## Execute into an explicit namespace: the call form of exec works on
        ## both Python 2 and 3, and the function is fetched by name rather
        ## than relying on exec creating a local variable.
        namespace = {}
        exec(self.code(), namespace)
        return namespace['subber']
---|
| 225 | |
---|
| 226 | |
---|
class FindAndReplace:

    """Find and replace all instances of 'patternOrRE' with 'replacement' for
    each file in the 'files' list.  This is a multi-file version of re.sub().

    'patternOrRE' can be a raw regex pattern or a regex object as generated
    by the re module.  'replacement' can be a template string containing
    backreferences or group references, or a callable that takes a match
    object and returns the replacement text.

    All the work is done when the instance is created.  Entries in 'files'
    that are not regular files are skipped, and only files with at least one
    match are rewritten.  When 'recordResults' is true, results() returns a
    dictionary mapping each changed file name to
    {'count': N, 'matches': [{'contents':..., 'start':..., 'end':...}, ...]}.
    """

    def __init__(self, files, patternOrRE, replacement,
                 recordResults=True):
        """Compile the arguments and run the replacement over 'files'."""

        ## anything without a search() method is taken to be pattern text
        if hasattr(patternOrRE, 'search'):
            self._regex = patternOrRE
        else:
            self._regex = re.compile(patternOrRE)

        if callable(replacement):
            self._subber = replacement
        else:
            self._subber = _GenSubberFunc(replacement).subberFunc()

        self._pattern = self._regex.pattern
        self._files = files
        self._results = {}
        self._recordResults = recordResults
        self._currFile = None

        ## Files are always searched in-process.  The external grep pre-filter
        ## was dropped because its command line was built by pasting the
        ## pattern and file name into a shell string, which breaks on names
        ## containing spaces and lets the pattern inject shell commands.
        ## The attribute is kept, always False, for code that inspects it.
        self._usePgrep = False

        self._run()

    def results(self):
        """Return the dictionary of recorded matches, keyed by file name."""
        return self._results

    def _run(self):
        """Rewrite every regular file in the list that contains a match."""
        regex = self._regex
        subber = self._subDispatcher
        for path in self._files:
            if not os.path.isfile(path):
                continue # skip dirs etc.

            ## _subDispatcher files its records under this name
            self._currFile = path

            with open(path) as source:
                orig = source.read()
            if not regex.search(orig):
                continue # leave files without a match untouched

            new = regex.sub(subber, orig)
            with open(path, 'w') as target:
                target.write(new)

    def _subDispatcher(self, match):
        """Record 'match' against the current file (when recording is on) and
        return the replacement text for it."""
        if self._recordResults:
            res = self._results.setdefault(self._currFile,
                                           {'count': 0, 'matches': []})
            res['count'] += 1
            res['matches'].append({'contents':match.group(),
                                   'start':match.start(),
                                   'end':match.end(),
                                   }
                                  )
        return self._subber(match)
---|
| 312 | |
---|
| 313 | |
---|
class SourceFileStats:

    """Counts code, blank and comment lines for a list of source files.

    Each file is read when the instance is created.  A line counts as a
    comment when its first non-whitespace character is '#', as blank when it
    is empty or contains only whitespace, and as code otherwise.
    """

    # per-file statistics keyed by file name; filled in by __init__
    _fileStats = None

    def __init__(self, files):
        """Compute and store the statistics for every name in 'files'."""
        self._fileStats = stats = {}
        for fileName in files:
            stats[fileName] = self.getFileStats(fileName)

    def rawStats(self):
        """Return the dictionary mapping each file name to its statistics."""
        return self._fileStats

    def summary(self):
        """Return the statistics summed over all files, as a dictionary with
        the same keys as the per-file statistics."""
        totals = {'codeLines':0,
                  'blankLines':0,
                  'commentLines':0,
                  'totalLines':0,
                  }
        for fileStats in self.rawStats().values():
            for key in totals:
                totals[key] += fileStats[key]
        return totals

    def printStats(self):
        """Placeholder: currently does nothing."""
        pass

    def getFileStats(self, fileName):
        """Return the line counts for one file.

        The result is a dictionary with the keys 'codeLines', 'blankLines',
        'commentLines' and 'totalLines'.
        """
        codeLines = 0
        blankLines = 0
        commentLines = 0
        ## Both patterns allow any amount of leading whitespace, including
        ## none, so a '#' in the first column and a completely empty line are
        ## classified correctly instead of being counted as code.
        commentLineRe = re.compile(r'\s*#')
        blankLineRe = re.compile(r'\s*$')

        with open(fileName) as source:
            lines = source.read().splitlines()
        totalLines = len(lines)

        for line in lines:
            if commentLineRe.match(line):
                commentLines += 1
            elif blankLineRe.match(line):
                blankLines += 1
            else:
                codeLines += 1

        stats = {'codeLines':codeLines,
                 'blankLines':blankLines,
                 'commentLines':commentLines,
                 'totalLines':totalLines,
                 }

        return stats
---|