1 | # $Id: FileUtils.py,v 1.12 2005/11/02 22:26:07 tavis_rudd Exp $ |
---|
2 | """File utilities for Python: |
---|
3 | |
---|
4 | Meta-Data |
---|
5 | ================================================================================ |
---|
6 | Author: Tavis Rudd <tavis@damnsimple.com> |
---|
7 | License: This software is released for unlimited distribution under the |
---|
8 | terms of the MIT license. See the LICENSE file. |
---|
9 | Version: $Revision: 1.12 $ |
---|
10 | Start Date: 2001/09/26 |
---|
11 | Last Revision Date: $Date: 2005/11/02 22:26:07 $ |
---|
12 | """ |
---|
13 | __author__ = "Tavis Rudd <tavis@damnsimple.com>" |
---|
14 | __revision__ = "$Revision: 1.12 $"[11:-2] |
---|
15 | |
---|
16 | |
---|
17 | from glob import glob |
---|
18 | import os |
---|
19 | from os import listdir |
---|
20 | import os.path |
---|
21 | import re |
---|
22 | from types import StringType |
---|
23 | from tempfile import mktemp |
---|
24 | |
---|
def _escapeRegexChars(txt,
                      escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
    """Return 'txt' with each regex metacharacter preceded by a backslash.

    'escapeRE' is the compiled pattern that captures, as group 1, every
    character needing an escape. It is bound once as a default argument so
    the pattern is compiled only at definition time.
    """
    # Template: a literal backslash followed by the captured character.
    escapedTemplate = r'\\\1'
    return escapeRE.sub(escapedTemplate, txt)
---|
28 | |
---|
def findFiles(*args, **kw):
    """Return the paths found under a directory tree that match a glob pattern.

    Convenience wrapper: every positional and keyword argument is forwarded
    unchanged to the FileFinder constructor (see that class's docstring for
    the accepted arguments) and the finder's list of matches is returned.
    """
    finder = FileFinder(*args, **kw)
    return finder.files()
---|
36 | |
---|
def replaceStrInFiles(files, theStr, repl):

    """Replace all instances of 'theStr' with 'repl' for each file in the 'files'
    list.  Returns a dictionary with data about the matches found.

    This is like string.replace() on a multi-file basis: 'theStr' is matched
    literally (its regex metacharacters are escaped) and a string 'repl' is
    inserted literally.  If 'repl' is callable it is handed to FindAndReplace
    unchanged and called with each match object.

    This function is a wrapper around the FindAndReplace class.  See its
    docstring for more details."""

    pattern = _escapeRegexChars(theStr)
    if callable(repl):
        replacement = repl
    else:
        # Wrap the text in a function so FindAndReplace does not parse it as
        # a re.sub-style template.  The escaped pattern has no groups, so a
        # '\1' or '\g<name>' inside 'repl' could only ever raise an error if
        # it were interpreted as a backreference.
        def replacement(match):
            return repl
    return FindAndReplace(files, pattern, replacement).results()
---|
49 | |
---|
def replaceRegexInFiles(files, pattern, repl):
    """Apply a regex substitution to every file in 'files'.

    Each match of the regex 'pattern' is replaced with 'repl', in the manner
    of re.sub but across many files.  The arguments are passed straight to
    the FindAndReplace class (see its docstring for details) and its results
    dictionary, describing the matches found, is returned.
    """
    replacer = FindAndReplace(files, pattern, repl)
    return replacer.results()
---|
61 | |
---|
62 | |
---|
63 | ################################################## |
---|
64 | ## CLASSES |
---|
65 | |
---|
class FileFinder:

    """Walks a directory tree and gathers every path matching one of the
    given glob patterns.

    The walk happens in the constructor; the accumulated matches are then
    available from files()."""

    def __init__(self, rootPath,
                 globPatterns=('*',),
                 ignoreBasenames=('CVS','.svn'),
                 ignoreDirs=(),
                 ):
        """Store the search settings and immediately walk 'rootPath'.

        'globPatterns' are applied inside every visited directory.
        Sub-directories whose base name is in 'ignoreBasenames', or whose
        joined path is in 'ignoreDirs', are not descended into."""

        self._files = []
        self._rootPath = rootPath
        self._globPatterns = globPatterns
        self._ignoreBasenames = ignoreBasenames
        self._ignoreDirs = ignoreDirs

        self.walkDirTree(rootPath)

    def walkDirTree(self, dir='.',

                    listdir=os.listdir,
                    isdir=os.path.isdir,
                    join=os.path.join,
                    ):

        """Iteratively visit 'dir' and all accepted sub-directories,
        collecting matching files from each one."""

        # Explicit stack instead of recursion; the most recently queued
        # directory is visited next.
        stack = [dir]
        while stack:
            current = stack.pop()
            self.processDir(current)

            # Queue the sub-directories that pass the filter hook.
            for entry in listdir(current):
                candidate = join(current, entry)
                if isdir(candidate) and self.filterDir(entry, candidate):
                    stack.append(candidate)

    def filterDir(self, baseName, fullPath):

        """Hook deciding whether a sub-directory should be descended into.

        Returns False when 'baseName' is an ignored base name or 'fullPath'
        is an ignored directory, True otherwise."""

        if baseName in self._ignoreBasenames:
            return False
        return fullPath not in self._ignoreDirs

    def processDir(self, dir, glob=glob):
        """Append to the result list everything in 'dir' that matches any of
        the glob patterns."""
        for pattern in self._globPatterns:
            matches = glob(os.path.join(dir, pattern))
            self._files.extend(matches)

    def files(self):
        """Return the list of paths collected by the walk."""
        return self._files
---|
126 | |
---|
class _GenSubberFunc:

    r"""Converts a 'sub' string in the form that one feeds to re.sub (backrefs,
    groups, etc.) into a function that can be used to do the substitutions in
    the FindAndReplace class.

    Recognised references are numeric backrefs (\1, \2, ...) and group
    references (\g<name> or \g<number>).  All other text, including any
    other backslash sequence, is copied into the output literally.

    The template is parsed into a list of Python expression chunks, which
    are assembled into the source of a one-argument function 'subber(m)'
    that joins the chunks for a given match object 'm'."""

    backrefRE = re.compile(r'\\([1-9][0-9]*)')
    # Group names may contain digits after the first character, and the
    # numeric form \g<number> is accepted as well.
    groupRE = re.compile(r'\\g<([a-zA-Z_][a-zA-Z_0-9]*|[0-9]+)>')

    def __init__(self, replaceStr):
        """Parse 'replaceStr' immediately into code chunks."""
        self._src = replaceStr
        self._pos = 0
        self._codeChunks = []
        self.parse()

    def src(self):
        """Return the replacement template being parsed."""
        return self._src

    def pos(self):
        """Return the current parse position within the template."""
        return self._pos

    def setPos(self, pos):
        """Move the parse position to 'pos'."""
        self._pos = pos

    def atEnd(self):
        """Return True once the parse position is at or past the end."""
        return self._pos >= len(self._src)

    def advance(self, offset=1):
        """Move the parse position forward by 'offset' characters."""
        self._pos += offset

    def readTo(self, to, start=None):
        """Set the position to 'to' and return the text from 'start' (default:
        the position before the call) up to it."""
        if start is None:
            start = self._pos
        self._pos = to
        if self.atEnd():
            return self._src[start:]
        else:
            return self._src[start:to]

    ## match and get methods

    def matchBackref(self):
        """Return a match object if a numeric backref starts here, else None."""
        return self.backrefRE.match(self.src(), self.pos())

    def getBackref(self):
        """Consume the backref at the current position; return its number as
        a string.  Only valid when matchBackref() succeeds."""
        m = self.matchBackref()
        self.setPos(m.end())
        return m.group(1)

    def matchGroup(self):
        """Return a match object if a \\g<...> reference starts here, else
        None."""
        return self.groupRE.match(self.src(), self.pos())

    def getGroup(self):
        """Consume the \\g<...> reference at the current position; return the
        group name or number as a string.  Only valid when matchGroup()
        succeeds."""
        m = self.matchGroup()
        self.setPos(m.end())
        return m.group(1)

    ## main parse loop and the eat methods

    def parse(self):
        """Split the whole template into backref, group and constant chunks."""
        while not self.atEnd():
            if self.matchBackref():
                self.eatBackref()
            elif self.matchGroup():
                self.eatGroup()
            else:
                self.eatStrConst()

    def eatStrConst(self):
        """Consume literal text up to the next reference (or the end) and add
        it as a quoted string chunk."""
        startPos = self.pos()
        while not self.atEnd():
            if self.matchBackref() or self.matchGroup():
                break
            else:
                self.advance()
        strConst = self.readTo(self.pos(), start=startPos)
        # repr() yields a valid Python literal for the generated source.
        self.addChunk(repr(strConst))

    def eatBackref(self):
        """Consume a numeric backref and add the matching m.group(N) chunk."""
        self.addChunk( 'm.group(' + self.getBackref() + ')' )

    def eatGroup(self):
        """Consume a \\g<...> reference and add the matching m.group chunk."""
        name = self.getGroup()
        if name.isdigit():
            # \g<number> refers to a group by index, exactly like \number.
            self.addChunk( 'm.group(' + str(int(name)) + ')' )
        else:
            self.addChunk( 'm.group("' + name + '")' )

    def addChunk(self, chunk):
        """Append one Python expression (as source text) to the chunk list."""
        self._codeChunks.append(chunk)

    ## code wrapping methods

    def codeBody(self):
        """Return the chunks joined as a comma-separated expression list."""
        return ', '.join(self._codeChunks)

    def code(self):
        """Return the full source of the generated 'subber' function."""
        return "def subber(m):\n\treturn ''.join([%s])\n" % (self.codeBody())

    def subberFunc(self):
        """Compile the generated source and return the 'subber' function."""
        # Run in a private namespace.  The call form of exec with an explicit
        # dict is accepted by both Python 2 and Python 3.
        namespace = {}
        exec(self.code(), namespace)
        return namespace['subber']
---|
225 | |
---|
226 | |
---|
class FindAndReplace:

    """Find and replace all instances of 'patternOrRE' with 'replacement' for
    each file in the 'files' list. This is a multi-file version of re.sub().

    'patternOrRE' can be a raw regex pattern or
    a regex object as generated by the re module. 'replacement' can be any
    string that would work with patternOrRE.sub(replacement, fileContents),
    or a callable that takes a match object and returns the new text.

    The work is done by the constructor.  Files containing a match are
    rewritten in place; entries in 'files' that are not regular files are
    skipped.  When 'recordResults' is true, results() maps each modified
    file to {'count': <number of matches>, 'matches': [...]}, where every
    match is a dict with the keys 'contents', 'start' and 'end'.
    """

    def __init__(self, files, patternOrRE, replacement,
                 recordResults=True):

        if self._isString(patternOrRE):
            self._regex = re.compile(patternOrRE)
        else:
            self._regex = patternOrRE
        if self._isString(replacement):
            self._subber = _GenSubberFunc(replacement).subberFunc()
        else:
            self._subber = replacement

        self._pattern = self._regex.pattern
        self._files = files
        self._results = {}
        self._recordResults = recordResults
        self._currFile = None

        # Matching is always done in-process with the compiled regex.  An
        # external grep pre-filter needs the pattern and file name pasted
        # into a shell command (breaking on spaces and quotes) and matches
        # line by line, so multi-line patterns would be missed.  The
        # attribute is kept for code that inspects it.
        self._usePgrep = False

        self._run()

    def _isString(self, obj):
        """Return True if 'obj' is a text string rather than a regex object or
        callable."""
        try:
            stringTypes = (basestring,)
        except NameError:
            # Python 3 has no 'basestring'.
            stringTypes = (str,)
        return isinstance(obj, stringTypes)

    def _readFile(self, path):
        """Return the contents of 'path', always closing the file."""
        handle = open(path)
        try:
            return handle.read()
        finally:
            handle.close()

    def _writeFile(self, path, contents):
        """Overwrite 'path' with 'contents', always closing the file."""
        handle = open(path, 'w')
        try:
            handle.write(contents)
        finally:
            handle.close()

    def results(self):
        """Return the dictionary of recorded matches, keyed by file name."""
        return self._results

    def _run(self):
        """Apply the substitution to every regular file in the file list."""
        regex = self._regex
        subber = self._subDispatcher
        for path in self._files:
            if not os.path.isfile(path):
                continue # skip dirs etc.

            # _subDispatcher files its records under this name.
            self._currFile = path
            orig = self._readFile(path)
            new, count = regex.subn(subber, orig)
            if count:
                # Only files that actually contained a match are rewritten.
                self._writeFile(path, new)

    def _subDispatcher(self, match):
        """Record 'match' against the current file (if recording is enabled)
        and return its replacement text."""
        if self._recordResults:
            if self._currFile not in self._results:
                res = self._results[self._currFile] = {}
                res['count'] = 0
                res['matches'] = []
            else:
                res = self._results[self._currFile]
            res['count'] += 1
            res['matches'].append({'contents':match.group(),
                                   'start':match.start(),
                                   'end':match.end(),
                                   }
                                  )
        return self._subber(match)
---|
312 | |
---|
313 | |
---|
class SourceFileStats:

    """Counts code, blank and comment lines in a set of source files.

    Classification is purely line based.  A line whose first non-whitespace
    character is '#' is a comment line, a line that is empty or holds only
    whitespace is a blank line, and every other line is a code line.
    """

    _fileStats = None

    # Compiled once for the class; both are used with .match(), so they are
    # anchored at the start of the line.
    _commentLineRe = re.compile(r'\s*#')
    _blankLineRe = re.compile(r'\s*$')

    def __init__(self, files):
        """Compute and store the statistics for each file name in 'files'."""
        self._fileStats = stats = {}
        for file in files:
            stats[file] = self.getFileStats(file)

    def rawStats(self):
        """Return the per-file statistics, keyed by file name."""
        return self._fileStats

    def summary(self):
        """Return a dictionary with the line counts totalled over all files.

        The keys are 'codeLines', 'blankLines', 'commentLines' and
        'totalLines'.
        """
        codeLines = 0
        blankLines = 0
        commentLines = 0
        totalLines = 0
        for fileStats in self.rawStats().values():
            codeLines += fileStats['codeLines']
            blankLines += fileStats['blankLines']
            commentLines += fileStats['commentLines']
            totalLines += fileStats['totalLines']

        stats = {'codeLines':codeLines,
                 'blankLines':blankLines,
                 'commentLines':commentLines,
                 'totalLines':totalLines,
                 }
        return stats

    def printStats(self):
        """Placeholder; currently does nothing."""
        pass

    def getFileStats(self, fileName):
        """Read 'fileName' and return its line counts as a dictionary with
        the keys 'codeLines', 'blankLines', 'commentLines' and 'totalLines'.
        """
        handle = open(fileName)
        try:
            lines = handle.read().splitlines()
        finally:
            handle.close()

        codeLines = 0
        blankLines = 0
        commentLines = 0
        totalLines = len(lines)
        isComment = self._commentLineRe.match
        isBlank = self._blankLineRe.match

        for line in lines:
            if isComment(line):
                commentLines += 1
            elif isBlank(line):
                blankLines += 1
            else:
                codeLines += 1

        stats = {'codeLines':codeLines,
                 'blankLines':blankLines,
                 'commentLines':commentLines,
                 'totalLines':totalLines,
                 }

        return stats
---|