1 | # Filename: grep.py |
---|
2 | # Author: Ian N. Schenck |
---|
3 | # Version: 8/23/2005 |
---|
4 | # |
---|
# This script accepts a regular expression, as well as an "invert"
# option, and applies the regular expression to the input file using
# grep. This wrapper provides security (its arguments are validated
# before grep is launched) and pipeline use (input and output are
# passed as files).
---|
8 | # |
---|
9 | # Grep is launched based on these inputs: |
---|
10 | # -i Input file |
---|
11 | # -o Output file |
---|
12 | # -pattern RegEx pattern |
---|
13 | # -v true or false (output NON-matching lines) |
---|
14 | |
---|
15 | import sys |
---|
16 | import os |
---|
17 | import re |
---|
18 | import string |
---|
19 | import commands |
---|
20 | from tempfile import NamedTemporaryFile |
---|
21 | |
---|
22 | # This function is exceedingly useful, perhaps package for reuse? |
---|
def getopts(argv):
    """Collect ``-option value`` pairs from an argument list.

    Every argument whose first character is ``-`` is used as a key and the
    argument immediately after it becomes its value (even if that value
    itself starts with ``-``). Arguments that are not options are skipped.
    A repeated option keeps the last value seen.

    Args:
        argv: sequence of argument strings (without the program name).

    Returns:
        dict mapping each option (including its leading ``-``) to its value.

    Raises:
        IndexError: if an option is the last argument and therefore has no
            value, or if an argument is the empty string.
    """
    parsed = {}
    position = 0
    total = len(argv)
    while position < total:
        token = argv[position]
        # token[0] raises IndexError for an empty-string argument.
        if token[0] != '-':
            position += 1
            continue
        # An option consumes the following argument as its value; indexing
        # past the end raises IndexError when that value is missing.
        parsed[token] = argv[position + 1]
        position += 2
    return parsed
---|
32 | |
---|
def _unescape_pattern(pattern):
    """Return *pattern* with ``__xx__`` escape tokens turned back into characters."""
    # With the newer sanitizing only the single quote still needs replacing,
    # but the whole table remains for backwards compatibility. A tuple keeps
    # the replacement order fixed.
    escaped_tokens = (
        ('>', '__gt__'),
        ('<', '__lt__'),
        ('\'', '__sq__'),
        ('"', '__dq__'),
        ('[', '__ob__'),
        (']', '__cb__'),
        ('{', '__oc__'),
        ('}', '__cc__'),
    )
    for char, token in escaped_tokens:
        pattern = pattern.replace(token, char)
    return pattern


def _run_grep(arguments, outputfile):
    """Run ``grep`` with *arguments*, sending its standard output to *outputfile*.

    Returns grep's exit status, 1 if the output file cannot be opened, or
    127 if grep cannot be started.
    """
    import subprocess

    try:
        destination = open(outputfile, "wb")
    except EnvironmentError:
        print("Unable to open output file.")
        return 1
    try:
        # An argument list (no shell) means no argument is ever re-parsed
        # as shell syntax.
        process = subprocess.Popen(["grep"] + arguments,
                                   stdout=destination,
                                   stderr=subprocess.PIPE)
        # grep's stderr is drained and dropped; only the status is reported.
        process.communicate()
    except EnvironmentError:
        print("Unable to run grep.")
        return 127
    finally:
        destination.close()
    return process.returncode


def main():
    """Validate the command-line options and run grep on the input file.

    Options are read from ``sys.argv``:
        -i        input file
        -o        output file
        -pattern  regular expression (``__xx__`` escape tokens are decoded)
        -v        ``true`` to output NON-matching lines, ``false`` otherwise

    Returns:
        0 after printing the usage text (an option was given without a
        value); -1, -2, -3 or -4 when the output file, input file, invert
        option or pattern is missing; -5 / -6 for an illegal output / input
        filename; -7 for an illegal invert option; otherwise the status
        obtained from running grep.
    """
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 function.
    try:
        opts = getopts(sys.argv[1:])
    except IndexError:
        print("Usage:")
        print(" -i Input file")
        print(" -o Output file")
        print(" -pattern RegEx pattern")
        print(" -v true or false (Invert match)")
        return 0

    outputfile = opts.get("-o")
    if outputfile is None:
        print("No output file specified.")
        return -1

    inputfile = opts.get("-i")
    if inputfile is None:
        print("No input file specified.")
        return -2

    invert = opts.get("-v")
    if invert is None:
        print("Match style (Invert or normal) not specified.")
        return -3

    pattern = opts.get("-pattern")
    if pattern is None:
        print("RegEx pattern not specified.")
        return -4

    # All inputs have been specified at this point, now validate.
    pattern = _unescape_pattern(pattern)

    # Filenames are restricted to a conservative character whitelist; \Z
    # (rather than $) also rejects a name that ends in a newline.
    file_regex = re.compile(r"^[A-Za-z0-9./\-_]+\Z")

    if not file_regex.match(outputfile):
        print("Illegal output filename.")
        return -5
    if not file_regex.match(inputfile):
        print("Illegal input filename.")
        return -6
    # Exact comparison: a value that merely starts with "true"/"false" is
    # rejected instead of being silently treated as "false".
    if invert not in ("true", "false"):
        print("Illegal invert option.")
        return -7

    # invert grep search?
    if invert == "true":
        invert_args = ["-v"]
        print("Not matching pattern: %s" % pattern)
    else:
        invert_args = []
        print("Matching pattern: %s" % pattern)

    # The pattern is handed to grep through a file, so it never appears on
    # the command line and may contain any character (including quotes).
    # The temporary file object is kept alive while grep runs and is removed
    # when closed, so its name is never free for another process to claim.
    pattern_file = NamedTemporaryFile(mode="w")
    try:
        pattern_file.write(pattern)
        # Make sure grep sees the full pattern when it opens the file by name.
        pattern_file.flush()
        # "--" stops option parsing so a filename starting with "-" is
        # treated as a file, not as a grep option.
        arguments = (["-E"] + invert_args +
                     ["-f", pattern_file.name, "--", inputfile])
        return _run_grep(arguments, outputfile)
    finally:
        # Closing the NamedTemporaryFile deletes the pattern file, even when
        # running grep failed.
        pattern_file.close()
---|
121 | |
---|
# Script entry point: run the wrapper only when this file is executed
# directly. The value returned by main() is discarded here, so it does not
# become the process exit status.
if __name__ == "__main__":
    main()
---|