[2] | 1 | # Filename: grep.py |
---|
| 2 | # Author: Ian N. Schenck |
---|
| 3 | # Version: 8/23/2005 |
---|
| 4 | # |
---|
| 5 | # This script accepts regular expressions, as well as an "invert" |
---|
| 6 | # option, and applies the regular expression using grep. This wrapper |
---|
| 7 | # provides security and pipeline. |
---|
| 8 | # |
---|
| 9 | # Grep is launched based on these inputs: |
---|
| 10 | # -i Input file |
---|
| 11 | # -o Output file |
---|
| 12 | # -pattern RegEx pattern |
---|
| 13 | # -v true or false (output NON-matching lines) |
---|
| 14 | |
---|
| 15 | import sys |
---|
| 16 | import os |
---|
| 17 | import re |
---|
| 18 | import string |
---|
| 19 | import commands |
---|
| 20 | from tempfile import NamedTemporaryFile |
---|
| 21 | |
---|
| 22 | # This function is exceedingly useful, perhaps package for reuse? |
---|
def getopts(argv):
    """Collect ``-flag value`` pairs from an argument list into a dict.

    Every token that starts with '-' is used as a key, and the token that
    follows it is stored as its value (even if that token also starts with
    '-'). Tokens that do not start with '-' are skipped. When a flag is
    repeated, the last value wins.

    Raises:
        IndexError: if a flag is the final token (it has no value), or if
            a token is the empty string.
    """
    parsed = {}
    position = 0
    total = len(argv)
    while position < total:
        token = argv[position]
        if token[0] != '-':
            # Not a flag: step over it.
            position += 1
            continue
        # Flag: the next token is its value; consume both.
        parsed[token] = argv[position + 1]
        position += 2
    return parsed
---|
| 32 | |
---|
def main():
    """Validate the command-line options and run ``grep -E`` on the input.

    Options are read from ``sys.argv``:
        -i        input file
        -o        output file (created or truncated)
        -pattern  extended regular expression, possibly with escaped
                  characters such as ``__sq__`` (see ``mapped_chars``)
        -v        "true" to write the NON-matching lines, "false" otherwise

    Returns:
        0 after printing the usage text (an option was given without a
        value); -1 to -4 when -o, -i, -v or -pattern is missing; -5 to -7
        when the output name, input name or -v value is illegal; 2 when the
        output file cannot be opened or grep cannot be started; otherwise
        grep's own exit status.
    """
    # Local import: only this function needs subprocess, and it keeps the
    # function self-contained without touching the file's import block.
    import subprocess

    args = sys.argv[1:]

    try:
        opts = getopts(args)
    except IndexError:
        print("Usage:")
        print(" -i Input file")
        print(" -o Output file")
        print(" -pattern RegEx pattern")
        print(" -v true or false (Invert match)")
        return 0

    outputfile = opts.get("-o")
    if outputfile is None:
        print("No output file specified.")
        return -1

    inputfile = opts.get("-i")
    if inputfile is None:
        print("No input file specified.")
        return -2

    invert = opts.get("-v")
    if invert is None:
        print("Match style (Invert or normal) not specified.")
        return -3

    pattern = opts.get("-pattern")
    if pattern is None:
        print("RegEx pattern not specified.")
        return -4

    # All inputs have been specified at this point, now validate.

    # Characters that may arrive escaped in the pattern. With the pattern
    # passed through a file only the single quote strictly needs this, but
    # the full table is kept for backwards compatibility.
    mapped_chars = (
        ('>', '__gt__'),
        ('<', '__lt__'),
        ('\'', '__sq__'),
        ('"', '__dq__'),
        ('[', '__ob__'),
        (']', '__cb__'),
        ('{', '__oc__'),
        ('}', '__cc__'),
    )
    for char, token in mapped_chars:
        pattern = pattern.replace(token, char)

    # Whitelist for file names. \Z (not $) so that a trailing newline is
    # rejected instead of being silently accepted.
    file_regex = re.compile(r"^[A-Za-z0-9./\-_]+\Z")

    if not file_regex.match(outputfile):
        print("Illegal output filename.")
        return -5
    if not file_regex.match(inputfile):
        print("Illegal input filename.")
        return -6
    # Exact comparison: a prefix match would let values such as "truex"
    # through and then treat them as "false".
    if invert not in ("true", "false"):
        print("Illegal invert option.")
        return -7

    # invert grep search?
    if invert == "true":
        invert_args = ["-v"]
        print("Not matching pattern: %s" % pattern)
    else:
        invert_args = []
        print("Matching pattern: %s" % pattern)

    # The pattern is handed to grep through a file (-f), so it never has to
    # be quoted for a command line and may contain any character. The
    # temporary file stays open while grep runs and is removed when the
    # ``with`` block exits, even on error.
    with NamedTemporaryFile(mode="w") as pattern_file:
        pattern_file.write(pattern)
        pattern_file.flush()  # make the content visible to grep

        # Argument list, no shell. "--" ends option parsing so an input
        # name starting with '-' (allowed by the whitelist) is treated as
        # a file, not as a grep option.
        command = ["grep", "-E"] + invert_args + [
            "-f", pattern_file.name, "--", inputfile]

        try:
            with open(outputfile, "wb") as out_handle:
                # grep's stderr is collected and dropped so that nothing
                # is written to this script's own stderr.
                process = subprocess.Popen(
                    command, stdout=out_handle, stderr=subprocess.PIPE)
                process.communicate()
        except (IOError, OSError) as err:
            print("Unable to run grep: %s" % err)
            return 2

    # return grep's exit status
    return process.returncode
---|
| 121 | |
---|
# Script entry point: run main() only when this file is executed directly.
# The status returned by main() is not forwarded to the process exit code.
if __name__ == "__main__":
    main()
---|