| 1 | # Filename: grep.py |
|---|
| 2 | # Author: Ian N. Schenck |
|---|
| 3 | # Version: 8/23/2005 |
|---|
| 4 | # |
|---|
| 5 | # This script accepts regular expressions, as well as an "invert" |
|---|
| 6 | # option, and applies the regular expression using grep. This wrapper |
|---|
| 7 | # provides security and pipeline. |
|---|
| 8 | # |
|---|
| 9 | # Grep is launched based on these inputs: |
|---|
| 10 | # -i Input file |
|---|
| 11 | # -o Output file |
|---|
| 12 | # -pattern RegEx pattern |
|---|
| 13 | # -v true or false (output NON-matching lines) |
|---|
| 14 | |
|---|
| 15 | import sys |
|---|
| 16 | import os |
|---|
| 17 | import re |
|---|
| 18 | import string |
|---|
| 19 | import commands |
|---|
| 20 | from tempfile import NamedTemporaryFile |
|---|
| 21 | |
|---|
| 22 | # This function is exceedingly useful, perhaps package for reuse? |
|---|
def getopts(argv):
    """Collect ``-flag value`` pairs from an argument list into a dict.

    Every argument that starts with '-' is treated as a flag and the
    argument that follows it is taken as its value, even when that value
    itself starts with '-'. Arguments that are not flags (including empty
    strings) are skipped. If a flag is repeated, the last value wins.

    Args:
        argv: sequence of argument strings, without the program name.

    Returns:
        dict mapping each flag (leading '-' included) to its value.

    Raises:
        IndexError: if a flag is the last argument and has no value.
    """
    opts = {}
    position = 0
    while position < len(argv):
        token = argv[position]
        # startswith() is safe on "" whereas token[0] would raise IndexError
        # and make an empty argument look like a usage error to the caller.
        if token.startswith('-'):
            # Deliberately unguarded: a missing value raises IndexError.
            opts[token] = argv[position + 1]
            position += 2
        else:
            position += 1
    return opts
| 32 | |
|---|
def _run_grep(pattern, invert, inputfile, outputfile):
    """Run ``grep -E`` with the pattern on inputfile, writing to outputfile.

    Args:
        pattern: extended regular expression text, written verbatim to a
            temporary pattern file that is handed to grep with ``-f``.
        invert: when true, pass ``-v`` so non-matching lines are selected.
        inputfile: path of the file to search.
        outputfile: path of the file that receives grep's standard output.

    Returns:
        grep's exit status.

    Raises:
        EnvironmentError: if the pattern file or output file cannot be
            opened, or grep cannot be started.
    """
    # Imported here because the surrounding file does not import subprocess.
    import subprocess

    # Keep the temporary file open while grep runs: the name then always
    # refers to the file created here, and close() removes it.
    pattern_file = NamedTemporaryFile(mode="w")
    try:
        pattern_file.write(pattern)
        pattern_file.flush()  # make the pattern visible to the grep process

        command = ["grep", "-E"]
        if invert:
            command.append("-v")
        # "--" stops option parsing so a filename starting with '-' is
        # treated as a file, not as a grep option.
        command.extend(["-f", pattern_file.name, "--", inputfile])

        out_handle = open(outputfile, "w")
        try:
            # An argument list (no shell) means nothing is re-parsed by a
            # shell. grep's stderr is captured and discarded.
            grep = subprocess.Popen(command, stdout=out_handle,
                                    stderr=subprocess.PIPE)
            grep.communicate()
        finally:
            out_handle.close()
    finally:
        pattern_file.close()

    return grep.returncode


def main():
    """Validate the command-line options and run grep on the input file.

    Options are read from ``sys.argv``:
        -i        input file
        -o        output file
        -pattern  regular expression; the tokens __gt__, __lt__, __sq__,
                  __dq__, __ob__, __cb__, __oc__ and __cc__ are translated
                  back to > < ' " [ ] { } before use
        -v        "true" to output non-matching lines, otherwise "false"

    Progress and error messages are printed to standard output.

    Returns:
        0 after printing usage (a flag was given without a value);
        -1, -2, -3, -4 when -o, -i, -v or -pattern is missing;
        -5 / -6 when the output / input filename has illegal characters;
        -7 when -v is neither "true" nor "false";
        -8 when the pattern file, the output file or grep itself could not
        be opened or started; otherwise grep's exit status.
    """
    try:
        opts = getopts(sys.argv[1:])
    except IndexError:
        # getopts raises IndexError when a flag has no value after it.
        print("Usage:")
        print(" -i Input file")
        print(" -o Output file")
        print(" -pattern RegEx pattern")
        print(" -v true or false (Invert match)")
        return 0

    outputfile = opts.get("-o")
    if outputfile is None:
        print("No output file specified.")
        return -1

    inputfile = opts.get("-i")
    if inputfile is None:
        print("No input file specified.")
        return -2

    invert = opts.get("-v")
    if invert is None:
        print("Match style (Invert or normal) not specified.")
        return -3

    pattern = opts.get("-pattern")
    if pattern is None:
        print("RegEx pattern not specified.")
        return -4

    # All inputs have been specified at this point, now validate.

    # Undo the escaping applied to the pattern by the caller. Only the
    # single quote still needs this; the rest is kept for backwards
    # compatibility.
    mapped_chars = (
        ('>', '__gt__'),
        ('<', '__lt__'),
        ('\'', '__sq__'),
        ('"', '__dq__'),
        ('[', '__ob__'),
        (']', '__cb__'),
        ('{', '__oc__'),
        ('}', '__cc__'),
    )
    for char, token in mapped_chars:
        pattern = pattern.replace(token, char)

    # Filenames are restricted to a conservative character set because they
    # are handed to an external command. \Z (unlike $) also rejects a
    # trailing newline.
    file_regex = re.compile(r"^[A-Za-z0-9./\-_]+\Z")

    if not file_regex.match(outputfile):
        print("Illegal output filename.")
        return -5
    if not file_regex.match(inputfile):
        print("Illegal input filename.")
        return -6
    # Exact comparison: values such as "trueX" must not pass validation and
    # then be silently treated as a normal (non-inverted) match.
    if invert not in ("true", "false"):
        print("Illegal invert option.")
        return -7

    inverted = invert == "true"
    if inverted:
        print("Not matching pattern: %s" % pattern)
    else:
        print("Matching pattern: %s" % pattern)

    try:
        return _run_grep(pattern, inverted, inputfile, outputfile)
    except EnvironmentError as err:
        print("Unable to run grep: %s" % err)
        return -8
| 121 | |
|---|
# Script entry point: run the wrapper only when this file is executed
# directly. The status returned by main() is discarded here.
if __name__ == "__main__":
    main()