root/galaxy-central/tools/filters/grep.py

リビジョン 2, 3.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
# Filename: grep.py
# Author: Ian N. Schenck
# Version: 8/23/2005
#
# This script accepts a regular expression, as well as an "invert"
# option, and applies the regular expression using grep.  This wrapper
# provides security and pipeline integration.
#
# Grep is launched based on these inputs:
# -i            Input file
# -o            Output file
# -pattern      RegEx pattern
# -v            true or false (output NON-matching lines)
14
import commands
import os
import re
import string
import subprocess
import sys
from tempfile import NamedTemporaryFile
21
22# This function is exceedingly useful, perhaps package for reuse?
def getopts(argv):
    """Collect ``-flag value`` pairs from a list of command-line arguments.

    Every argument that starts with ``-`` is taken as an option name and the
    argument immediately after it as that option's value, even if the value
    itself starts with ``-``.  Arguments that are not option names (including
    empty strings) are skipped.  When the same option appears more than once
    the last value wins.

    Args:
        argv: Sequence of argument strings, without the program name.

    Returns:
        A dict mapping each option name (leading dash included) to its value.

    Raises:
        IndexError: If the last argument is an option name with no value
            after it.  Callers use this to detect a malformed command line.
    """
    opts = {}
    index = 0
    while index < len(argv):
        token = argv[index]
        # startswith() is safe on an empty string, unlike token[0].
        if token.startswith('-'):
            # Deliberately unguarded: a flag without a value must raise
            # IndexError so the caller can report the usage.
            opts[token] = argv[index + 1]
            index += 2
        else:
            index += 1
    return opts
32
def _restore_escaped_chars(pattern):
    """Replace ``__xx__`` placeholder tokens in *pattern* with the characters they stand for."""
    # With the newer sanitizing only the single quote still needs replacing,
    # but the full table remains for backwards compatibility.
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        '\'': '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
    }
    for char, token in mapped_chars.items():
        pattern = pattern.replace(token, char)
    return pattern


def main():
    """Validate the command-line options and run ``grep -E`` on the input file.

    Options are read from ``sys.argv``: ``-i`` (input file), ``-o`` (output
    file), ``-pattern`` (regular expression) and ``-v`` (``true`` to keep the
    NON-matching lines, ``false`` to keep the matching ones).  The pattern is
    passed to grep through a temporary file (``grep -f``), so it never has to
    be quoted for a command line and may contain any character.

    Returns:
        0 after printing the usage text (an option had no value);
        -1 to -4 when the output file, input file, invert option or pattern
        is missing; -5 to -7 when the output file name, input file name or
        invert option is not acceptable; -8 when the output file could not
        be opened or grep could not be started; otherwise grep's own exit
        status.
    """
    try:
        opts = getopts(sys.argv[1:])
    except IndexError:
        # getopts raises IndexError when a flag has no value after it.
        print("Usage:")
        print(" -i              Input file")
        print(" -o              Output file")
        print(" -pattern        RegEx pattern")
        print(" -v              true or false (Invert match)")
        return 0

    outputfile = opts.get("-o")
    if outputfile is None:
        print("No output file specified.")
        return -1

    inputfile = opts.get("-i")
    if inputfile is None:
        print("No input file specified.")
        return -2

    invert = opts.get("-v")
    if invert is None:
        print("Match style (Invert or normal) not specified.")
        return -3

    pattern = opts.get("-pattern")
    if pattern is None:
        print("RegEx pattern not specified.")
        return -4

    # All inputs have been specified at this point, now validate.
    pattern = _restore_escaped_chars(pattern)

    # File names may only contain plain path characters.  \Z is used instead
    # of $ because $ would also accept a name that ends in a newline.
    fileRegEx = re.compile(r"^[A-Za-z0-9./\-_]+\Z")

    if not fileRegEx.match(outputfile):
        print("Illegal output filename.")
        return -5
    if not fileRegEx.match(inputfile):
        print("Illegal input filename.")
        return -6
    # Exact comparison: a prefix match would let values such as "truex"
    # through and silently treat them as "false".
    if invert not in ("true", "false"):
        print("Illegal invert option.")
        return -7

    command = ["grep", "-E"]
    if invert == "true":
        command.append("-v")
        print("Not matching pattern: %s" % pattern)
    else:
        print("Matching pattern: %s" % pattern)

    # The temporary file object is kept open until grep has finished; closing
    # it removes the file, so cleanup happens on every path out of the block.
    pattern_file = NamedTemporaryFile(mode="w")
    try:
        pattern_file.write(pattern)
        # Make sure the pattern is on disk before grep reads it.
        pattern_file.flush()

        # "--" ends grep's option parsing so that an input file name starting
        # with "-" (allowed by fileRegEx) is not taken as an option.
        command.extend(["-f", pattern_file.name, "--", inputfile])

        try:
            output = open(outputfile, "w")
            try:
                # No shell is involved: arguments are passed as a list and the
                # output redirection is done by handing grep the open file.
                # stderr is captured and dropped so grep's diagnostics do not
                # reach this script's own stderr.
                grep = subprocess.Popen(command, stdout=output,
                                        stderr=subprocess.PIPE)
                grep.communicate()
            finally:
                output.close()
        except EnvironmentError:
            print("Unable to run grep.")
            return -8
    finally:
        pattern_file.close()

    return grep.returncode
121
# Script entry point: run the wrapper when this file is executed directly.
# The value returned by main() is not used as the process exit status.
if __name__ == "__main__":
    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。