1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | import sys |
---|
4 | import optparse |
---|
5 | |
---|
def stop_err( msg ):
    """Write *msg* to standard error and terminate the script.

    The message is written exactly as given (no newline is appended).
    The process exits with status 1 so that callers checking the exit
    code see the failure; exiting via a bare sys.exit() would report
    success (status 0).

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
---|
9 | |
---|
def main():
    """Trim every input line, or one tab-separated column of it, to a range.

    Command-line options (parsed with optparse from sys.argv):
      -f/--file    input file; standard input is read when omitted
      -s/--start   1-based position of the first character to keep;
                   0 (the default) keeps from the first character
      -e/--end     1-based position of the last character to keep;
                   0 (the default) keeps everything up to the end
      -c/--column  1-based tab-separated column to trim; 0 (the default)
                   trims the whole line
      -i/--ignore  comma-separated list of first characters; lines that
                   start with one of them are printed unchanged
      -a/--ascii   interpret the --ignore entries as decimal character codes
      -q/--fastq   print lines with an even 0-based index unchanged

    Empty lines are dropped; every other line is written to standard
    output. When the requested column does not exist in a line,
    stop_err() is called with an explanatory message.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-a', '--ascii',
        dest='ascii',
        action='store_true',
        default=False,
        help='Use ascii codes to define ignored beginnings instead of raw characters')

    parser.add_option(
        '-q', '--fastq',
        dest='fastq',
        action='store_true',
        default=False,
        help='The input data in fastq format. If selected the script skips every even line since they contain sequence ids')

    parser.add_option(
        '-i', '--ignore',
        dest='ignore',
        help='A comma separated list of ignored beginnings (e.g., ">,@"), or its ascii codes (e.g., "62,64") if option -a is enabled')

    parser.add_option(
        '-s', '--start',
        dest='start',
        default='0',
        help='Trim from beginning to here (1-based)')

    parser.add_option(
        '-e', '--end',
        dest='end',
        default='0',
        help='Trim from here to the end (1-based)')

    parser.add_option(
        '-f', '--file',
        dest='input_txt',
        default=False,
        help='Name of file to be chopped. STDIN is default')

    parser.add_option(
        '-c', '--column',
        dest='col',
        default='0',
        help='Column to chop. If 0 = chop the whole line')

    options, args = parser.parse_args()

    # The literal string "None" is treated the same as an omitted option.
    invalid_starts = []
    if options.ignore and options.ignore != "None":
        invalid_starts = options.ignore.split(',')
        if options.ascii:
            # Entries are decimal character codes rather than raw characters.
            invalid_starts = [chr(int(code)) for code in invalid_starts]

    # Convert the numeric options once instead of once per input line.
    col = int(options.col)
    start = int(options.start)
    end = int(options.end)

    # Positions are 1-based on the command line. A start of 0 (the default)
    # must mean "from the first character": without the clamp it would turn
    # into the slice index -1 and keep only the last character.
    first = max(start - 1, 0)
    # A non-positive end means "no upper bound".
    last = end if end > 0 else None

    if options.input_txt:
        infile = open(options.input_txt, 'r')
    else:
        infile = sys.stdin

    try:
        for i, line in enumerate(infile):
            line = line.rstrip('\r\n')
            if not line:
                # Empty lines are not echoed.
                continue

            if options.fastq and i % 2 == 0:
                # In fastq mode, lines with an even 0-based index pass through.
                print(line)
                continue

            # The ignore test looks at the first character only.
            if line[0] not in invalid_starts:
                if col == 0:
                    line = line[first:last]
                else:
                    fields = line.split('\t')
                    # col is 1-based, so it is valid only up to len(fields).
                    if col > len(fields):
                        stop_err('Column %d does not exist. Check input parameters\n' % col)
                    fields[col - 1] = fields[col - 1][first:last]
                    line = '\t'.join(fields)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(line)
    finally:
        # Close only what this function opened; leave stdin alone.
        if infile is not sys.stdin:
            infile.close()
---|
104 | |
---|
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
---|
106 | |
---|