[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | import sys |
---|
| 4 | import optparse |
---|
| 5 | |
---|
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* to STDERR exactly as given (no newline is appended, so
    callers include their own) and exits with status 1 so that shells and
    calling tools can tell the run failed.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) after an error.
    sys.exit(1)
---|
| 9 | |
---|
def main():
    """Trim each input line, or one tab-separated column of it, to a range.

    Input is read from the file named by ``-f``/``--file`` or from STDIN and
    the result is written to STDOUT. ``-s``/``--start`` and ``-e``/``--end``
    are 1-based, inclusive character positions; a value of 0 (the default)
    means "from the beginning" / "to the end" respectively. With
    ``-c``/``--column`` greater than 0 only that tab-separated field is
    trimmed. Lines whose first character is listed in ``-i``/``--ignore``
    are not trimmed, and with ``-q``/``--fastq`` every other line (starting
    with the first) is passed through unchanged. Empty lines are dropped.

    Exits through ``parser.error`` when a numeric option cannot be parsed and
    through ``stop_err`` when the requested column does not exist.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-a', '--ascii',
        dest='ascii',
        action='store_true',
        default=False,
        help='Use ascii codes to define ignored beginnings instead of raw characters')

    parser.add_option(
        '-q', '--fastq',
        dest='fastq',
        action='store_true',
        default=False,
        help='The input data is in fastq format. If selected, every other line '
             '(starting with the first) is passed through untrimmed since '
             'those lines contain sequence ids')

    parser.add_option(
        '-i', '--ignore',
        dest='ignore',
        help='A comma separated list of ignored beginnings (e.g., ">,@"), '
             'or their ascii codes (e.g., "62,64") if option -a is enabled')

    parser.add_option(
        '-s', '--start',
        dest='start',
        default='0',
        help='Trim from beginning to here (1-based)')

    parser.add_option(
        '-e', '--end',
        dest='end',
        default='0',
        help='Trim from here to the end (1-based)')

    parser.add_option(
        '-f', '--file',
        dest='input_txt',
        default=False,
        help='Name of file to be chopped. STDIN is default')

    parser.add_option(
        '-c', '--column',
        dest='col',
        default='0',
        help='Column to chop. If 0 = chop the whole line')

    options, args = parser.parse_args()

    # Parse the numeric options once, up front, instead of once per line.
    try:
        start = int(options.start)
        end = int(options.end)
        col = int(options.col)
    except ValueError:
        parser.error('--start, --end and --column must be integers')

    # Convert the 1-based inclusive range to slice bounds. Clamping matters:
    # the default start of 0 would otherwise become index -1 and reduce
    # every line to its last character.
    begin = max(start - 1, 0)
    stop = end if end > 0 else None

    invalid_starts = []
    # The literal string "None" is treated the same as an omitted option.
    if options.ignore and options.ignore != "None":
        invalid_starts = options.ignore.split(',')
        if options.ascii:
            try:
                invalid_starts = [chr(int(code)) for code in invalid_starts]
            except ValueError:
                parser.error('--ignore must be a comma separated list of '
                             'ascii codes when -a is enabled')

    if options.input_txt:
        infile = open(options.input_txt, 'r')
    else:
        infile = sys.stdin

    # sys.stdout.write(line + '\n') emits the same bytes as ``print line``
    # and is valid on both Python 2 and Python 3.
    emit = sys.stdout.write

    try:
        for i, line in enumerate(infile):
            line = line.rstrip('\r\n')
            if not line:
                continue

            # In fastq mode lines 1, 3, 5, ... (index 0, 2, 4, ...) are
            # echoed untouched.
            if options.fastq and i % 2 == 0:
                emit(line + '\n')
                continue

            if line[0] not in invalid_starts:
                if col == 0:
                    line = line[begin:stop]
                else:
                    fields = line.split('\t')
                    # Columns are 1-based, so col may equal len(fields)
                    # but never exceed it.
                    if col > len(fields):
                        stop_err('Column %d does not exist. Check input parameters\n' % col)
                    fields[col - 1] = fields[col - 1][begin:stop]
                    line = '\t'.join(fields)

            # NOTE(review): the original indentation was lost in this copy;
            # lines with an ignored beginning are assumed to be echoed
            # unchanged rather than dropped -- confirm against the upstream
            # file.
            emit(line + '\n')
    finally:
        # Close only what this function opened; never close STDIN.
        if infile is not sys.stdin:
            infile.close()
---|
| 104 | |
---|
# Run the trimmer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
---|
| 106 | |
---|