[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | import sys |
---|
| 4 | import optparse |
---|
| 5 | import re |
---|
| 6 | |
---|
def stop_err(msg):
    """Write *msg* to standard error and terminate the program.

    The message is written verbatim (no newline is appended).  The process
    exits with status 1 so that shells and calling tools can detect the
    failure; a bare ``sys.exit()`` would report success (status 0).

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
---|
| 10 | |
---|
# CIGAR operations that advance along the reference sequence: alignment
# match (M), deletion (D), skipped region (N), sequence match (=) and
# sequence mismatch (X).  Insertions (I), clipping (S, H) and padding (P)
# do not consume reference bases and must not extend the interval.
_REF_CONSUMING_OP = re.compile(r'(\d+)[MDN=X]')


def _reference_span(cigar_string):
    """Return the number of reference bases covered by *cigar_string*."""
    return sum(int(length) for length in _REF_CONSUMING_OP.findall(cigar_string))


def main():
    """Convert SAM alignments to tab-separated intervals on standard output.

    Reads SAM records from the file given with ``-f`` (or from standard
    input when omitted), skips blank lines and lines starting with ``#`` or
    ``@``, and prints one line per record: reference name, 0-based start,
    end (start plus the reference span of the CIGAR string) and strand.
    The strand is ``-`` when bit 0x10 of the flag column is set, else ``+``.
    With ``-p`` the original SAM line is appended to each output line.

    The ``-e``/``--read_column`` and ``-d``/``--debug`` options are accepted
    but do not influence the output.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-f', '--input_sam_file',
        metavar="INPUT_SAM_FILE",
        dest='input_sam',
        default=False,
        help='Name of the SAM file to be filtered. STDIN is default')

    parser.add_option(
        '-c', '--flag_column',
        dest='flag_col',
        default='2',
        help='Column containing SAM bitwise flag. 1-based')

    parser.add_option(
        '-s', '--start_column',
        dest='start_col',
        default='4',
        help='Column containing position. 1-based')

    parser.add_option(
        '-g', '--cigar_column',
        dest='cigar_col',
        default='6',
        help='Column containing CIGAR or extended CIGAR string')

    parser.add_option(
        '-r', '--ref_column',
        dest='ref_col',
        default='3',
        help='Column containing name of the refernce sequence coordinate. 1-based')

    parser.add_option(
        '-e', '--read_column',
        dest='read_col',
        default='1',
        help='Column containing read name. 1-based')

    parser.add_option(
        '-d', '--debug',
        dest='debug',
        action='store_true',
        default=False,
        help='Print debugging info')

    parser.add_option(
        '-p', '--print_all',
        dest='prt_all',
        action='store_true',
        default=False,
        help='Print coordinates and original SAM?')

    options, args = parser.parse_args()

    # Turn the 1-based column options into 0-based indexes once, up front,
    # so a malformed option fails before any output is produced.
    start_idx = int(options.start_col) - 1
    cigar_idx = int(options.cigar_col) - 1
    flag_idx = int(options.flag_col) - 1
    ref_idx = int(options.ref_col) - 1

    if options.input_sam:
        infile = open(options.input_sam, 'r')
    else:
        infile = sys.stdin

    try:
        # provide a (partial) header so that strand is automatically set in metadata
        print('#chrom\tstart\tend\tstrand')

        for line in infile:
            line = line.rstrip('\r\n')
            # Skip blank lines, comments and SAM header lines.
            if not line or line.startswith(('#', '@')):
                continue

            fields = line.split('\t')
            # SAM positions are 1-based; the emitted interval start is 0-based.
            start = int(fields[start_idx]) - 1
            end = start + _reference_span(fields[cigar_idx])

            # Flag bit 0x10 marks a read aligned to the reverse strand.
            if int(fields[flag_idx]) & 0x0010:
                strand = '-'
            else:
                strand = '+'

            record = '%s\t%d\t%d\t%s' % (fields[ref_idx], start, end, strand)
            if options.prt_all:
                record += '\t' + line
            print(record)
    finally:
        # Close only a file this function opened; leave stdin alone.
        if infile is not sys.stdin:
            infile.close()
---|
| 100 | |
---|
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
---|
| 102 | |
---|