[3] | 1 | #!/usr/bin/python2.6 |
---|
| 2 | |
---|
| 3 | """ |
---|
| 4 | FIXME! |
---|
| 5 | |
---|
| 6 | usage: %prog feature.bed ar.bed snp.bed div_directory [options] |
---|
| 7 | -m, --mask=M: Mask AR and features with this file |
---|
| 8 | -s, --suffix=S: append suffix to chromosomes to get filenames from div_directory |
---|
| 9 | -l, --lens=l: Set chromosome ends using LEN file |
---|
| 10 | """ |
---|
| 11 | |
---|
| 12 | import sys |
---|
| 13 | import bx.bitset |
---|
| 14 | from bx.bitset_builders import * |
---|
| 15 | from bx.cookbook import doc_optparse |
---|
| 16 | |
---|
| 17 | def main(): |
---|
| 18 | |
---|
| 19 | options, args = doc_optparse.parse( __doc__ ) |
---|
| 20 | try: |
---|
| 21 | lens = {} |
---|
| 22 | if options.lens: |
---|
| 23 | for line in open( options.lens ): |
---|
| 24 | chrom, length = line.split() |
---|
| 25 | lens[chrom] = int( length ) |
---|
| 26 | |
---|
| 27 | if options.suffix: suffix = options.suffix |
---|
| 28 | else: suffix = "" |
---|
| 29 | |
---|
| 30 | print >>sys.stderr, "\nReading feature", |
---|
| 31 | interval_file = open(args[0]) |
---|
| 32 | feature = binned_bitsets_from_file(interval_file, lens=lens) |
---|
| 33 | interval_file.close() |
---|
| 34 | # reuse interval file |
---|
| 35 | intervals = {} |
---|
| 36 | interval_file = open(args[0]) |
---|
| 37 | for line in interval_file: |
---|
| 38 | fields = line.split() |
---|
| 39 | chrom, start, end = fields[0], int(fields[1]), int(fields[2]) |
---|
| 40 | if chrom not in intervals: intervals[chrom] = [] |
---|
| 41 | intervals[chrom].append( [start,end] ) |
---|
| 42 | interval_file.close() |
---|
| 43 | |
---|
| 44 | print >>sys.stderr, "\nReading ar", |
---|
| 45 | ar = binned_bitsets_from_file(open( args[1] ), lens=lens) |
---|
| 46 | |
---|
| 47 | print >>sys.stderr, "\nReading snps", |
---|
| 48 | snp = binned_bitsets_from_file(open( args[2] ), lens=lens) |
---|
| 49 | snp_mask = clone_inverted( snp ) |
---|
| 50 | snp_copy = clone( snp ) |
---|
| 51 | |
---|
| 52 | print >>sys.stderr, "\nMasking AR", |
---|
| 53 | ar_mask = clone_inverted( ar ) |
---|
| 54 | print >>sys.stderr |
---|
| 55 | |
---|
| 56 | dirname = args[3] |
---|
| 57 | |
---|
| 58 | |
---|
| 59 | if options.mask: mask = binned_bitsets_from_file(open(options.mask), lens=lens) |
---|
| 60 | else: mask = None |
---|
| 61 | except: |
---|
| 62 | doc_optparse.exit() |
---|
| 63 | |
---|
| 64 | if mask: |
---|
| 65 | for chrom in mask.keys(): |
---|
| 66 | if chrom in feature: feature[chrom].iand( mask[chrom] ) |
---|
| 67 | if chrom in ar: ar[chrom].iand( mask[chrom] ) |
---|
| 68 | |
---|
| 69 | |
---|
| 70 | # divergence and snp counts for all features |
---|
| 71 | feature_div_count = 0 |
---|
| 72 | feature_snp_count = 0 |
---|
| 73 | ar_div_count = 0 |
---|
| 74 | ar_snp_count = 0 |
---|
| 75 | |
---|
| 76 | # collect snp and div |
---|
| 77 | for chr in feature.keys(): |
---|
| 78 | |
---|
| 79 | if chr not in snp: continue |
---|
| 80 | if chr not in ar: continue |
---|
| 81 | |
---|
| 82 | print >>sys.stderr, "reading %s ..." % chr, |
---|
| 83 | try: |
---|
| 84 | div = binned_bitsets_from_file( open( dirname + "/%s.bed" % (chr+suffix) ), lens=lens) |
---|
| 85 | except: |
---|
| 86 | print >>sys.stderr,"%s.bed not found" % chr |
---|
| 87 | continue |
---|
| 88 | |
---|
| 89 | div[chr].iand( snp_mask[chr] ) # div/snp sites count snp-only |
---|
| 90 | div_copy = clone( div ) |
---|
| 91 | |
---|
| 92 | print >>sys.stderr, "AR:", chr, |
---|
| 93 | snp[chr].iand( ar[chr] ) |
---|
| 94 | div[chr].iand( ar[chr] ) |
---|
| 95 | snp_count = snp[chr].count_range(0,snp[chr].size) |
---|
| 96 | ar_snp_count += snp_count |
---|
| 97 | print >>sys.stderr, snp_count, |
---|
| 98 | try: |
---|
| 99 | div_count = div[chr].count_range(0,div[chr].size) |
---|
| 100 | ar_div_count += div_count |
---|
| 101 | print >>sys.stderr, div_count |
---|
| 102 | except: |
---|
| 103 | print >>sys.stderr, chr, "failed" |
---|
| 104 | |
---|
| 105 | div = div_copy |
---|
| 106 | snp[chr] = snp_copy[chr] |
---|
| 107 | print >>sys.stderr, "feature:", chr, |
---|
| 108 | feature[chr].iand( ar_mask[chr] ) # clip to non-AR only |
---|
| 109 | snp[chr].iand( feature[chr] ) |
---|
| 110 | div[chr].iand( feature[chr] ) |
---|
| 111 | feature_snp_count += snp[chr].count_range(0,snp[chr].size) |
---|
| 112 | print >>sys.stderr, snp[chr].count_range(0,snp[chr].size), div[chr].count_range(0,div[chr].size) |
---|
| 113 | feature_div_count += div[chr].count_range(0,div[chr].size) |
---|
| 114 | print >>sys.stderr, snp[chr].count_range(0,snp[chr].size), div[chr].count_range(0,div[chr].size) |
---|
| 115 | |
---|
| 116 | # Note: can loop over feature intervals here for individual counts |
---|
| 117 | if chr in intervals: |
---|
| 118 | for start,end in intervals[chr]: |
---|
| 119 | ind_div_count = div[chr].count_range(start,end-start) |
---|
| 120 | ind_snp_count = snp[chr].count_range(start,end-start) |
---|
| 121 | print chr, start, end, ind_div_count, ind_snp_count |
---|
| 122 | |
---|
| 123 | print "feature snp\t%d" %feature_snp_count |
---|
| 124 | print "feature div\t%d" %feature_div_count |
---|
| 125 | print "ar snp\t%d" %ar_snp_count |
---|
| 126 | print "ar div\t%d" %ar_div_count |
---|
| 127 | |
---|
| 128 | # copies a dictionary of bitsets |
---|
| 129 | def copybits( binnedbits ): |
---|
| 130 | bitset = BinnedBitSet( binnedbits.size ) |
---|
| 131 | bitset.ior( binnedbits ) |
---|
| 132 | return bitset |
---|
| 133 | |
---|
| 134 | def clone( bitsets ): |
---|
| 135 | r = {} |
---|
| 136 | for k,b in bitsets.items(): r[ k ] = copybits( b ) |
---|
| 137 | return r |
---|
| 138 | |
---|
| 139 | def clone_inverted( bitsets ): |
---|
| 140 | r = {} |
---|
| 141 | for k,b in bitsets.items(): |
---|
| 142 | r[ k ] = copybits( b ) |
---|
| 143 | r[ k ].invert() |
---|
| 144 | return r |
---|
| 145 | |
---|
| 146 | main() |
---|