1 | #!/usr/bin/python2.6 |
---|
2 | """ |
---|
3 | Create a bed file listing all the divergent sites between two specific species |
---|
4 | in a maf. |
---|
5 | |
---|
6 | usage: %prog maf_file reference_species_name other_species_name |
---|
7 | """ |
---|
8 | |
---|
9 | import sys |
---|
10 | import bx.align.maf |
---|
11 | import bx.bitset |
---|
12 | from bx.bitset_builders import * |
---|
13 | from itertools import * |
---|
14 | |
---|
15 | def main(): |
---|
16 | bitsets = {} |
---|
17 | maf = sys.argv[1] |
---|
18 | reference_sp, other_sp = sys.argv[2], sys.argv[3] |
---|
19 | |
---|
20 | for block in bx.align.maf.Reader( open(maf) ): |
---|
21 | ref = block.get_component_by_src_start( reference_sp ) |
---|
22 | other = block.get_component_by_src_start( other_sp ) |
---|
23 | |
---|
24 | if not ref or not other: continue |
---|
25 | ref_chrom = ref.src.split( '.' )[1] |
---|
26 | ref_start = ref.start |
---|
27 | ref_end = ref.end |
---|
28 | chrom_size = ref.get_src_size() |
---|
29 | |
---|
30 | if ref_chrom not in bitsets: bitsets[ ref_chrom ] = bx.bitset.BinnedBitSet( chrom_size ) |
---|
31 | |
---|
32 | pos = ref_start |
---|
33 | for i,j in izip( ref.text.upper(), other.text.upper() ): |
---|
34 | if i != '-': |
---|
35 | if i != j: # mismatch |
---|
36 | if i != 'N' and j != 'N' and j != '-': |
---|
37 | # set if all valid chars |
---|
38 | bitsets[ ref_chrom ].set( pos ) |
---|
39 | pos += 1 |
---|
40 | |
---|
41 | |
---|
42 | # bits --> bed file |
---|
43 | for chrom in bitsets: |
---|
44 | bits = bitsets[chrom] |
---|
45 | end = 0 |
---|
46 | while 1: |
---|
47 | start = bits.next_set( end ) |
---|
48 | if start == bits.size: break |
---|
49 | end = bits.next_clear( start ) |
---|
50 | print "%s\t%d\t%d" % ( chrom, start, end ) |
---|
51 | |
---|
52 | |
---|
53 | main() |
---|
54 | |
---|
55 | |
---|