| 1 | #!/usr/bin/python2.6 |
|---|
| 2 | |
|---|
| 3 | """ |
|---|
| 4 | Read a maf file and print the regions covered to a set of bed files (one for |
|---|
| 5 | each sequence source referenced in the maf). Only blocks with a positive |
|---|
| 6 | percent identity are written out. |
|---|
| 7 | |
|---|
| 8 | TODO: Can this be generalized to be made more useful? |
|---|
| 9 | |
|---|
| 10 | usage: %prog bed_outfile_prefix < maf |
|---|
| 11 | """ |
|---|
| 12 | |
|---|
| 13 | from __future__ import division |
|---|
| 14 | |
|---|
| 15 | import psyco_full |
|---|
| 16 | import bx.align.maf |
|---|
| 17 | import sys |
|---|
| 18 | |
|---|
| 19 | def block_pid( comp1, comp2 ): |
|---|
| 20 | match = 0 |
|---|
| 21 | total = 0 |
|---|
| 22 | t1 = comp1.text.lower() |
|---|
| 23 | t2 = comp2.text.lower() |
|---|
| 24 | for i in range( 0, len(t1) ): |
|---|
| 25 | a, b = t1[i], t2[i] |
|---|
| 26 | if a == '-' or b == '-': |
|---|
| 27 | continue |
|---|
| 28 | elif a == b: |
|---|
| 29 | match += 1 |
|---|
| 30 | total += 1 |
|---|
| 31 | if total == 0: return None |
|---|
| 32 | return ( match / total ) |
|---|
| 33 | |
|---|
| 34 | def main(): |
|---|
| 35 | out_prefix = sys.argv[1] |
|---|
| 36 | print out_prefix |
|---|
| 37 | out_files = dict() |
|---|
| 38 | for block in bx.align.maf.Reader( sys.stdin ): |
|---|
| 39 | ref_comp = block.components[0] |
|---|
| 40 | ref_chrom = ref_comp.src.split('.')[1] |
|---|
| 41 | for comp in block.components[1:]: |
|---|
| 42 | comp_species, comp_chrom = comp.src.split('.')[:2] |
|---|
| 43 | if comp_species not in out_files: |
|---|
| 44 | f = open( "%s%s.bed" % ( out_prefix, comp_species ), "w" ) |
|---|
| 45 | out_files[comp_species] = f |
|---|
| 46 | pid = block_pid( ref_comp, comp ) |
|---|
| 47 | if pid: |
|---|
| 48 | out_files[comp_species].write( "%s\t%d\t%d\t%s:%d-%d,%s\t%f\n" % |
|---|
| 49 | ( ref_chrom, ref_comp.forward_strand_start, ref_comp.forward_strand_end, \ |
|---|
| 50 | comp_chrom, comp.start, comp.end, comp.strand, pid ) ) |
|---|
| 51 | |
|---|
| 52 | for f in out_files.values(): |
|---|
| 53 | f.close() |
|---|
| 54 | |
|---|
| 55 | |
|---|
| 56 | if __name__ == "__main__": main() |
|---|