1 | #!/usr/bin/env python |
---|
2 | |
---|
"""
Read a MAF file and output BED-style intervals for a specified list of species.
"""
---|
6 | import sys, os, tempfile |
---|
7 | from galaxy import eggs |
---|
8 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
9 | from bx.align import maf |
---|
10 | |
---|
# Refuse to run on interpreters older than Python 2.4.
assert sys.version_info[:2] >= (2, 4)
---|
12 | |
---|
def _open_unique_output(tmp_dir):
    """
    Create and open for writing a uniquely named '.maf_to_bed' file in tmp_dir.

    NamedTemporaryFile is used only to pick an unused name: closing it removes
    the temporary file, and the same name is then reopened as an ordinary file
    so that it is still there after this script exits. The ``name`` attribute
    of the returned file object is the path of the new file.
    """
    placeholder = tempfile.NamedTemporaryFile(mode='w', dir=tmp_dir, suffix='.maf_to_bed')
    filename = placeholder.name
    placeholder.close()
    return open(filename, 'w')


def __main__():
    """
    Convert a MAF alignment file into one tab-separated interval file per species.

    Arguments are read from sys.argv:
        1: path of the input MAF file
        2: path of the primary output file (receives the first species)
        3: comma-separated species list; if it contains "None", every species
           found in the MAF file is used instead
        4: partial-block policy; "partial_disallowed" skips blocks that have
           fewer components than the number of species
        5: directory in which the files for additional species are created

    For every alignment component one line is written to that species' file:
    chrom, start, end, "<species>_<block number>", "0", strand.

    After conversion a "#FILE1" line (primary species) and a "#FILE" line
    (every other species) naming the written files are emitted on stdout.

    If the MAF file cannot be read during species discovery, or if "?" is one
    of the species, a message is written to stderr and the function returns
    without producing output. An IndexError is raised if arguments are missing.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # where to store files that become additional output
    database_tmp_dir = sys.argv[5]

    species = sys.argv[3].split(',')
    partial = sys.argv[4]
    out_files = {}
    primary_spec = None

    if "None" in species:
        # No explicit species requested: collect every species in the file.
        discovered = {}
        try:
            maf_file = open(input_filename, 'r')
            try:
                for block in maf.Reader(maf_file):
                    for comp in block.components:
                        spec, chrom = maf.src_split(comp.src)
                        if not spec or not chrom:
                            spec = chrom = comp.src
                        discovered[spec] = ""
            finally:
                # The discovery pass must not leak its handle on the input.
                maf_file.close()
        except Exception:
            # Any failure to open or parse is reported as a bad input file;
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            sys.stderr.write("Invalid MAF file specified\n")
            return
        species = list(discovered)

    if "?" in species:
        sys.stderr.write("Invalid dbkey specified\n")
        return

    try:
        # The first species goes to the primary output; every other species
        # gets its own uniquely named file in database_tmp_dir.
        for index, spec in enumerate(species):
            if index == 0:
                out_files[spec] = open(output_filename, 'w')
                primary_spec = spec
            else:
                out_files[spec] = _open_unique_output(database_tmp_dir)
        num_species = len(species)

        sys.stdout.write("Restricted to species: %s\n" % ",".join(species))

        # Both conditions are constant for the whole run, so evaluate them once.
        restrict_species = "None" not in species
        skip_partial = partial == "partial_disallowed"

        file_in = open(input_filename, 'r')
        try:
            for block_num, block in enumerate(maf.Reader(file_in)):
                if restrict_species:
                    block = block.limit_to_species(species)
                components = block.components
                if skip_partial and len(components) < num_species:
                    continue
                for comp in components:
                    spec, chrom = maf.src_split(comp.src)
                    if not spec or not chrom:
                        spec = chrom = comp.src
                    if spec not in out_files:
                        out_files[spec] = _open_unique_output(database_tmp_dir)

                    if comp.strand == "-":
                        # Flip the interval against the source length
                        # (presumably to report forward-strand coordinates,
                        # as MAF minus-strand positions count from the other
                        # end -- confirm against the MAF specification).
                        start = comp.src_size - comp.end
                        end = comp.src_size - comp.start
                    else:
                        start = comp.start
                        end = comp.end
                    out_files[spec].write(
                        "%s\t%s\t%s\t%s_%s\t0\t%s\n"
                        % (chrom, start, end, spec, block_num, comp.strand)
                    )
        finally:
            file_in.close()
    finally:
        # Close every output even when conversion fails part-way through.
        for handle in out_files.values():
            handle.close()

    for spec in out_files:
        if spec != primary_spec:
            sys.stdout.write(
                "#FILE\t%s\t%s\n"
                % (spec, os.path.join(database_tmp_dir, os.path.split(out_files[spec].name)[1]))
            )
        else:
            sys.stdout.write("#FILE1\t%s\t%s\n" % (spec, out_files[spec].name))
---|
93 | |
---|
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
---|