[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | """ |
---|
| 4 | Read a maf and output intervals for specified list of species. |
---|
| 5 | """ |
---|
| 6 | import sys, os, tempfile |
---|
| 7 | from galaxy import eggs |
---|
| 8 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
| 9 | from bx.align import maf |
---|
| 10 | |
---|
| 11 | assert sys.version_info[:2] >= ( 2, 4 ) |
---|
| 12 | |
---|
| 13 | def __main__(): |
---|
| 14 | |
---|
| 15 | input_filename = sys.argv[1] |
---|
| 16 | output_filename = sys.argv[2] |
---|
| 17 | #where to store files that become additional output |
---|
| 18 | database_tmp_dir = sys.argv[5] |
---|
| 19 | |
---|
| 20 | species = sys.argv[3].split(',') |
---|
| 21 | partial = sys.argv[4] |
---|
| 22 | out_files = {} |
---|
| 23 | primary_spec = None |
---|
| 24 | |
---|
| 25 | if "None" in species: |
---|
| 26 | species = {} |
---|
| 27 | try: |
---|
| 28 | for i, m in enumerate( maf.Reader( open( input_filename, 'r' ) ) ): |
---|
| 29 | for c in m.components: |
---|
| 30 | spec,chrom = maf.src_split( c.src ) |
---|
| 31 | if not spec or not chrom: |
---|
| 32 | spec = chrom = c.src |
---|
| 33 | species[spec] = "" |
---|
| 34 | species = species.keys() |
---|
| 35 | except: |
---|
| 36 | print >>sys.stderr, "Invalid MAF file specified" |
---|
| 37 | return |
---|
| 38 | |
---|
| 39 | if "?" in species: |
---|
| 40 | print >>sys.stderr, "Invalid dbkey specified" |
---|
| 41 | return |
---|
| 42 | |
---|
| 43 | |
---|
| 44 | for i in range( 0, len( species ) ): |
---|
| 45 | spec = species[i] |
---|
| 46 | if i == 0: |
---|
| 47 | out_files[spec] = open( output_filename, 'w' ) |
---|
| 48 | primary_spec = spec |
---|
| 49 | else: |
---|
| 50 | out_files[spec] = tempfile.NamedTemporaryFile( mode = 'w', dir = database_tmp_dir, suffix = '.maf_to_bed' ) |
---|
| 51 | filename = out_files[spec].name |
---|
| 52 | out_files[spec].close() |
---|
| 53 | out_files[spec] = open( filename, 'w' ) |
---|
| 54 | num_species = len( species ) |
---|
| 55 | |
---|
| 56 | print "Restricted to species:", ",".join( species ) |
---|
| 57 | |
---|
| 58 | file_in = open( input_filename, 'r' ) |
---|
| 59 | maf_reader = maf.Reader( file_in ) |
---|
| 60 | |
---|
| 61 | block_num = -1 |
---|
| 62 | |
---|
| 63 | for i, m in enumerate( maf_reader ): |
---|
| 64 | block_num += 1 |
---|
| 65 | if "None" not in species: |
---|
| 66 | m = m.limit_to_species( species ) |
---|
| 67 | l = m.components |
---|
| 68 | if len(l) < num_species and partial == "partial_disallowed": continue |
---|
| 69 | for c in l: |
---|
| 70 | spec,chrom = maf.src_split( c.src ) |
---|
| 71 | if not spec or not chrom: |
---|
| 72 | spec = chrom = c.src |
---|
| 73 | if spec not in out_files.keys(): |
---|
| 74 | out_files[spec] = tempfile.NamedTemporaryFile( mode='w', dir = database_tmp_dir, suffix = '.maf_to_bed' ) |
---|
| 75 | filename = out_files[spec].name |
---|
| 76 | out_files[spec].close() |
---|
| 77 | out_files[spec] = open( filename, 'w' ) |
---|
| 78 | |
---|
| 79 | if c.strand == "-": |
---|
| 80 | out_files[spec].write( chrom + "\t" + str( c.src_size - c.end ) + "\t" + str( c.src_size - c.start ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" ) |
---|
| 81 | else: |
---|
| 82 | out_files[spec].write( chrom + "\t" + str( c.start ) + "\t" + str( c.end ) + "\t" + spec + "_" + str( block_num ) + "\t" + "0\t" + c.strand + "\n" ) |
---|
| 83 | |
---|
| 84 | file_in.close() |
---|
| 85 | for file_out in out_files.keys(): |
---|
| 86 | out_files[file_out].close() |
---|
| 87 | |
---|
| 88 | for spec in out_files.keys(): |
---|
| 89 | if spec != primary_spec: |
---|
| 90 | print "#FILE\t" + spec + "\t" + os.path.join( database_tmp_dir, os.path.split( out_files[spec].name )[1] ) |
---|
| 91 | else: |
---|
| 92 | print "#FILE1\t" + spec + "\t" + out_files[spec].name |
---|
| 93 | |
---|
| 94 | if __name__ == "__main__": __main__() |
---|