[2] | 1 | #!/usr/bin/env python
|
---|
| 2 | #Reads a LAV file and writes two BED files.
|
---|
| 3 | import sys
|
---|
| 4 | from galaxy import eggs
|
---|
| 5 | import pkg_resources
|
---|
| 6 | pkg_resources.require( "bx-python" )
|
---|
| 7 | import bx.align.lav
|
---|
| 8 |
|
---|
| 9 | assert sys.version_info[:2] >= ( 2, 4 )
|
---|
| 10 |
|
---|
| 11 | def stop_err( msg ):
|
---|
| 12 | sys.stderr.write( msg )
|
---|
| 13 | sys.exit()
|
---|
| 14 |
|
---|
| 15 | def main():
|
---|
| 16 | try:
|
---|
| 17 | lav_file = open(sys.argv[1],'r')
|
---|
| 18 | bed_file1 = open(sys.argv[2],'w')
|
---|
| 19 | bed_file2 = open(sys.argv[3],'w')
|
---|
| 20 | except Exception, e:
|
---|
| 21 | stop_err( str( e ) )
|
---|
| 22 |
|
---|
| 23 | lavsRead = 0
|
---|
| 24 | bedsWritten = 0
|
---|
| 25 | species = {}
|
---|
| 26 | # TODO: this is really bad since everything is read into memory. Can we eliminate this tool?
|
---|
| 27 | for lavBlock in bx.align.lav.Reader( lav_file ):
|
---|
| 28 | lavsRead += 1
|
---|
| 29 | for c in lavBlock.components:
|
---|
| 30 | spec, chrom = bx.align.lav.src_split( c.src )
|
---|
| 31 | if bedsWritten < 1:
|
---|
| 32 | if len( species )==0:
|
---|
| 33 | species[spec]=bed_file1
|
---|
| 34 | elif len( species )==1:
|
---|
| 35 | species[spec]=bed_file2
|
---|
| 36 | else:
|
---|
| 37 | continue #this is a pairwise alignment...
|
---|
| 38 | if spec in species:
|
---|
| 39 | species[spec].write( "%s\t%i\t%i\t%s_%s\t%i\t%s\n" % ( chrom, c.start, c.end, spec, str( bedsWritten ), 0, c.strand ) )
|
---|
| 40 | bedsWritten += 1
|
---|
| 41 |
|
---|
| 42 |
|
---|
| 43 | for spec,file in species.items():
|
---|
| 44 | print "#FILE\t%s\t%s" % (file.name, spec)
|
---|
| 45 |
|
---|
| 46 | lav_file.close()
|
---|
| 47 | bed_file1.close()
|
---|
| 48 | bed_file2.close()
|
---|
| 49 |
|
---|
| 50 | print "%d lav blocks read, %d regions written\n" % (lavsRead,bedsWritten)
|
---|
| 51 |
|
---|
| 52 |
|
---|
| 53 |
|
---|
| 54 | if __name__ == "__main__": main() |
---|