1 | #!/usr/bin/env python
|
---|
2 | #Reads a LAV file and writes two BED files.
|
---|
3 | import sys
|
---|
4 | from galaxy import eggs
|
---|
5 | import pkg_resources
|
---|
6 | pkg_resources.require( "bx-python" )
|
---|
7 | import bx.align.lav
|
---|
8 |
|
---|
9 | assert sys.version_info[:2] >= ( 2, 4 )
|
---|
10 |
|
---|
11 | def stop_err( msg ):
|
---|
12 | sys.stderr.write( msg )
|
---|
13 | sys.exit()
|
---|
14 |
|
---|
15 | def main():
|
---|
16 | try:
|
---|
17 | lav_file = open(sys.argv[1],'r')
|
---|
18 | bed_file1 = open(sys.argv[2],'w')
|
---|
19 | bed_file2 = open(sys.argv[3],'w')
|
---|
20 | except Exception, e:
|
---|
21 | stop_err( str( e ) )
|
---|
22 |
|
---|
23 | lavsRead = 0
|
---|
24 | bedsWritten = 0
|
---|
25 | species = {}
|
---|
26 | # TODO: this is really bad since everything is read into memory. Can we eliminate this tool?
|
---|
27 | for lavBlock in bx.align.lav.Reader( lav_file ):
|
---|
28 | lavsRead += 1
|
---|
29 | for c in lavBlock.components:
|
---|
30 | spec, chrom = bx.align.lav.src_split( c.src )
|
---|
31 | if bedsWritten < 1:
|
---|
32 | if len( species )==0:
|
---|
33 | species[spec]=bed_file1
|
---|
34 | elif len( species )==1:
|
---|
35 | species[spec]=bed_file2
|
---|
36 | else:
|
---|
37 | continue #this is a pairwise alignment...
|
---|
38 | if spec in species:
|
---|
39 | species[spec].write( "%s\t%i\t%i\t%s_%s\t%i\t%s\n" % ( chrom, c.start, c.end, spec, str( bedsWritten ), 0, c.strand ) )
|
---|
40 | bedsWritten += 1
|
---|
41 |
|
---|
42 |
|
---|
43 | for spec,file in species.items():
|
---|
44 | print "#FILE\t%s\t%s" % (file.name, spec)
|
---|
45 |
|
---|
46 | lav_file.close()
|
---|
47 | bed_file1.close()
|
---|
48 | bed_file2.close()
|
---|
49 |
|
---|
50 | print "%d lav blocks read, %d regions written\n" % (lavsRead,bedsWritten)
|
---|
51 |
|
---|
52 |
|
---|
53 |
|
---|
54 | if __name__ == "__main__": main() |
---|