root/galaxy-central/tools/filters/lav_to_bed.py

リビジョン 2, 1.6 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2#Reads a LAV file and writes two BED files.
3import sys
4from galaxy import eggs
5import pkg_resources
6pkg_resources.require( "bx-python" )
7import bx.align.lav
8
9assert sys.version_info[:2] >= ( 2, 4 )
10
11def stop_err( msg ):
12    sys.stderr.write( msg )
13    sys.exit()
14
15def main():
16    try:
17        lav_file = open(sys.argv[1],'r')
18        bed_file1 = open(sys.argv[2],'w')
19        bed_file2 = open(sys.argv[3],'w')
20    except Exception, e:
21        stop_err( str( e ) )
22       
23    lavsRead = 0
24    bedsWritten = 0
25    species = {}
26    # TODO: this is really bad since everything is read into memory.  Can we eliminate this tool?
27    for lavBlock in bx.align.lav.Reader( lav_file ):
28        lavsRead += 1
29        for c in lavBlock.components:
30            spec, chrom = bx.align.lav.src_split( c.src )
31            if bedsWritten < 1:
32                if len( species )==0:
33                    species[spec]=bed_file1
34                elif len( species )==1:
35                    species[spec]=bed_file2
36                else:
37                    continue #this is a pairwise alignment...
38            if spec in species:
39                species[spec].write( "%s\t%i\t%i\t%s_%s\t%i\t%s\n" % ( chrom, c.start, c.end, spec, str( bedsWritten ), 0, c.strand ) )
40        bedsWritten += 1
41       
42
43    for spec,file in species.items():
44        print "#FILE\t%s\t%s" % (file.name, spec)
45   
46    lav_file.close()
47    bed_file1.close()
48    bed_file2.close()
49   
50    print "%d lav blocks read, %d regions written\n" % (lavsRead,bedsWritten)
51
52
53
54if __name__ == "__main__": main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。