1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Build an index file for a set of MAF alignment blocks. |
---|
5 | |
---|
6 | If index_file is not provided maf_file.index is used. |
---|
7 | |
---|
8 | usage: %prog maf_file index_file |
---|
9 | -s, --species=a,b,c: only index the position of the block in the listed species |
---|
10 | """ |
---|
11 | |
---|
12 | import psyco_full |
---|
13 | |
---|
14 | from bx.cookbook import doc_optparse |
---|
15 | |
---|
16 | import sys |
---|
17 | import os.path |
---|
18 | |
---|
19 | from bx import interval_index_file |
---|
20 | import bx.align.maf |
---|
21 | from bx.misc.seekbzip2 import SeekableBzip2File |
---|
22 | |
---|
23 | def main(): |
---|
24 | |
---|
25 | # Parse command line |
---|
26 | |
---|
27 | options, args = doc_optparse.parse( __doc__ ) |
---|
28 | |
---|
29 | try: |
---|
30 | maf_file = args[0] |
---|
31 | # If it appears to be a bz2 file, attempt to open with table |
---|
32 | if maf_file.endswith( ".bz2" ): |
---|
33 | table_file = maf_file + "t" |
---|
34 | if not os.path.exists( table_file ): |
---|
35 | doc_optparse.exit( "To index bz2 compressed files first " |
---|
36 | "create a bz2t file with bzip-table." ) |
---|
37 | # Open with SeekableBzip2File so we have tell support |
---|
38 | maf_in = SeekableBzip2File( maf_file, table_file ) |
---|
39 | # Strip .bz2 from the filename before adding ".index" |
---|
40 | maf_file = maf_file[:-4] |
---|
41 | elif maf_file.endswith( ".lzo" ): |
---|
42 | from bx.misc.seeklzop import SeekableLzopFile |
---|
43 | table_file = maf_file + "t" |
---|
44 | if not os.path.exists( table_file ): |
---|
45 | doc_optparse.exit( "To index lzo compressed files first " |
---|
46 | "create a lzot file with lzop_build_offset_table." ) |
---|
47 | # Open with SeekableBzip2File so we have tell support |
---|
48 | maf_in = SeekableLzopFile( maf_file, table_file ) |
---|
49 | # Strip .lzo from the filename before adding ".index" |
---|
50 | maf_file = maf_file[:-4] |
---|
51 | else: |
---|
52 | maf_in = open( maf_file ) |
---|
53 | # Determine the name of the index file |
---|
54 | if len( args ) > 1: |
---|
55 | index_file = args[1] |
---|
56 | else: |
---|
57 | index_file = maf_file + ".index" |
---|
58 | if options.species: |
---|
59 | species = options.species.split( "," ) |
---|
60 | else: |
---|
61 | species = None |
---|
62 | except: |
---|
63 | doc_optparse.exception() |
---|
64 | |
---|
65 | maf_reader = bx.align.maf.Reader( maf_in ) |
---|
66 | |
---|
67 | indexes = interval_index_file.Indexes() |
---|
68 | |
---|
69 | # Need to be a bit tricky in our iteration here to get the 'tells' right |
---|
70 | while 1: |
---|
71 | pos = maf_reader.file.tell() |
---|
72 | block = maf_reader.next() |
---|
73 | if block is None: break |
---|
74 | for c in block.components: |
---|
75 | if species is not None and c.src.split('.')[0] not in species: |
---|
76 | continue |
---|
77 | indexes.add( c.src, c.forward_strand_start, c.forward_strand_end, pos, max=c.src_size ) |
---|
78 | |
---|
79 | out = open( index_file, 'w' ) |
---|
80 | indexes.write( out ) |
---|
81 | out.close() |
---|
82 | |
---|
83 | if __name__ == "__main__": main() |
---|