1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Reads a list of intervals and a set of indexed mafs. For each interval print |
---|
5 | the amount covered by each species other than the reference. |
---|
6 | |
---|
7 | usage: %prog maf_files [options] < interval_file |
---|
8 | -s, --src=s: Use this src for all intervals |
---|
9 | -p, --prefix=p: Prepend this to each src before lookup |
---|
10 | """ |
---|
11 | |
---|
12 | from __future__ import division |
---|
13 | |
---|
14 | import psyco_full |
---|
15 | |
---|
16 | from bx.cookbook import doc_optparse |
---|
17 | |
---|
18 | import bx.align.maf |
---|
19 | from bx import intervals |
---|
20 | from bx import misc |
---|
21 | import sys |
---|
22 | |
---|
23 | def __main__(): |
---|
24 | |
---|
25 | # Parse Command Line |
---|
26 | |
---|
27 | options, args = doc_optparse.parse( __doc__ ) |
---|
28 | |
---|
29 | try: |
---|
30 | maf_files = args |
---|
31 | if options.prefix: prefix = options.prefix |
---|
32 | else: prefix = None |
---|
33 | except: |
---|
34 | doc_optparse.exit() |
---|
35 | |
---|
36 | # Open indexed access to mafs |
---|
37 | indexes = [ bx.align.maf.Indexed( maf_file, maf_file + ".index" ) for maf_file in maf_files ] |
---|
38 | |
---|
39 | # Iterate over input ranges |
---|
40 | |
---|
41 | for line in sys.stdin: |
---|
42 | fields = line.split() |
---|
43 | src, start, end = fields[0], int( fields[1] ), int( fields[2] ) |
---|
44 | if prefix: src = prefix + src |
---|
45 | |
---|
46 | total_length = end - start |
---|
47 | |
---|
48 | # Find overlap with reference component |
---|
49 | blocks = [] |
---|
50 | for index in indexes: blocks += index.get( src, start, end ) |
---|
51 | |
---|
52 | coverage = dict() |
---|
53 | for block in blocks: |
---|
54 | overlap_start = max( start, block.components[0].start ) |
---|
55 | overlap_end = min( end, block.components[0].end ) |
---|
56 | length = overlap_end - overlap_start |
---|
57 | assert length > 0 |
---|
58 | for c in block.components[1:]: |
---|
59 | species = c.src.split( '.' )[0] |
---|
60 | try: coverage[ species ] += length |
---|
61 | except: coverage[ species ] = length |
---|
62 | |
---|
63 | print line, |
---|
64 | for key, value in coverage.iteritems(): |
---|
65 | print " ", key.ljust(10), "%0.2f" % ( value / total_length ) |
---|
66 | |
---|
67 | |
---|
68 | if __name__ == "__main__": __main__() |
---|