root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/maf_interval_alignibility.py @ 3

リビジョン 3, 4.0 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4WARNING: bz2/bz2t support and file cache support are new and not as well
5         tested.
6
7usage: %prog maf_files [options] < interval_file
8    -s, --species=SPECIES: Comma separated list of species to include
9    -p, --prefix=PREFIX: Prefix to add to each interval chrom (usually reference species)
10   -C, --usecache:   Use a cache that keeps blocks of the MAF files in memory (requires ~20MB per MAF)
11"""
12
13from __future__ import division
14
15import psyco_full
16
17from bx.cookbook import doc_optparse
18
19import bx.align.maf
20from bx import misc
21import os
22import sys
23
24from numpy import *
25
26def main():
27    # Parse Command Line
28    options, args = doc_optparse.parse( __doc__ )
29    try:
30        maf_files = args
31        species = options.species.split( "," )
32        prefix = options.prefix
33        use_cache = bool( options.usecache )
34        if not prefix:
35            prefix = ""
36    except:
37        doc_optparse.exit()
38    # Open indexed access to mafs
39    index = bx.align.maf.MultiIndexed( maf_files,
40                                      parse_e_rows=True,
41                                      use_cache=use_cache )
42    # Print header
43    print "#chr", "start", "end",
44    for s in species:
45        print s,
46    print
47    # Iterate over input ranges
48    for line in sys.stdin:
49        fields = line.split()
50        # Input is BED3+
51        chr, start, end = fields[0], int( fields[1] ), int( fields[2] )
52        length = end - start
53        assert length > 0, "Interval has length less than one"
54        # Prepend prefix if specified
55        src = prefix + chr   
56        # Keep a bitset for each species noting covered pieces
57        aligned_bits = []
58        missing_bits = []
59        for s in species:
60            aligned_bits.append( zeros( length, dtype=bool ) )
61            missing_bits.append( zeros( length, dtype=bool ) )
62        # Find overlap with reference component
63        blocks = index.get( src, start, end )
64        # Determine alignability for each position
65        for block in blocks:
66            # Determine the piece of the human interval this block covers,
67            # relative to the start of the interval of interest
68            ref = block.get_component_by_src( src )
69            assert ref.strand == "+", \
70                "Reference species blocks must be on '+' strand"
71            rel_start = max( start, ref.start ) - start
72            rel_end = min( end, ref.end ) - start
73            # Check alignability for each species
74            for i, s in enumerate( species ):
75                other = block.get_component_by_src_start( s )
76                # Species does not appear at all indicates unaligned (best we
77                # can do here?)
78                if other is None:
79                    continue
80                # An empty component might indicate missing data, all other
81                # cases (even contiguous) we count as not aligned
82                if other.empty:
83                    if other.synteny_empty == bx.align.maf.MAF_MISSING_STATUS:
84                        missing_bits[i][rel_start:rel_end] = True
85                # Otherwise we have a local alignment with some text, call
86                # it aligned
87                else:
88                    aligned_bits[i][rel_start:rel_end] = True
89        # Now determine the total alignment coverage of each interval
90        print chr, start, end,
91        for i, s in enumerate( species ):
92            aligned = sum( aligned_bits[i] )
93            missing = sum( missing_bits[i] )
94            # An interval will be called missing if it is < 100bp and <50%
95            # present, or more than 100bp and less that 50bp present (yes,
96            # arbitrary)
97            is_missing = False
98            if length < 100 and missing > ( length / 2 ):
99                print "NA",
100            elif length >= 100 and missing > 50:
101                print "NA",
102            else:
103                print aligned / ( length - missing ),
104               
105        print
106         
107    # Close MAF files
108    index.close()
109
110if __name__ == "__main__":
111    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。