root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/maf_extract_ranges_indexed.py

リビジョン 3, 4.3 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4Reads a list of intervals and a maf. Produces a new maf containing the
5blocks or parts of blocks in the original that overlapped the intervals.
6
7It is assumed that each file `maf_fname` has a corresponding `maf_fname`.index
8file.
9
10NOTE: If two intervals overlap the same block it will be written twice. With
11      non-overlapping intervals and --chop this is never a problem.
12     
13NOTE: Intervals are relative to the + strand, regardless of the strands in
14      the alignments.
15
16     
17WARNING: bz2/bz2t support and file cache support are new and not as well
18         tested.
19
20usage: %prog maf_fname1 maf_fname2 ... [options] < interval_file
21   -m, --mincols=0: Minimum length (columns) required for alignment to be output
22   -c, --chop:       Should blocks be chopped to only portion overlapping (no by default)
23   -s, --src=s:      Use this src for all intervals
24   -p, --prefix=p:   Prepend this to each src before lookup
25   -d, --dir=d:      Write each interval as a separate file in this directory
26   -S, --strand:     Strand is included as an additional column, and the blocks are reverse complemented (if necessary) so that they are always on that strand w/r/t the src species.
27   -C, --usecache:   Use a cache that keeps blocks of the MAF files in memory (requires ~20MB per MAF)
28"""
29
30import psyco_full
31
32from bx.cookbook import doc_optparse
33
34import bx.align.maf
35from bx import misc
36import os
37import sys
38
39def main():
40    # Parse Command Line
41    options, args = doc_optparse.parse( __doc__ )
42    try:
43        maf_files = args
44        if options.mincols: mincols = int( options.mincols )
45        else: mincols = 0
46        if options.src: fixed_src = options.src
47        else: fixed_src = None
48        if options.prefix: prefix = options.prefix
49        else: prefix = None
50        if options.dir: dir = options.dir
51        else: dir = None
52        chop = bool( options.chop )
53        do_strand = bool( options.strand )
54        use_cache = bool( options.usecache )
55    except:
56        doc_optparse.exit()
57    # Open indexed access to mafs
58    index = bx.align.maf.MultiIndexed( maf_files, keep_open=True,
59                                                  parse_e_rows=True,
60                                                  use_cache=use_cache )
61    # Start MAF on stdout
62    if dir is None:
63        out = bx.align.maf.Writer( sys.stdout )
64    # Iterate over input ranges
65    for line in sys.stdin:
66        strand = None
67        fields = line.split()
68        if fixed_src:
69            src, start, end = fixed_src, int( fields[0] ), int( fields[1] )
70            if do_strand: strand = fields[2]
71        else:
72            src, start, end = fields[0], int( fields[1] ), int( fields[2] )
73            if do_strand: strand = fields[3]
74        if prefix: src = prefix + src
75        # Find overlap with reference component
76        blocks = index.get( src, start, end )
77        # Open file if needed
78        if dir:
79            out = bx.align.maf.Writer( open( os.path.join( dir, "%s:%09d-%09d.maf" % ( src, start, end ) ), 'w' ) )
80        # Write each intersecting block
81        if chop:
82            for block in blocks:
83                ref = block.get_component_by_src( src )
84                slice_start = max( start, ref.get_forward_strand_start() )
85                slice_end = min( end, ref.get_forward_strand_end() )
86                sliced = block.slice_by_component( ref, slice_start, slice_end )
87                # If the block is shorter than the minimum allowed size, stop
88                if mincols and ( sliced.text_size < mincols ):
89                    continue
90                # If the reference component is empty, don't write the block
91                if sliced.get_component_by_src( src ).size < 1:
92                    continue
93                # Keep only components that are not empty
94                sliced.components = [ c for c in sliced.components if c.size > 0 ]
95                # Reverse complement if needed
96                if ( strand != None ) and ( ref.strand != strand ):
97                    sliced = sliced.reverse_complement()
98                # Write the block
99                out.write( sliced )
100        else:
101            for block in blocks:
102                out.write( block )
103        if dir:
104            out.close()
105    # Close output MAF
106    out.close()
107    index.close()
108
109if __name__ == "__main__":
110    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。