root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/maf_chunk.py @ 3

リビジョン 3, 2.4 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4Read a MAF from stdin and break into several new mafs containing no more than
5`chunk_size` columns. The new mafs will be written to `out_dir` along with a
6file "intervals.txt" specifying the range covered by each new maf file. A
7probability for writing each chunk can optionally be specified, resulting in
8a random fraction of chunks from the input MAF being produced.
9
10usage: %prog [options] chunk_size out_dir < maf
11  --prob: probability of writing versus skipping each chunk.
12"""
13
14usage = "usage: %prog chunk_size out_dir"
15
16import sys
17from optparse import OptionParser
18import bx.align.maf
19import psyco_full
20import random
21
22INF="inf"
23
24def __main__():
25
26    # Parse command line arguments
27
28    parser = OptionParser( "usage: %prog chunk_size out_dir" )
29    parser.add_option( "--prob", action="store", default=None, type="float",
30                       help="Probability of writing a given chunk" )
31   
32    ( options, args ) = parser.parse_args()
33
34    chunk_size = int( args[0] )
35    out_dir = args[1]
36    prob = options.prob
37
38    maf_reader = bx.align.maf.Reader( sys.stdin )
39
40    maf_writer = None
41
42    count = 0
43    current_chunk = -1
44
45    chunk_min = INF
46    chunk_max = 0
47
48    write_current_chunk = True
49
50    interval_file = file( "%s/intervals.txt" % out_dir, "w" )   
51
52    for m in maf_reader:
53        chunk_min = min( chunk_min, m.components[0].start )
54        chunk_max = max( chunk_max, m.components[0].end )
55        if not maf_writer or count + m.text_size > chunk_size:
56            current_chunk += 1
57            # Finish the last chunk           
58            if maf_writer:
59                maf_writer.close()
60                interval_file.write( "%s %s\n" % ( chunk_min, chunk_max ) )
61                chunk_min = INF
62                chunk_max = 0
63            # Decide if the new chunk will be written     
64            if prob: write_current_chunk = bool( random.random() <= prob )
65            else: write_current_chunk = True
66            if write_current_chunk:
67                maf_writer = bx.align.maf.Writer( file( "%s/%09d.maf" % ( out_dir, current_chunk ), "w" ) )
68            else:
69                maf_writer = None
70            count = 0
71        if maf_writer: maf_writer.write( m )
72        #count += m.text_size
73        count += m.components[0].size
74   
75    if maf_writer:
76        maf_writer.close()
77        interval_file.write( "%s %s\n" % ( chunk_min, chunk_max ) )
78        interval_file.close()
79
80if __name__ == "__main__": __main__()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。