root/galaxy-central/tools/fasta_tools/fasta_filter_by_length.py @ 2

リビジョン 2, 1.6 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2"""
3Input: fasta, minimal length, maximal length
4Output: fasta
5Return sequences whose lengths are within the range.
6"""
7
8import sys, os
9
# Refuse to run on interpreters older than Python 2.4.
assert not ( sys.version_info[:2] < ( 2, 4 ) )
11
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes ``msg`` to standard error unchanged (no newline is appended) and
    then raises ``SystemExit`` with status 1, so that a calling shell or
    pipeline can tell the run failed.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit( 1 )
15
def _length_in_range( size, min_length, max_length ):
    """Return True if size >= min_length and (max_length == 0 or size <= max_length)."""
    if size < min_length:
        return False
    return max_length == 0 or size <= max_length


def __main__():
    """Copy the FASTA records whose sequence length is within a range.

    Arguments are read from ``sys.argv``:
        1. path of the input FASTA file
        2. minimal sequence length (integer)
        3. maximal sequence length (integer); 0 disables the upper bound
        4. path of the output file

    Lines starting with '#' are skipped.  A record is a '>' header line plus
    the lines that follow it; its length is the sum of those lines' lengths
    with trailing CR/LF removed.  If the last record of the input is written,
    its trailing line terminators are stripped.  When nothing is written, a
    notice is printed to standard output.

    Calls ``stop_err`` (which exits) when either length argument is missing
    or is not an integer.
    """
    input_filename = sys.argv[1]
    try:
        min_length = int( sys.argv[2] )
    except ( IndexError, ValueError ):
        stop_err( "Minimal length of the return sequence requires a numerical value." )
    try:
        max_length = int( sys.argv[3] )
    except ( IndexError, ValueError ):
        stop_err( "Maximum length of the return sequence requires a numerical value." )
    output_filename = sys.argv[4]

    record_lines = []   # header + sequence lines of the record being read
    record_size = 0     # residues counted so far for that record
    wrote_any = False

    output_handle = open( output_filename, 'w' )
    try:
        input_handle = open( input_filename )
        try:
            for line in input_handle:
                if line.startswith( '#' ):
                    continue
                if line.startswith( '>' ):
                    # A new header closes the previous record.  Requiring a
                    # non-empty buffer keeps the very first header from being
                    # counted as a written sequence when min_length <= 0.
                    if record_lines and _length_in_range( record_size, min_length, max_length ):
                        output_handle.write( ''.join( record_lines ) )
                        wrote_any = True
                    record_lines = [ line ]
                    record_size = 0
                elif max_length == 0 or record_size <= max_length:
                    # Keep counting while the record has not yet exceeded the
                    # maximum.  The comparison must be <=, not <: stopping
                    # when the size merely equals max_length would truncate a
                    # longer sequence and then accept it.  Once the size
                    # exceeds max_length the record is already rejected, so
                    # the remaining lines need not be buffered.
                    record_size += len( line.rstrip( '\r\n' ) )
                    record_lines.append( line )
        finally:
            input_handle.close()
        # Final flush: the last record has no following header to close it.
        if record_lines and _length_in_range( record_size, min_length, max_length ):
            output_handle.write( ''.join( record_lines ).rstrip( '\r\n' ) )
            wrote_any = True
    finally:
        output_handle.close()
    if not wrote_any:
        print( "There is no sequence that falls within your range." )
51
# Run the filter only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。