root/galaxy-central/tools/fasta_tools/fasta_compute_length.py

リビジョン 2, 1.1 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2"""
3Input: fasta, int
4Output: tabular
5Return titles with lengths of corresponding seq
6"""
7
8import sys, os
9
# Refuse to run on interpreters older than 2.4. An explicit raise is used
# instead of a bare ``assert`` because assert statements are removed when
# Python runs with -O, which would silently disable this guard.
if sys.version_info[:2] < (2, 4):
    raise AssertionError("Python 2.4 or newer is required")
11
def __main__():
    """Write one "title<TAB>length" row per FASTA record.

    Command-line arguments (read from ``sys.argv``):
        1: path of the FASTA file to read.
        2: path of the tabular file to write (opened in 'w' mode).
        3: integer number of title characters to keep after the leading
           '>'; 0 keeps the whole title.

    Lines that are blank or start with '#' are ignored. A line starting
    with '>' begins a new record; every other line adds its stripped
    length to the current record. Nothing is written when the input
    contains neither a header nor any sequence text.

    Raises:
        IndexError: if fewer than three arguments are supplied.
        ValueError: if the third argument is not an integer.
    """
    # Parse every argument before touching the filesystem so that a bad
    # argument cannot leave an open (and truncated) output file behind.
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first_char = int(sys.argv[3])

    # Turn the requested character count into the end index of the slice
    # title[1:end]; index 0 is the '>' marker, hence the +1. None means
    # "slice to the end of the title".
    if keep_first_char == 0:
        keep_first_char = None
    else:
        keep_first_char += 1

    fasta_title = ''
    seq_len = 0

    # try/finally rather than ``with``: the file declares Python 2.4 as its
    # minimum version, which predates the with statement.
    out = open(outfile, 'w')
    try:
        fasta = open(infile)
        try:
            for line in fasta:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                if line[0] == '>':
                    # A stored title always begins with '>', so it is
                    # non-empty exactly when an earlier header was seen;
                    # flush that finished record before starting anew.
                    if fasta_title:
                        out.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len(line)
        finally:
            fasta.close()

        # Flush the last record. Skipped only when nothing was read at all,
        # so an empty input no longer yields a spurious "\t0" row.
        if fasta_title or seq_len:
            out.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
    finally:
        out.close()
46
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。