1 | #!/usr/bin/env python |
---|
2 | """ |
---|
3 | Input: fasta, int |
---|
4 | Output: tabular |
---|
5 | Return titles with lengths of corresponding seq |
---|
6 | """ |
---|
7 | |
---|
8 | import sys, os |
---|
9 | |
---|
10 | assert sys.version_info[:2] >= ( 2, 4 ) |
---|
11 | |
---|
def _title_and_length_rows(lines, title_end):
    """Yield one ``title<TAB>length<NEWLINE>`` row per FASTA record.

    lines     -- iterable of text lines (e.g. an open file).
    title_end -- slice end applied to the raw header line (which still
                 carries its leading '>'); None keeps the whole title.

    Blank lines and lines starting with '#' are ignored.  The length of
    a record is the summed length of its whitespace-stripped sequence
    lines.  Sequence lines that appear before the first header are
    dropped once a header is seen; if the input never contains a header
    but does contain sequence data, a single row with an empty title is
    produced.  Input without any record produces no rows.
    """
    title = None  # None until the first '>' header has been seen
    seq_len = 0

    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('>'):
            # A new header closes the previous record, if there was one.
            if title is not None:
                yield "%s\t%d\n" % (title[1:title_end], seq_len)
            title = line
            seq_len = 0
        else:
            seq_len += len(line)

    # Flush the last record; the loop only emits when the *next* header
    # shows up.
    if title is not None:
        yield "%s\t%d\n" % (title[1:title_end], seq_len)
    elif seq_len:
        # Headerless sequence data: keep reporting it with an empty title.
        yield "\t%d\n" % seq_len


def __main__():
    """Write a tab-separated table of FASTA titles and sequence lengths.

    Command-line arguments (read from sys.argv):
      1 -- path of the FASTA file to read
      2 -- path of the tabular file to write
      3 -- number of leading title characters to keep; 0 keeps the
           whole title

    Raises IndexError when an argument is missing and ValueError when
    the third argument is not an integer.  The integer is validated and
    the input opened before the output file is created, so bad
    arguments do not clobber an existing output file.
    """
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first_char = int(sys.argv[3])

    # Number of characters to keep in the title.  The slice is applied
    # to the header line including its '>' marker, hence the +1.
    if keep_first_char == 0:
        title_end = None
    else:
        title_end = keep_first_char + 1

    # try/finally rather than 'with': the file declares a Python 2.4 floor.
    fasta = open(infile)
    try:
        out = open(outfile, 'w')
        try:
            for row in _title_and_length_rows(fasta, title_end):
                out.write(row)
        finally:
            out.close()
    finally:
        fasta.close()


if __name__ == "__main__":
    __main__()
---|