[2] | 1 | #!/usr/bin/env python |
---|
| 2 | """ |
---|
| 3 | Input: fasta, int |
---|
| 4 | Output: tabular |
---|
| 5 | Write each FASTA title together with the length of its corresponding sequence, tab-separated |
---|
| 6 | """ |
---|
| 7 | |
---|
| 8 | import sys, os |
---|
| 9 | |
---|
| 10 | assert sys.version_info[:2] >= ( 2, 4 ) |
---|
| 11 | |
---|
def __main__():
    """Write a tab-separated table of FASTA titles and sequence lengths.

    Command-line arguments (read from ``sys.argv``):
        1: path of the FASTA file to read
        2: path of the tabular file to write
        3: number of leading title characters to keep (0 keeps the whole
           title)

    Blank lines and lines starting with '#' are skipped.  For every record
    one line ``title<TAB>length`` is written, where the title is the header
    line without its leading '>' (optionally truncated) and the length is
    the summed length of the record's whitespace-stripped sequence lines.
    Nothing is written when the input contains no header line.

    Raises:
        IndexError: if fewer than three arguments are supplied.
        ValueError: if the third argument is not an integer.
        IOError/OSError: if either file cannot be opened.
    """
    infile = sys.argv[1]
    outfile = sys.argv[2]
    # Parse the numeric argument before touching the output file so that a
    # bad argument does not leave an empty output file behind.
    keep_first_char = int( sys.argv[3] )

    # End index of the title slice.  The slice starts at 1 to drop the '>',
    # hence the +1; None means "keep the whole title".
    if keep_first_char == 0:
        title_end = None
    else:
        title_end = keep_first_char + 1

    fasta_title = None  # None until the first '>' header has been seen
    seq_len = 0

    # try/finally (rather than `with`) because this file declares
    # Python 2.4 as its minimum supported version.
    in_fh = open( infile )
    try:
        out = open( outfile, 'w' )
        try:
            for line in in_fh:
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                if line.startswith( '>' ):
                    # A new header closes the previous record, if any.
                    if fasta_title is not None:
                        out.write( "%s\t%d\n" % ( fasta_title[ 1:title_end ], seq_len ) )
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len( line )

            # Flush the last record; skipped when no header was ever seen
            # so that an empty input does not yield a bogus "\t0" row.
            if fasta_title is not None:
                out.write( "%s\t%d\n" % ( fasta_title[ 1:title_end ], seq_len ) )
        finally:
            out.close()
    finally:
        in_fh.close()
| 46 | |
---|
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()