root/galaxy-central/lib/galaxy/datatypes/converters/fasta_to_tabular_converter.py @ 2

リビジョン 2, 1.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2# This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
3"""
4Input: fasta
5Output: tabular
6Write one line per FASTA record: the title line, a tab, then the joined sequence.
7"""
8
9import sys, os
10
# Records parsed by the most recent __main__() call, keyed by
# ( record_number, title_line ); sorting the keys restores input order.
seq_hash = {}


def _iter_fasta_records( lines ):
    """
    Yield a ( title, sequence ) pair for each record found in `lines`.

    `lines` is any iterable of text lines (for example an open file).
    The title is the full header line, including the leading '>'.
    Sequence lines are concatenated after stripping trailing CR/LF, and
    empty lines are ignored.  Records that have no sequence lines are
    skipped.  Sequence lines that appear before the first header are
    yielded under an empty title.
    """
    title = ''
    chunks = []
    for line in lines:
        line = line.rstrip( '\r\n' )
        if line.startswith( '>' ):
            if chunks:
                yield title, ''.join( chunks )
            title = line
            chunks = []
        elif line:
            chunks.append( line )
            # Lines whose first token is all digits get a trailing space so
            # that values on consecutive lines do not run together
            # (presumably numeric quality scores -- confirm with callers).
            fields = line.split()
            if fields and fields[0].isdigit():
                chunks.append( ' ' )
    if chunks:
        yield title, ''.join( chunks )


def __main__():
    """
    Convert the FASTA file named by sys.argv[1] into the tabular file
    named by sys.argv[2].

    Each record becomes one output line: the title line, a tab, and the
    concatenated sequence.  Records are written in input order.  The
    parsed records are also left in the module-level `seq_hash`.

    Raises IndexError when either command-line argument is missing.
    """
    infile = sys.argv[1]
    outfile = sys.argv[2]
    # Start from a clean slate so a repeated call does not re-emit records
    # left over from an earlier run.
    seq_hash.clear()
    with open( infile ) as in_handle:
        # Every record, including the last one, gets its own number; this
        # keeps the keys unique even when titles repeat and makes the sort
        # below reproduce the input order.
        numbered = enumerate( _iter_fasta_records( in_handle ), 1 )
        for sequence_count, ( title, sequence ) in numbered:
            seq_hash[( sequence_count, title )] = sequence
    with open( outfile, 'w' ) as out:
        for key in sorted( seq_hash ):
            out.write( "%s\t%s\n" % ( key[1], seq_hash[key] ) )
42
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。