root/galaxy-central/tools/fasta_tools/fasta_concatenate_by_species.py

リビジョン 2, 1.3 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2#Dan Blankenberg
3"""
4Takes a Multiple Alignment FASTA file and concatenates
5sequences for each species, resulting in one sequence
6alignment per species.
7"""
8
9import sys, tempfile
10from galaxy import eggs
11from galaxy.tools.util.maf_utilities import iter_fasta_alignment
12from galaxy.util.odict import odict
13
def __main__():
    """Concatenate the blocks of a multiple-alignment FASTA file per species.

    Command-line arguments:
        sys.argv[1] -- path of the input alignment FASTA file.
        sys.argv[2] -- path of the output FASTA file.

    Each item produced by ``iter_fasta_alignment`` is treated as one
    alignment block: a sequence of components exposing ``.species`` and
    ``.text``.  For every species the texts of all blocks are appended in
    input order; a species missing from a block (or first seen after earlier
    blocks) is padded with "-" so that every output sequence covers the same
    columns.  The output holds one ``>species`` record per species, in order
    of first appearance.

    Raises:
        IndexError: if fewer than two command-line arguments are supplied.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    copy_chunk_size = 1024 * 1024  # bytes copied per read when emitting a sequence
    species = odict()  # species name -> temporary file accumulating its sequence
    temp_files = []  # every temporary file created, so all can be closed on exit
    cur_size = 0  # number of alignment columns written so far
    out = None
    try:
        for components in iter_fasta_alignment( input_filename ):
            # The width of a block is taken from its first component; this
            # assumes all components of a block have equal length -- TODO confirm.
            block_size = len( components[0].text )
            # Species already known before this block that have not (yet)
            # contributed text to it.
            species_not_written = set( species.keys() )
            for component in components:
                if component.species not in species:
                    # New species: back-fill gaps for all earlier blocks.
                    tmp = tempfile.TemporaryFile()
                    temp_files.append( tmp )
                    species[component.species] = tmp
                    tmp.write( "-" * cur_size )
                # NOTE(review): a species occurring more than once in a block
                # has every occurrence appended, which makes its sequence
                # longer than the others -- confirm whether that can happen.
                species[component.species].write( component.text )
                species_not_written.discard( component.species )
            # Pad species that were absent from this block.
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size
        out = open( output_filename, 'wb' )
        for spec, f in species.iteritems():
            f.seek( 0 )
            out.write( ">%s\n" % spec )
            # Stream the sequence instead of loading it whole into memory.
            while True:
                chunk = f.read( copy_chunk_size )
                if not chunk:
                    break
                out.write( chunk )
            out.write( "\n" )
    finally:
        # Release the output handle and all temporary files even on failure.
        if out is not None:
            out.close()
        for tmp in temp_files:
            tmp.close()
39
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。