1 | #!/usr/bin/env python |
---|
2 | #Dan Blankenberg |
---|
3 | """ |
---|
4 | Takes a Multiple Alignment FASTA file and concatenates |
---|
5 | sequences for each species, resulting in one sequence |
---|
6 | alignment per species. |
---|
7 | """ |
---|
8 | |
---|
9 | import sys, tempfile |
---|
10 | from galaxy import eggs |
---|
11 | from galaxy.tools.util.maf_utilities import iter_fasta_alignment |
---|
12 | from galaxy.util.odict import odict |
---|
13 | |
---|
def __main__():
    """
    Concatenate the blocks of a multiple alignment FASTA file into one
    sequence per species.

    Command line arguments:
        sys.argv[1] -- path of the multiple alignment FASTA file to read
        sys.argv[2] -- path of the FASTA file to write

    Every block yielded by iter_fasta_alignment is appended to a temporary
    file kept per species. A species that is absent from a block (or that
    first appears after earlier blocks were processed) is padded with '-'
    so that its sequence stays as long as the others. Once all blocks are
    read, each species is written to the output as ">species", followed by
    its concatenated sequence on a single line.

    The temporary files and the output file are always closed, even when
    reading or writing fails part way through.
    """
    # Local import: only this function needs it, to stream the temp files.
    import shutil

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # species name -> temporary file holding that species' sequence so far
    species = odict()
    # Combined length of all blocks processed so far, measured on the first
    # component of each block.
    cur_size = 0
    try:
        for components in iter_fasta_alignment( input_filename ):
            # NOTE(review): padding assumes every component of a block has
            # the same text length as the first one -- confirm.
            block_size = len( components[0].text )
            # Work on a copy so that removals never touch the mapping itself.
            species_not_written = list( species.keys() )
            for component in components:
                if component.species not in species:
                    # First appearance: pad for everything written before it.
                    species[component.species] = tempfile.TemporaryFile()
                    species[component.species].write( "-" * cur_size )
                species[component.species].write( component.text )
                try:
                    species_not_written.remove( component.species )
                except ValueError:
                    # This is a new species, or one that already appeared
                    # earlier in this same block.
                    pass
            # Known species missing from this block get gaps instead.
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size
        out = open( output_filename, 'wb' )
        try:
            for spec, f in species.iteritems():
                f.seek( 0 )
                out.write( ">%s\n" % spec )
                # Stream in chunks rather than loading the whole sequence
                # into memory with f.read().
                shutil.copyfileobj( f, out )
                out.write( "\n" )
        finally:
            out.close()
    finally:
        # Closing a TemporaryFile also removes it from disk.
        for spec in species.keys():
            species[spec].close()
---|
39 | |
---|
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
---|