[2] | 1 | #!/usr/bin/env python |
---|
# Dan Blankenberg
"""
Takes a Multiple Alignment FASTA file and concatenates
sequences for each species, resulting in one sequence
alignment per species.
"""
---|
| 8 | |
---|
| 9 | import sys, tempfile |
---|
| 10 | from galaxy import eggs |
---|
| 11 | from galaxy.tools.util.maf_utilities import iter_fasta_alignment |
---|
| 12 | from galaxy.util.odict import odict |
---|
| 13 | |
---|
def __main__():
    """
    Concatenate the blocks of a multiple alignment FASTA file per species.

    Command line arguments:
        sys.argv[1] -- path of the multiple alignment FASTA file to read
        sys.argv[2] -- path of the FASTA file to write

    Every block yielded by iter_fasta_alignment() contributes one segment
    to every species seen so far: the component text when the species is
    part of the block, otherwise a run of "-" as wide as the block. The
    width of a block is taken from the text of its first component. A
    species that first shows up in a later block is left-padded with "-"
    for all columns accumulated before it.

    The output holds, for each species in the iteration order of the
    odict, a ">species" header line followed by one sequence line.

    Sequences are spooled to one temporary file per species; all of them
    (and the output file) are closed even when an error occurs.
    """
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    # Copy temp files to the output in pieces rather than loading a whole
    # concatenated sequence into memory at once.
    chunk_size = 1024 * 1024
    species = odict()
    cur_size = 0  # number of alignment columns written so far
    try:
        for components in iter_fasta_alignment( input_filename ):
            if not components:
                # NOTE(review): an empty block has no width to pad with;
                # skipping it instead of failing on components[0].
                continue
            block_size = len( components[0].text )
            # Species known before this block that have not received a
            # segment for it yet. Species created below are never in here.
            missing = set( species.keys() )
            for component in components:
                name = component.species
                if name not in species:
                    species[name] = tempfile.TemporaryFile()
                    # Gap-fill everything that came before this species.
                    species[name].write( "-" * cur_size )
                species[name].write( component.text )
                missing.discard( name )
            gap = "-" * block_size
            for name in missing:
                species[name].write( gap )
            cur_size += block_size
        out = open( output_filename, 'wb' )
        try:
            for spec, f in species.iteritems():
                f.seek( 0 )
                out.write( ">%s\n" % spec )
                while True:
                    chunk = f.read( chunk_size )
                    if not chunk:
                        break
                    out.write( chunk )
                out.write( "\n" )
        finally:
            out.close()
    finally:
        # Release every temporary file, whether or not the run succeeded.
        for spec in species.keys():
            species[spec].close()
---|
| 39 | |
---|
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
---|