| 1 | #!/usr/bin/env python |
|---|
| 2 | #Dan Blankenberg |
|---|
| 3 | """ |
|---|
| 4 | Takes a Multiple Alignment FASTA file and concatenates |
|---|
| 5 | sequences for each species, resulting in one sequence |
|---|
| 6 | alignment per species. |
|---|
| 7 | """ |
|---|
| 8 | |
|---|
| 9 | import sys, tempfile |
|---|
| 10 | from galaxy import eggs |
|---|
| 11 | from galaxy.tools.util.maf_utilities import iter_fasta_alignment |
|---|
| 12 | from galaxy.util.odict import odict |
|---|
| 13 | |
|---|
def __main__():
    """
    Concatenate the alignment blocks of a FASTA file into one sequence per species.

    Command line arguments:
      sys.argv[1] -- path of the multiple alignment FASTA file to read
      sys.argv[2] -- path of the FASTA file to write

    Each species' sequence is accumulated in its own temporary file.  A
    species that is absent from a block is padded with "-" for that block,
    and a species first seen in a later block is left-padded with "-" for
    everything written before it, so every sequence grows by the same
    amount per block.  The width of a block is taken from the text of its
    first component.

    The output holds one ">species" header line followed by one sequence
    line for every species, in the order the species were first seen.
    """
    # Function-scope import so this block does not depend on the file's import header.
    import shutil

    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    species = odict()
    # Every temporary file created below, so that all of them can be closed on exit.
    temp_files = []
    # Total width written so far; used to left-pad species that show up late.
    cur_size = 0
    try:
        for components in iter_fasta_alignment( input_filename ):
            if not components:
                # Nothing to take a block width from; skip rather than fail on components[0].
                continue
            block_size = len( components[0].text )
            # Species known before this block that have not received text from it yet.
            species_not_written = set( species.keys() )
            for component in components:
                if component.species not in species:
                    # New species: pad it up to the width of all previous blocks.
                    species[component.species] = tempfile.TemporaryFile()
                    temp_files.append( species[component.species] )
                    species[component.species].write( "-" * cur_size )
                species[component.species].write( component.text )
                # discard() is a no-op for a new or repeated species.
                species_not_written.discard( component.species )
            for spec in species_not_written:
                species[spec].write( "-" * block_size )
            cur_size += block_size
        out = open( output_filename, 'wb' )
        try:
            for spec, f in species.iteritems():
                f.seek( 0 )
                out.write( ">%s\n" % spec )
                # Stream the sequence in chunks instead of loading it all into memory.
                shutil.copyfileobj( f, out )
                out.write( "\n" )
        finally:
            out.close()
    finally:
        # Closing a TemporaryFile also removes it.
        for f in temp_files:
            f.close()
|---|
| 39 | |
|---|
# Run the tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    __main__()
|---|