1 | #Dan Blankenberg |
---|
2 | import sys, os, shutil |
---|
3 | from galaxy_utils.sequence.fastq import fastqWriter, fastqSequencingRead, fastqCombiner, fastqFakeFastaScoreReader |
---|
4 | from galaxy_utils.sequence.fasta import fastaReader, fastaNamedReader |
---|
5 | |
---|
def _select_fastq_format(fasta_type, qual_type):
    # Map the declared FASTA / quality datatypes onto the FASTQ variant name.
    if fasta_type == 'csfasta' or qual_type == 'qualsolid':
        return 'cssanger'
    if qual_type == 'qualsolexa':
        return 'solexa'
    if qual_type == 'qualillumina':
        return 'illumina'
    return 'sanger'


def main():
    """Combine a FASTA file with quality scores and write the result as FASTQ.

    Positional command line arguments (read from ``sys.argv``):
        1. FASTA input filename.
        2. FASTA datatype; an empty string is treated as 'fasta'.
        3. Quality score filename, or the literal string 'None' to take
           scores from fastqFakeFastaScoreReader instead of a file.
        4. Quality datatype; an empty string is treated as 'qualsanger'.
        5. Output filename.
        6. Forced quality encoding, or the literal string 'None' for none.

    Each FASTA sequence for which the quality reader returns a truthy value
    is combined and written; the others are counted as skipped. A summary
    is printed to stdout when at least one sequence was read, otherwise a
    "no valid FASTA sequences" message is printed.

    Exits with a usage message if fewer than six arguments are supplied.
    """
    if len(sys.argv) < 7:
        sys.exit(
            "Usage: %s fasta_file fasta_type qual_file qual_type "
            "output_file force_quality_encoding" % sys.argv[0]
        )

    # Read command line arguments
    fasta_filename = sys.argv[1]
    fasta_type = sys.argv[2] or 'fasta'  # should always be fasta or csfasta? what if txt?
    qual_filename = sys.argv[3]
    qual_type = sys.argv[4] or 'qualsanger'  # qual454 qualsolid
    output_filename = sys.argv[5]
    force_quality_encoding = sys.argv[6]
    if force_quality_encoding == 'None':
        force_quality_encoding = None

    # Named fastq_format (not ``format``) so the builtin is not shadowed.
    fastq_format = _select_fastq_format(fasta_type, qual_type)

    # Raw input handles are tracked so they can be closed explicitly; the
    # reader wrappers are not assumed to close them.
    fasta_file = None
    qual_file = None
    total_count = 0
    skip_count = 0
    leftover_report = None

    out = fastqWriter(
        open(output_filename, 'wb'),
        format=fastq_format,
        force_quality_encoding=force_quality_encoding,
    )
    try:
        if qual_filename == 'None':
            qual_input = fastqFakeFastaScoreReader(
                fastq_format, quality_encoding=force_quality_encoding
            )
        else:
            qual_file = open(qual_filename, 'rb')
            qual_input = fastaNamedReader(qual_file)

        fastq_combiner = fastqCombiner(fastq_format)
        fasta_file = open(fasta_filename, 'rb')
        for sequence in fastaReader(fasta_file):
            total_count += 1
            quality = qual_input.get(sequence)
            if quality:
                out.write(fastq_combiner.combine(sequence, quality))
            else:
                skip_count += 1

        if total_count:
            # Query the quality reader before its file is closed below.
            # NOTE(review): presumably a report of quality entries that were
            # never matched -- confirm against the reader implementation.
            leftover_report = qual_input.has_data()
    finally:
        # Always release the output and input files, even on error.
        out.close()
        for handle in (fasta_file, qual_file):
            if handle is not None:
                handle.close()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if not total_count:
        print("Your file contains no valid FASTA sequences.")
    else:
        combined_count = total_count - skip_count
        print(leftover_report)
        print('Combined %s of %s sequences with quality scores (%.2f%%).' % (
            combined_count,
            total_count,
            float(combined_count) / float(total_count) * 100.0,
        ))
---|
47 | |
---|
# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
---|