[2] | 1 | #Dan Blankenberg |
---|
| 2 | import sys, os, shutil |
---|
| 3 | from galaxy_utils.sequence.fastq import fastqReader, fastqNamedReader, fastqWriter, fastqJoiner |
---|
| 4 | |
---|
def main():
    """Join paired FASTQ reads from two input files into one output file.

    Command line arguments (read from ``sys.argv``):
        1. path of the first FASTQ input
        2. format of the first input; an empty string falls back to 'sanger'
        3. path of the second FASTQ input
        4. format of the second input; an empty string falls back to 'sanger'
        5. path of the output file

    For every read of the first input, the identifier of its mate is obtained
    from ``fastqJoiner.get_paired_identifier`` and looked up in the second
    input.  When a mate is found the joined read is written to the output;
    otherwise the read is counted as skipped.  A summary is printed at the
    end.

    Raises:
        IndexError: if fewer than five arguments are supplied.
    """
    # Read command line arguments
    input1_filename = sys.argv[1]
    input1_type = sys.argv[2] or 'sanger'
    input2_filename = sys.argv[3]
    input2_type = sys.argv[4] or 'sanger'
    output_filename = sys.argv[5]

    if input1_type != input2_type:
        print("WARNING: You are trying to join files of two different types: %s and %s." % (input1_type, input2_type))

    # Keep every raw handle we open so all of them are released in the
    # ``finally`` below, whether the join succeeds or fails part-way through.
    opened_handles = []
    try:
        input2_handle = open(input2_filename, 'rb')
        opened_handles.append(input2_handle)
        input2 = fastqNamedReader(input2_handle, input2_type)

        joiner = fastqJoiner(input1_type)

        output_handle = open(output_filename, 'wb')
        opened_handles.append(output_handle)
        out = fastqWriter(output_handle, format=input1_type)

        input1_handle = open(input1_filename, 'rb')
        opened_handles.append(input1_handle)

        # ``i`` stays None when the first input yields no reads at all.
        i = None
        skip_count = 0
        for i, fastq_read in enumerate(fastqReader(input1_handle, format=input1_type)):
            identifier = joiner.get_paired_identifier(fastq_read)
            fastq_paired = input2.get(identifier)
            if fastq_paired is None:
                skip_count += 1
            else:
                out.write(joiner.join(fastq_read, fastq_paired))
        out.close()

        if i is None:
            print("Your file contains no valid FASTQ reads.")
        else:
            # input2 is still queried here, so its handle is deliberately kept
            # open until the summary has been printed.
            print(input2.has_data())
            total_reads = i + 1
            joined_reads = total_reads - skip_count
            print('Joined %s of %s read pairs (%.2f%%).' % (joined_reads, total_reads, float(joined_reads) / float(total_reads) * 100.0))
    finally:
        # Closing an already-closed file object is a no-op, so the output
        # handle being closed via ``out.close()`` above is harmless.
        for handle in opened_handles:
            handle.close()
| 36 | |
---|
# Script entry point: run the joiner only when executed directly, not on import.
if __name__ == "__main__":
    main()