root/galaxy-central/lib/galaxy_utils/sequence/fastq.py

リビジョン 2, 26.5 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#Dan Blankenberg
2import math
3import string
4import transform
5from sequence import SequencingRead
6from fasta import fastaSequence
7
8class fastqSequencingRead( SequencingRead ):
9    format = 'sanger' #sanger is default
10    ascii_min = 33
11    ascii_max = 126
12    quality_min = 0
13    quality_max = 93
14    score_system = 'phred' #phred or solexa
15    sequence_space = 'base' #base or color
16    @classmethod
17    def get_class_by_format( cls, format ):
18        assert format in FASTQ_FORMATS, 'Unknown format type specified: %s' % format
19        return FASTQ_FORMATS[ format ]
20    @classmethod
21    def convert_score_phred_to_solexa( cls, decimal_score_list ):
22        def phred_to_solexa( score ):
23            if score <= 0: #can't take log10( 1 - 1 ); make <= 0 into -5
24                return -5
25            return int( round( 10.0 * math.log10( math.pow( 10.0, ( float( score ) / 10.0 ) ) - 1.0 ) ) )
26        return map( phred_to_solexa, decimal_score_list )
27    @classmethod
28    def convert_score_solexa_to_phred( cls, decimal_score_list ):
29        def solexa_to_phred( score ):
30            return int( round( 10.0 * math.log10( math.pow( 10.0, ( float( score ) / 10.0 ) ) + 1.0 ) ) )
31        return map( solexa_to_phred, decimal_score_list )
32    @classmethod
33    def restrict_scores_to_valid_range( cls, decimal_score_list ):
34        def restrict_score( score ):
35            return max( min( score, cls.quality_max ), cls.quality_min )
36        return map( restrict_score, decimal_score_list )
37    @classmethod
38    def convert_base_to_color_space( cls, sequence ):
39        return cls.color_space_converter.to_color_space( sequence )
40    @classmethod
41    def convert_color_to_base_space( cls, sequence ):
42        return cls.color_space_converter.to_base_space( sequence )
43    def is_ascii_encoded( self ):
44        #as per fastq definition only decimal quality strings can have spaces (and TABs for our purposes) in them (and must have a trailing space)
45        if ' ' in self.quality:
46            return False
47        if '\t' in self.quality:
48            return False
49        return True
50    def get_ascii_quality_scores( self ):
51        if self.is_ascii_encoded():
52            return list( self.quality )
53        else:
54            quality = self.quality.rstrip() #decimal scores should have a trailing space
55            if quality:
56                try:
57                    return [ chr( int( val ) + self.ascii_min - self.quality_min ) for val in quality.split() ]
58                except ValueError, e:
59                    raise ValueError( 'Error Parsing quality String. ASCII quality strings cannot contain spaces (%s): %s' % ( self.quality, e ) )
60            else:
61                return []
62    def get_decimal_quality_scores( self ):
63        if self.is_ascii_encoded():
64            return [ ord( val ) - self.ascii_min + self.quality_min for val in self.quality ]
65        else:
66            quality = self.quality.rstrip() #decimal scores should have a trailing space
67            if quality:
68                return [ int( val ) for val in quality.split() if val.strip() ]
69            else:
70                return []
71    def convert_read_to_format( self, format, force_quality_encoding = None ):
72        assert format in FASTQ_FORMATS, 'Unknown format type specified: %s' % format
73        assert force_quality_encoding in [ None, 'ascii', 'decimal' ], 'Invalid force_quality_encoding: %s' % force_quality_encoding
74        new_class = FASTQ_FORMATS[ format ]
75        new_read = new_class()
76        new_read.identifier = self.identifier
77        if self.sequence_space == new_class.sequence_space:
78            new_read.sequence = self.sequence
79        else:
80            if self.sequence_space == 'base':
81                new_read.sequence = self.convert_base_to_color_space( self.sequence )
82            else:
83                new_read.sequence = self.convert_color_to_base_space( self.sequence )
84        new_read.description = self.description
85        if self.score_system != new_read.score_system:
86            if self.score_system == 'phred':
87                score_list = self.convert_score_phred_to_solexa( self.get_decimal_quality_scores() )
88            else:
89                score_list = self.convert_score_solexa_to_phred( self.get_decimal_quality_scores() )
90        else:
91            score_list = self.get_decimal_quality_scores()
92        new_read.quality = "%s " % " ".join( map( str, new_class.restrict_scores_to_valid_range( score_list ) ) ) #need trailing space to be valid decimal fastq
93        if force_quality_encoding is None:
94            if self.is_ascii_encoded():
95                new_encoding = 'ascii'
96            else:
97                new_encoding = 'decimal'
98        else:
99            new_encoding = force_quality_encoding
100        if new_encoding == 'ascii':
101            new_read.quality = "".join( new_read.get_ascii_quality_scores() )
102        return new_read
103    def get_sequence( self ):
104        return self.sequence
105    def slice( self, left_column_offset, right_column_offset ):
106        new_read = fastqSequencingRead.get_class_by_format( self.format )()
107        new_read.identifier = self.identifier
108        new_read.sequence = self.get_sequence()[left_column_offset:right_column_offset]
109        new_read.description = self.description
110        if self.is_ascii_encoded():
111            new_read.quality = self.quality[left_column_offset:right_column_offset]
112        else:
113            quality = map( str, self.get_decimal_quality_scores()[left_column_offset:right_column_offset] )
114            if quality:
115                new_read.quality = "%s " % " ".join( quality )
116            else:
117                new_read.quality = ''
118        return new_read
119    def is_valid_format( self ):
120        if self.is_ascii_encoded():
121            for val in self.get_ascii_quality_scores():
122                val = ord( val )
123                if val < self.ascii_min or val > self.ascii_max:
124                    return False
125        else:
126            for val in self.get_decimal_quality_scores():
127                if val < self.quality_min or val > self.quality_max:
128                    return False
129        if not self.is_valid_sequence():
130            return False
131        return True
132    def is_valid_sequence( self ):
133        for base in self.get_sequence():
134            if base not in self.valid_sequence_list:
135                return False
136        return True
137    def insufficient_quality_length( self ):
138        return len( self.get_ascii_quality_scores() ) < len( self.sequence )
139    def assert_sequence_quality_lengths( self ):
140        qual_len = len( self.get_ascii_quality_scores() )
141        seq_len = len( self.sequence )
142        assert qual_len == seq_len, "Invalid FASTQ file: quality score length (%i) does not match sequence length (%i)" % ( qual_len, seq_len )
143    def reverse( self, clone = True ):
144        #need to override how decimal quality scores are reversed
145        if clone:
146            rval = self.clone()
147        else:
148            rval = self
149        rval.sequence = transform.reverse( self.sequence )
150        if rval.is_ascii_encoded():
151            rval.quality = rval.quality[::-1]
152        else:
153            rval.quality = reversed( rval.get_decimal_quality_scores() )
154            rval.quality = "%s " % " ".join( map( str, rval.quality ) )
155        return rval
156
157class fastqSangerRead( fastqSequencingRead ):
158    format = 'sanger'
159    ascii_min = 33
160    ascii_max = 126
161    quality_min = 0
162    quality_max = 93
163    score_system = 'phred'
164    sequence_space = 'base'
165
166class fastqIlluminaRead( fastqSequencingRead ):
167    format = 'illumina'
168    ascii_min = 64
169    ascii_max = 126
170    quality_min = 0
171    quality_max = 62
172    score_system = 'phred'
173    sequence_space = 'base'
174
175class fastqSolexaRead( fastqSequencingRead ):
176    format = 'solexa'
177    ascii_min = 59
178    ascii_max = 126
179    quality_min = -5
180    quality_max = 62
181    score_system = 'solexa'
182    sequence_space = 'base'
183   
184class fastqCSSangerRead( fastqSequencingRead ):
185    format = 'cssanger' #color space
186    ascii_min = 33
187    ascii_max = 126
188    quality_min = 0
189    quality_max = 93
190    score_system = 'phred'
191    sequence_space = 'color'
192    valid_sequence_list = map( str, range( 7 ) ) + [ '.' ]
193    def __len__( self ):
194        if self.has_adapter_base(): #Adapter base is not counted in length of read
195            return len( self.sequence ) - 1
196        return fastqSequencingRead.__len__( self )
197    def has_adapter_base( self ):
198        if self.sequence and self.sequence[0] in string.letters: #adapter base must be a letter
199            return True
200        return False
201    def insufficient_quality_length( self ):
202        if self.has_adapter_base():
203            return len( self.get_ascii_quality_scores() ) + 1 < len( self.sequence )
204        return fastqSequencingRead.insufficient_quality_length( self )
205    def assert_sequence_quality_lengths( self ):
206        if self.has_adapter_base():
207            qual_len = len( self.get_ascii_quality_scores() )
208            seq_len = len( self.sequence )
209            assert qual_len + 1 == seq_len, "Invalid FASTQ file: quality score length (%i) does not match sequence length (%i with adapter base)" % ( qual_len, seq_len )
210        else:
211            return fastqSequencingRead.assert_sequence_quality_lengths( self )
212    def get_sequence( self ):
213        if self.has_adapter_base():
214            return self.sequence[1:]
215        return self.sequence
216    def reverse( self, clone = True ):
217        #need to override how color space is reversed
218        if clone:
219            rval = self.clone()
220        else:
221            rval = self
222        if rval.has_adapter_base():
223            adapter = rval.sequence[0]
224            #sequence = rval.sequence[1:]
225            rval.sequence = self.color_space_converter.to_color_space( transform.reverse( self.color_space_converter.to_base_space( rval.sequence ) ), adapter_base = adapter )
226        else:
227            rval.sequence = transform.reverse( rval.sequence )
228       
229        if rval.is_ascii_encoded():
230            rval.quality = rval.quality[::-1]
231        else:
232            rval.quality = reversed( rval.get_decimal_quality_scores() )
233            rval.quality = "%s " % " ".join( map( str, rval.quality ) )
234        return rval
235    def complement( self, clone = True ):
236        #need to override how color space is complemented
237        if clone:
238            rval = self.clone()
239        else:
240            rval = self
241        if rval.has_adapter_base(): #No adapter, color space stays the same
242            adapter = rval.sequence[0]
243            sequence = rval.sequence[1:]
244            if adapter.lower() != 'u':
245                adapter = transform.DNA_complement( adapter )
246            else:
247                adapter = transform.RNA_complement( adapter )
248            rval.sequence = "%s%s" % ( adapter, sequence )
249        return rval
250    def change_adapter( self, new_adapter, clone = True ):
251        #if new_adapter is empty, remove adapter, otherwise replace with new_adapter
252        if clone:
253            rval = self.clone()
254        else:
255            rval = self
256        if rval.has_adapter_base():
257            if new_adapter:
258                if new_adapter != rval.sequence[0]:
259                    rval.sequence = rval.color_space_converter.to_color_space( rval.color_space_converter.to_base_space( rval.sequence ), adapter_base = new_adapter )
260            else:
261                rval.sequence = rval.sequence[1:]
262        elif new_adapter:
263            rval.sequence = "%s%s" % ( new_adapter, rval.sequence )
264        return rval
265
266
267FASTQ_FORMATS = {}
268for format in [ fastqIlluminaRead, fastqSolexaRead, fastqSangerRead, fastqCSSangerRead ]:
269    FASTQ_FORMATS[ format.format ] = format
270
271
272class fastqAggregator( object ):
273    VALID_FORMATS = FASTQ_FORMATS.keys()
274    def __init__( self,  ):
275        self.ascii_values_used = [] #quick lookup of all ascii chars used
276        self.seq_lens = {} #counts of seqs by read len
277        self.nuc_index_quality = [] #counts of scores by read column
278        self.nuc_index_base = [] #counts of bases by read column
279    def consume_read( self, fastq_read ):
280        #ascii values used
281        for val in fastq_read.get_ascii_quality_scores():
282            if val not in self.ascii_values_used:
283                self.ascii_values_used.append( val )
284        #lengths
285        seq_len = len( fastq_read )
286        self.seq_lens[ seq_len ] = self.seq_lens.get( seq_len, 0 ) + 1
287        #decimal qualities by column
288        for i, val in enumerate( fastq_read.get_decimal_quality_scores() ):
289            if i == len( self.nuc_index_quality ):
290                self.nuc_index_quality.append( {} )
291            self.nuc_index_quality[ i ][ val ] = self.nuc_index_quality[ i ].get( val, 0 ) + 1
292        #bases by column
293        for i, nuc in enumerate( fastq_read.get_sequence() ):
294            if i == len( self.nuc_index_base ):
295                self.nuc_index_base.append( {} )
296            nuc = nuc.upper()
297            self.nuc_index_base[ i ][ nuc ] = self.nuc_index_base[ i ].get( nuc, 0 ) + 1
298    def get_valid_formats( self, check_list = None ):
299        if not check_list:
300            check_list = self.VALID_FORMATS
301        rval = []
302        sequence = []
303        for nuc_dict in self.nuc_index_base:
304            for nuc in nuc_dict.keys():
305                if nuc not in sequence:
306                    sequence.append( nuc )
307        sequence = "".join( sequence )
308        quality = "".join( self.ascii_values_used )
309        for fastq_format in check_list:
310            fastq_read = fastqSequencingRead.get_class_by_format( fastq_format )()
311            fastq_read.quality = quality
312            fastq_read.sequence = sequence
313            if fastq_read.is_valid_format():
314                rval.append( fastq_format )
315        return rval
316    def get_ascii_range( self ):
317        return ( min( self.ascii_values_used ), max( self.ascii_values_used ) )
318    def get_decimal_range( self ):
319        decimal_values_used = []
320        for scores in self.nuc_index_quality:
321            decimal_values_used.extend( scores.keys() )
322        return ( min( decimal_values_used ), max( decimal_values_used ) )
323    def get_length_counts( self ):
324        return self.seq_lens
325    def get_max_read_length( self ):
326        return len( self.nuc_index_quality )
327    def get_read_count_for_column( self, column ):
328        if column >= len( self.nuc_index_quality ):
329            return 0
330        return sum( self.nuc_index_quality[ column ].values() )
331    def get_read_count( self ):
332        return self.get_read_count_for_column( 0 )
333    def get_base_counts_for_column( self, column ):
334        return self.nuc_index_base[ column ]
335    def get_score_list_for_column( self, column ):
336        return self.nuc_index_quality[ column ].keys()
337    def get_score_min_for_column( self, column ):
338        return min( self.nuc_index_quality[ column ].keys() )
339    def get_score_max_for_column( self, column ):
340        return max( self.nuc_index_quality[ column ].keys() )
341    def get_score_sum_for_column( self, column ):
342        return sum( score * count for score, count in self.nuc_index_quality[ column ].iteritems() )
343    def get_score_at_position_for_column( self, column, position ):
344        score_value_dict = self.nuc_index_quality[ column ]
345        scores = sorted( score_value_dict.keys() )
346        for score in scores:
347            if score_value_dict[ score ] <= position:
348                position -= score_value_dict[ score ]
349            else:
350                return score
351    def get_summary_statistics_for_column( self, i ):
352        def _get_med_pos( size ):
353            halfed = int( size / 2 )
354            if size % 2 == 1:
355                return [ halfed ]
356            return[ halfed - 1, halfed ]
357        read_count = self.get_read_count_for_column( i )
358       
359        min_score = self.get_score_min_for_column( i )
360        max_score = self.get_score_max_for_column( i )
361        sum_score = self.get_score_sum_for_column( i )
362        mean_score = float( sum_score ) / float( read_count )
363        #get positions
364        med_pos = _get_med_pos( read_count )
365        if 0 in med_pos:
366            q1_pos = [ 0 ]
367            q3_pos = [ read_count - 1 ]
368        else:
369            q1_pos = _get_med_pos( min( med_pos ) )
370            q3_pos = []
371            for pos in q1_pos:
372                q3_pos.append( max( med_pos ) + 1 + pos )
373        #get scores at position
374        med_score = float( sum( [ self.get_score_at_position_for_column( i, pos ) for pos in med_pos ] ) ) / float( len( med_pos ) )
375        q1 = float( sum( [ self.get_score_at_position_for_column( i, pos ) for pos in q1_pos ] ) ) / float( len( q1_pos ) )
376        q3 = float( sum( [ self.get_score_at_position_for_column( i, pos ) for pos in q3_pos ] ) ) / float( len( q3_pos ) )
377        #determine iqr and step
378        iqr = q3 - q1
379        step = 1.5 * iqr
380       
381        #Determine whiskers and outliers
382        outliers = []
383        score_list = sorted( self.get_score_list_for_column( i ) )
384        left_whisker = q1 - step
385        for score in score_list:
386            if left_whisker <= score:
387                left_whisker = score
388                break
389            else:
390                outliers.append( score )
391       
392        right_whisker = q3 + step
393        score_list.reverse()
394        for score in score_list:
395            if right_whisker >= score:
396                right_whisker = score
397                break
398            else:
399                outliers.append( score )
400       
401        column_stats = { 'read_count': read_count,
402                         'min_score': min_score,
403                         'max_score': max_score,
404                         'sum_score': sum_score,
405                         'mean_score': mean_score,
406                         'q1': q1,
407                         'med_score': med_score,
408                         'q3': q3,
409                         'iqr': iqr,
410                         'left_whisker': left_whisker,
411                         'right_whisker': right_whisker,
412                         'outliers': outliers }
413        return column_stats
414
415class fastqReader( object ):
416    def __init__( self, fh, format = 'sanger' ):
417        self.file = fh
418        self.format = format
419    def close( self ):
420        return self.file.close()
421    def next(self):
422        while True:
423            fastq_header = self.file.readline()
424            if not fastq_header:
425                raise StopIteration
426            fastq_header = fastq_header.rstrip( '\n\r' )
427            #remove empty lines, apparently extra new lines at end of file is common?
428            if fastq_header:
429                break
430               
431        assert fastq_header.startswith( '@' ), 'Invalid fastq header: %s' % fastq_header
432        rval = fastqSequencingRead.get_class_by_format( self.format )()       
433        rval.identifier = fastq_header
434        while True:
435            line = self.file.readline()
436            if not line:
437                raise Exception( 'Invalid FASTQ file: could not parse second instance of sequence identifier.' )
438            line = line.rstrip( '\n\r' )
439            if line.startswith( '+' ) and ( len( line ) == 1 or line[1:].startswith( fastq_header[1:] ) ):
440                rval.description = line
441                break
442            rval.append_sequence( line )
443        while rval.insufficient_quality_length():
444            line = self.file.readline()
445            if not line:
446                break
447            rval.append_quality( line )
448        rval.assert_sequence_quality_lengths()
449        return rval
450    def __iter__( self ):
451        while True:
452            yield self.next()
453
454class fastqNamedReader( object ):
455    def __init__( self, fh, format = 'sanger' ):
456        self.file = fh
457        self.format = format
458        self.reader = fastqReader( self.file, self.format )
459        #self.last_offset = self.file.tell()
460        self.offset_dict = {}
461        self.eof = False
462    def close( self ):
463        return self.file.close()
464    def get( self, sequence_id ):
465        if not isinstance( sequence_id, basestring ):
466            sequence_id = sequence_id.identifier
467        rval = None
468        if sequence_id in self.offset_dict:
469            initial_offset = self.file.tell()
470            seq_offset = self.offset_dict[ sequence_id ].pop( 0 )
471            if not self.offset_dict[ sequence_id ]:
472                del self.offset_dict[ sequence_id ]
473            self.file.seek( seq_offset )
474            rval = self.reader.next()
475            #assert rval.identifier == sequence_id, 'seq id mismatch' #should be able to remove this
476            self.file.seek( initial_offset )
477        else:
478            while True:
479                offset = self.file.tell()
480                try:
481                    fastq_read = self.reader.next()
482                except StopIteration:
483                    self.eof = True
484                    break #eof, id not found, will return None
485                if fastq_read.identifier == sequence_id:
486                    rval = fastq_read
487                    break
488                else:
489                    if fastq_read.identifier not in self.offset_dict:
490                        self.offset_dict[ fastq_read.identifier ] = []
491                    self.offset_dict[ fastq_read.identifier ].append( offset )
492        return rval
493    def has_data( self ):
494        #returns a string representation of remaining data, or empty string (False) if no data remaining
495        eof = self.eof
496        count = 0
497        rval = ''
498        if self.offset_dict:
499            count = sum( map( len, self.offset_dict.values() ) )
500        if not eof:
501            offset = self.file.tell()
502            try:
503                fastq_read = self.reader.next()
504            except StopIteration:
505                eof = True
506            self.file.seek( offset )
507        if count:
508            rval = "There were %i known sequence reads not utilized. "
509        if not eof:
510            rval = "%s%s" % ( rval, "An additional unknown number of reads exist in the input that were not utilized." )
511        return rval
512
513class fastqWriter( object ):
514    def __init__( self, fh, format = None, force_quality_encoding = None ):
515        self.file = fh
516        self.format = format
517        self.force_quality_encoding = force_quality_encoding
518    def write( self, fastq_read ):
519        if self.format:
520            fastq_read = fastq_read.convert_read_to_format( self.format, force_quality_encoding = self.force_quality_encoding )
521        self.file.write( str( fastq_read ) )
522    def close( self ):
523        return self.file.close()
524
525class fastqJoiner( object ):
526    def __init__( self, format, force_quality_encoding = None ):
527        self.format = format
528        self.force_quality_encoding = force_quality_encoding
529    def join( self, read1, read2 ):
530        if read1.identifier.endswith( '/2' ) and read2.identifier.endswith( '/1' ):
531            #swap 1 and 2
532            tmp = read1
533            read1 = read2
534            read2 = tmp
535            del tmp
536        if read1.identifier.endswith( '/1' ) and read2.identifier.endswith( '/2' ):
537            identifier = read1.identifier[:-2]
538        else:
539            identifier = read1.identifier
540       
541        #use force quality encoding, if not present force to encoding of first read
542        force_quality_encoding = self.force_quality_encoding
543        if not force_quality_encoding:
544            if read1.is_ascii_encoded():
545                force_quality_encoding = 'ascii'
546            else:
547                force_quality_encoding = 'decimal'
548       
549        new_read1 = read1.convert_read_to_format( self.format, force_quality_encoding = force_quality_encoding )
550        new_read2 = read2.convert_read_to_format( self.format, force_quality_encoding = force_quality_encoding )
551        rval = FASTQ_FORMATS[ self.format ]()
552        rval.identifier = identifier
553        if len( read1.description ) > 1:
554            rval.description = "+%s" % ( identifier[1:] )
555        else:
556            rval.description = '+'
557        if rval.sequence_space == 'color':
558            #need to handle color space joining differently
559            #convert to nuc space, join, then convert back
560            rval.sequence = rval.convert_base_to_color_space( new_read1.convert_color_to_base_space( new_read1.sequence ) + new_read2.convert_color_to_base_space( new_read2.sequence ) )
561        else:
562            rval.sequence = new_read1.sequence + new_read2.sequence
563        if force_quality_encoding == 'ascii':
564            rval.quality = new_read1.quality + new_read2.quality
565        else:
566            rval.quality = "%s %s" % ( new_read1.quality.strip(), new_read2.quality.strip() )
567        return rval
568    def get_paired_identifier( self, fastq_read ):
569        identifier = fastq_read.identifier
570        if identifier[-2] == '/':
571            if identifier[-1] == "1":
572                identifier = "%s2" % identifier[:-1]
573            elif identifier[-1] == "2":
574                identifier = "%s1" % identifier[:-1]
575        return identifier
576
577class fastqSplitter( object ):   
578    def split( self, fastq_read ):
579        length = len( fastq_read )
580        #Only reads of even lengths can be split
581        if length % 2 != 0:
582            return None, None
583        half = int( length / 2 )
584        read1 = fastq_read.slice( 0, half )
585        read1.identifier += "/1"
586        if len( read1.description ) > 1:
587            read1.description += "/1"
588        read2 = fastq_read.slice( half, None )
589        read2.identifier += "/2"
590        if len( read2.description ) > 1:
591            read2.description += "/2"
592        return read1, read2
593
594class fastqCombiner( object ):
595    def __init__( self, format ):
596        self.format = format
597    def combine(self, fasta_seq, quality_seq ):
598        fastq_read = fastqSequencingRead.get_class_by_format( self.format )()
599        fastq_read.identifier = "@%s" % fasta_seq.identifier[1:]
600        fastq_read.description = '+'
601        fastq_read.sequence = fasta_seq.sequence
602        fastq_read.quality = quality_seq.sequence
603        return fastq_read
604
605class fastqFakeFastaScoreReader( object ):
606    def __init__( self, format = 'sanger', quality_encoding = None ):
607        self.fastq_read = fastqSequencingRead.get_class_by_format( format )()
608        if quality_encoding != 'decimal':
609            quality_encoding = 'ascii'
610        self.quality_encoding = quality_encoding
611    def close( self ):
612        return #nothing to close
613    def get( self, sequence ):
614        assert isinstance( sequence, fastaSequence ), 'fastqFakeFastaScoreReader requires a fastaSequence object as the parameter'
615        #add sequence to fastq_read, then get_sequence(), color space adapters do not have quality score values
616        self.fastq_read.sequence = sequence.sequence
617        new_sequence = fastaSequence()
618        new_sequence.identifier = sequence.identifier
619        if self.quality_encoding == 'ascii':
620            new_sequence.sequence = chr( self.fastq_read.ascii_max ) * len( self.fastq_read.get_sequence() )
621        else:
622            new_sequence.sequence = ( "%i " % self.fastq_read.quality_max ) * len( self.fastq_read.get_sequence() )
623        return new_sequence
624    def has_data( self ):
625        return '' #No actual data exist, none can be remaining
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。