[3] | 1 | import sys |
---|
| 2 | import twobit |
---|
| 3 | import random |
---|
| 4 | |
---|
def quick_fasta_iter( f ):
    """
    Iterate over the records of a FASTA-formatted stream.

    f -- any iterable of text lines (for example an open file).

    Yields ( header, sequence ) tuples. The header is the ">" line with
    trailing whitespace and the leading ">" removed. The sequence is every
    line of the record concatenated, with all whitespace removed.

    Lines starting with "#" are skipped. A header that is followed directly
    by another header, or by the end of the input, is not yielded. Sequence
    lines that appear before the first header are yielded with a header of
    None.
    """
    current_header = None
    current_sequence = []
    for line in f:
        if line.startswith( "#" ):
            continue
        if line.startswith( ">" ):
            # A new header closes the record collected so far.
            if current_sequence:
                yield current_header, "".join( current_sequence )
                current_sequence = []
            current_header = line.strip()[1:]
        else:
            # split() + join drops the newline and any embedded whitespace.
            current_sequence.append( "".join( line.split() ) )
    # The last record has no following header to close it, so flush it here.
    if current_sequence:
        yield current_header, "".join( current_sequence )
---|
| 22 | |
---|
def test():
    """
    Nose generator test: one check per FASTA / 2bit fixture pair.

    Yields ( check_random_subseq_matches, fasta_path, twobit_path ) for each
    of the fixtures "test", "testN" and "testMask" under
    test_data/seq_tests/.
    """
    fixture_names = ( "test", "testN", "testMask" )
    for stem in fixture_names:
        fasta_path = "test_data/seq_tests/%s.fa" % stem
        twobit_path = "test_data/seq_tests/%s.2bit" % stem
        yield check_random_subseq_matches, fasta_path, twobit_path
---|
| 31 | |
---|
def check_random_subseq_matches( test_fa, test_twobit ):
    """
    Check that random slices read from a 2bit file match a FASTA file.

    test_fa     -- path of the FASTA file holding the expected sequences.
    test_twobit -- path of the 2bit file to verify.

    For every FASTA record, asserts that its name is in the 2bit index and
    then compares 100 randomly chosen, non-empty [start:end) slices of the
    2bit sequence against the same slice of the FASTA sequence.

    Raises AssertionError if a name is missing or a slice differs.
    """
    # Load Fasta data; a repeated header keeps its last sequence.
    with open( test_fa ) as fasta_file:
        expected = dict( quick_fasta_iter( fasta_file ) )
    # Open 2bit in binary mode. The handle stays open for the whole loop
    # because slices are fetched through t while iterating.
    with open( test_twobit, "rb" ) as twobit_file:
        t = twobit.TwoBitFile( twobit_file )
        for k, s in expected.items():
            assert k in t.index
            # NOTE(review): the original had a disabled check that
            # t.index[k].size == len(s); it is left disabled here.
            length = len( s )
            if length == 0:
                # No non-empty slice exists to compare.
                continue
            for _ in range( 100 ):
                # start may be the last base, so 1-base sequences work too;
                # end > start keeps every slice non-empty.
                start = random.randint( 0, length - 1 )
                end = random.randint( start + 1, length )
                # Fetch once so the failure message reports exactly the
                # value that was compared.
                actual = t[k][start:end]
                assert actual == s[start:end], \
                    "seq: %s, start: %d, end: %d\nExpected:\n%s\nActual:\n%s\n" % ( k, start, end, s[start:end], actual )
---|