| 1 | import sys |
|---|
| 2 | import twobit |
|---|
| 3 | import random |
|---|
| 4 | |
|---|
def quick_fasta_iter( f ):
    """
    Minimal FASTA reader: yield one `( header, sequence )` pair per record.

    `f` is any iterable of text lines (an open file, a list of strings).

    Lines starting with "#" are skipped. A line starting with ">" begins a
    new record; its header is the stripped line without the leading ">".
    Every other line has all whitespace removed and is appended to the
    sequence of the current record.

    Quirks worth knowing:
    - a record is only emitted once at least one non-header line has been
      collected for it, so a header directly followed by another header
      (or by the end of input) produces nothing;
    - a blank line counts as a collected (empty) line, so a header followed
      only by blank lines is emitted with an empty sequence;
    - sequence lines seen before any header are emitted under header None.
    """
    header = None
    chunks = []
    for raw_line in f:
        # Comment lines never contribute to a record
        if raw_line.startswith( "#" ):
            continue
        # Anything that is not a header is sequence data
        if not raw_line.startswith( ">" ):
            chunks.append( "".join( raw_line.split() ) )
            continue
        # New header: flush whatever was gathered for the previous record
        if chunks:
            yield header, "".join( chunks )
            chunks = []
        header = raw_line.strip()[1:]
    # Flush the final record, if it gathered anything
    if chunks:
        yield header, "".join( chunks )
|---|
| 22 | |
|---|
def test():
    """
    Nose test generator

    For each of the data sets "test", "testN" and "testMask", yields
    `check_random_subseq_matches` together with the paths of the FASTA file
    and the 2bit file of that data set under test_data/seq_tests/.
    """
    for stem in ( "test", "testN", "testMask" ):
        # Same stem, two extensions: the FASTA reference and the 2bit file
        fasta_path, twobit_path = [ template % stem for template in
            ( "test_data/seq_tests/%s.fa", "test_data/seq_tests/%s.2bit" ) ]
        yield check_random_subseq_matches, fasta_path, twobit_path
|---|
| 31 | |
|---|
def check_random_subseq_matches( test_fa, test_twobit ):
    """
    Check that random slices of a 2bit file match the FASTA it was built from.

    Every record parsed from `test_fa` by `quick_fasta_iter` must be present
    in the index of the `twobit.TwoBitFile` opened on `test_twobit`, and 100
    randomly chosen `[start:end]` slices of each record must compare equal
    between the FASTA text and the 2bit data.

    test_fa     -- path of the FASTA file holding the expected sequences
    test_twobit -- path of the 2bit file under test

    Raises AssertionError when a record is missing from the 2bit index or a
    slice differs; the message shows the expected and the actual slice.
    """
    # Load Fasta data; the handle is closed as soon as parsing is finished
    with open( test_fa ) as fasta_file:
        expected = dict( quick_fasta_iter( fasta_file ) )
    # Open 2bit in binary mode (it is not a text format). The handle stays
    # open for the whole check -- presumably TwoBitFile reads from it lazily
    # when sequences are sliced.
    with open( test_twobit, "rb" ) as twobit_file:
        t = twobit.TwoBitFile( twobit_file )
        # items() instead of iteritems(): available on Python 2 and 3
        for k, s in expected.items():
            assert k in t.index
            # assert t.index[k].size == len(s)
            length = len( s )
            if length == 0:
                # An empty record has no non-empty slice to sample
                continue
            for i in range( 100 ):
                # max() keeps the randint range valid for a one-base record,
                # where length - 2 would be negative
                start = random.randint( 0, max( length - 2, 0 ) )
                end = random.randint( start + 1, length )
                wanted = s[start:end]
                # Read once, so the message reports exactly what was compared
                actual = t[k][start:end]
                assert actual == wanted, \
                    "seq: %s, start: %d, end: %d\nExpected:\n%s\nActual:\n%s\n" % ( k, start, end, wanted, actual )
|---|