1 | import sys |
---|
2 | import twobit |
---|
3 | import random |
---|
4 | |
---|
def quick_fasta_iter( f ):
    """
    Minimal FASTA reader: iterate over ( header, sequence ) pairs.

    `f` is any iterable of text lines, for example an open file.

    - Lines starting with "#" are skipped.
    - A line starting with ">" starts a new record; its header is the
      stripped line without the leading ">".
    - Every other line has all of its whitespace removed and is appended
      to the sequence of the current record.

    A record is only yielded once at least one non-header line has been
    collected for it, so a header followed directly by another header (or
    by the end of input) yields nothing. Sequence lines that appear before
    any header are yielded with a header of None.
    """
    header = None
    pieces = []
    for raw_line in f:
        if raw_line.startswith( "#" ):
            continue
        if not raw_line.startswith( ">" ):
            # Sequence data: drop embedded whitespace and the newline
            pieces.append( "".join( raw_line.split() ) )
            continue
        # A new header line closes the record collected so far
        if pieces:
            yield header, "".join( pieces )
            pieces = []
        header = raw_line.strip()[1:]
    # Flush the final record, which has no following header to close it
    if pieces:
        yield header, "".join( pieces )
---|
22 | |
---|
def test():
    """
    Nose test generator

    Yields one ( check function, fasta path, 2bit path ) tuple for each
    fixture name under test_data/seq_tests.
    """
    fixture_names = ( "test", "testN", "testMask" )
    for name in fixture_names:
        yield ( check_random_subseq_matches,
                "test_data/seq_tests/%s.fa" % name,
                "test_data/seq_tests/%s.2bit" % name )
---|
31 | |
---|
def check_random_subseq_matches( test_fa, test_twobit ):
    """
    Check that random slices read from a 2bit file match the FASTA source.

    test_fa     -- path of a FASTA file, parsed with quick_fasta_iter
    test_twobit -- path of the 2bit file expected to hold the same sequences

    For every FASTA record this asserts that its name is present in the
    2bit index and then compares 100 randomly chosen [start:end) slices
    of the 2bit sequence with the same slice of the FASTA sequence.

    Raises AssertionError on the first missing name or mismatching slice.
    """
    # Load Fasta data (a later record with a repeated header replaces the
    # earlier one, as a dict keyed by header implies)
    with open( test_fa ) as fasta_file:
        expected = dict( quick_fasta_iter( fasta_file ) )
    # Open 2bit in binary mode, and keep the handle open for the whole
    # comparison since the reader is given the file object itself
    with open( test_twobit, "rb" ) as twobit_file:
        t = twobit.TwoBitFile( twobit_file )
        for k, s in expected.items():
            assert k in t.index
            length = len( s )
            if not length:
                # No non-empty slice exists for an empty sequence
                continue
            for i in range( 100 ):
                # max() keeps randint's range valid for 1-base sequences
                start = random.randint( 0, max( length - 2, 0 ) )
                end = random.randint( start + 1, length )
                # Fetch once so the failure message reports exactly the
                # value that was compared
                actual = t[k][start:end]
                assert actual == s[start:end], \
                    "seq: %s, start: %d, end: %d\nExpected:\n%s\nActual:\n%s\n" % ( k, start, end, s[start:end], actual )
---|