1 | import pwm |
---|
2 | from numpy import allclose, isnan |
---|
3 | |
---|
def test_create():
    """
    Build a FrequencyMatrix from the CTCF rows and check the alphabet
    ordering, the character-to-index table and the stored values.
    """
    m = pwm.FrequencyMatrix.from_rows( ['A','C','G','T'], get_ctcf_rows() )
    # Alphabet sort
    assert m.sorted_alphabet == [ 'A', 'C', 'G', 'T' ]
    # Character to index mapping (indexed by character code)
    assert m.char_to_index[ ord('A') ] == 0
    assert m.char_to_index[ ord('C') ] == 1
    assert m.char_to_index[ ord('G') ] == 2
    assert m.char_to_index[ ord('T') ] == 3
    # A character outside the alphabet maps to -1
    assert m.char_to_index[ ord('Q') ] == -1
    # Values: first and last of the 20 rows supplied by get_ctcf_rows()
    assert allclose( m.values[0], [ 2620, 2052, 3013, 2314 ] )
    assert allclose( m.values[19], [ 3144, 3231, 3056, 567 ] )
17 | |
---|
def test_scoring():
    """
    Score ungapped strings with the Stormo scoring matrix derived from the
    CTCF frequency matrix, and with its reverse complement, comparing the
    first element of each result against reference values.
    """
    m = pwm.FrequencyMatrix.from_rows( ['A','C','G','T'], get_ctcf_rows() )
    # Stormo method
    sm = m.to_stormo_scoring_matrix()
    # Forward matches
    assert allclose( sm.score_string( "AATCACCACCTCCTGGCAGG" )[0], -156.8261261 )
    assert allclose( sm.score_string( "TGCCTGCCTCTGTAGGCTCC" )[0], -128.8106842 )
    assert allclose( sm.score_string( "GTTGCCAGTTGGGGGAAGCA" )[0], 4.65049839 )
    assert allclose( sm.score_string( "GCAGACACCAGGTGGTTCAG" )[0], 1.60168743 )
    # Reverse matches
    rc = sm.reverse_complement()
    assert allclose( rc.score_string( "AATCACCACCTCCTGGCAGG" )[0], 0.014178276062 )
    assert allclose( rc.score_string( "TGCCTGCCTCTGTAGGCTCC" )[0], 0.723828315735 )
    assert allclose( rc.score_string( "GTTGCCAGTTGGGGGAAGCA" )[0], -126.99407196 )
    assert allclose( rc.score_string( "GCAGACACCAGGTGGTTCAG" )[0], -86.9560623169 )
    # Nothing valid
    # NOTE(review): this calls score_string_with_gaps although the rest of
    # this test exercises score_string, and the same check is repeated in
    # test_scoring_with_gaps -- confirm whether score_string was intended.
    assert isnan( sm.score_string_with_gaps( "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" ) ).all()
    # Too short (4 characters against a 20-row matrix): every entry is NaN
    assert isnan( sm.score_string( "TTTT" ) ).all()
37 | |
---|
def test_scoring_with_gaps():
    """
    Score strings containing '-' gap characters with the Stormo scoring
    matrix. The expected values are the same as the ungapped scores for
    the same bases checked in test_scoring.
    """
    m = pwm.FrequencyMatrix.from_rows( ['A','C','G','T'], get_ctcf_rows() )
    # Stormo method
    sm = m.to_stormo_scoring_matrix()
    # Forward matches
    assert allclose( sm.score_string_with_gaps( "GTTGCCAGT----TGGGGGAAGCATTT---AA" )[0], 4.65049839 )
    assert allclose( sm.score_string_with_gaps( "GCAGA--CACCAGGTGG--TTCAG---" )[0], 1.60168743 )
    # Leading gaps: the score is reported at index 4, where the first base sits
    assert allclose( sm.score_string_with_gaps( "----GTTGCCAGTTGGGGGAAGCA" )[4], 4.65049839 )
    # Score the heavily gapped string once and check two positions of it:
    # index 5 holds a base ('G') and gets a score, index 4 holds a gap and is NaN
    gapped_scores = sm.score_string_with_gaps( "TTT--GTT--GCCA--GTTGGGG-G-A-A-G-C-A-" )
    assert allclose( gapped_scores[5], 4.65049839 )
    assert isnan( gapped_scores[4] )
    # Nothing valid
    assert isnan( sm.score_string_with_gaps( "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" ) ).all()
    assert isnan( sm.score_string_with_gaps( "------------------------------------" ) ).all()
    # Too short
    assert isnan( sm.score_string_with_gaps( "TTTT" ) ).all()
    assert isnan( sm.score_string_with_gaps( "TTTT----" ) ).all()
54 | |
---|
55 | |
---|
def get_ctcf_rows():
    """
    The CTCF primary site motif

    Returns a new list of 20 rows, one per motif position, each holding
    four integer counts. The tests in this file pass these rows together
    with the alphabet ['A','C','G','T'], so the columns are presumably in
    that order.
    """
    return [
        [ 2620 , 2052 , 3013 , 2314 ],
        [ 0 , 3580 , 1746 , 4672 ],
        [ 2008 , 1790 , 4497 , 1703 ],
        [ 3362 , 0 , 6637 , 0 ],
        [ 0 , 10000 , 0 , 0 ],
        [ 0 , 10000 , 0 , 0 ],
        [ 7467 , 0 , 1310 , 1222 ],
        [ 786 , 4890 , 4323 , 0 ],
        [ 1179 , 6288 , 829 , 1703 ],
        [ 10000 , 0 , 0 , 0 ],
        [ 0 , 0 , 10000 , 0 ],
        [ 4847 , 0 , 5152 , 0 ],
        [ 0 , 0 , 6200 , 3799 ],
        [ 0 , 0 , 10000 , 0 ],
        [ 0 , 0 , 10000 , 0 ],
        [ 1572 , 7467 , 0 , 960 ],
        [ 3842 , 0 , 5545 , 611 ],
        [ 0 , 5895 , 4104 , 0 ],
        [ 1615 , 4192 , 1397 , 2794 ],
        [ 3144 , 3231 , 3056 , 567 ]
    ]