| 1 | import pwm |
|---|
| 2 | from numpy import allclose, isnan |
|---|
| 3 | |
|---|
def test_create():
    """Check that a FrequencyMatrix built from rows exposes the expected state.

    Covers the sorted alphabet, the character-to-index lookup table and the
    first and last rows of stored values.
    """
    alphabet = ['A', 'C', 'G', 'T']
    matrix = pwm.FrequencyMatrix.from_rows(alphabet, get_ctcf_rows())

    # Alphabet sort
    assert matrix.sorted_alphabet == ['A', 'C', 'G', 'T']

    # Character to index mapping: each symbol maps to its position in the
    # alphabet, and a symbol outside the alphabet maps to -1.
    for position, symbol in enumerate('ACGT'):
        assert matrix.char_to_index[ord(symbol)] == position
    assert matrix.char_to_index[ord('Q')] == -1

    # Values: first and last motif positions match the input rows.
    first_row = [2620, 2052, 3013, 2314]
    last_row = [3144, 3231, 3056, 567]
    assert allclose(matrix.values[0], first_row)
    assert allclose(matrix.values[19], last_row)
| 17 | |
|---|
def test_scoring():
    """Check ungapped scoring with the Stormo scoring matrix.

    Scores four 20-character sequences against the forward matrix and its
    reverse complement, then checks that invalid or too-short input yields
    only NaN scores.
    """
    freq_matrix = pwm.FrequencyMatrix.from_rows(
        ['A', 'C', 'G', 'T'], get_ctcf_rows()
    )
    # Stormo method
    scorer = freq_matrix.to_stormo_scoring_matrix()

    sequences = (
        "AATCACCACCTCCTGGCAGG",
        "TGCCTGCCTCTGTAGGCTCC",
        "GTTGCCAGTTGGGGGAAGCA",
        "GCAGACACCAGGTGGTTCAG",
    )

    # Forward matches
    forward_expected = (-156.8261261, -128.8106842, 4.65049839, 1.60168743)
    for sequence, expected in zip(sequences, forward_expected):
        assert allclose(scorer.score_string(sequence)[0], expected)

    # Reverse matches
    rc = scorer.reverse_complement()
    reverse_expected = (
        0.014178276062,
        0.723828315735,
        -126.99407196,
        -86.9560623169,
    )
    for sequence, expected in zip(sequences, reverse_expected):
        assert allclose(rc.score_string(sequence)[0], expected)

    # Nothing valid
    # NOTE(review): this uses score_string_with_gaps even though the rest of
    # this test exercises score_string -- confirm whether that is intended.
    no_valid_chars = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
    assert isnan(scorer.score_string_with_gaps(no_valid_chars)).all()

    # Too short
    assert isnan(scorer.score_string("TTTT")).all()
| 37 | |
|---|
def test_scoring_with_gaps():
    """Check gapped scoring with the Stormo scoring matrix.

    Each case scores a sequence containing '-' characters and compares the
    score at one index with an expected value; further checks confirm that
    one specific index is NaN and that invalid, all-gap or too-short input
    yields only NaN scores.
    """
    freq_matrix = pwm.FrequencyMatrix.from_rows(
        ['A', 'C', 'G', 'T'], get_ctcf_rows()
    )
    # Stormo method
    scorer = freq_matrix.to_stormo_scoring_matrix()

    # Forward matches: (gapped sequence, index into the scores, expected score)
    heavily_gapped = "TTT--GTT--GCCA--GTTGGGG-G-A-A-G-C-A-"
    cases = (
        ("GTTGCCAGT----TGGGGGAAGCATTT---AA", 0, 4.65049839),
        ("GCAGA--CACCAGGTGG--TTCAG---", 0, 1.60168743),
        ("----GTTGCCAGTTGGGGGAAGCA", 4, 4.65049839),
        (heavily_gapped, 5, 4.65049839),
    )
    for sequence, index, expected in cases:
        assert allclose(scorer.score_string_with_gaps(sequence)[index], expected)
    # The index just before the scored one in the last case holds NaN.
    assert isnan(scorer.score_string_with_gaps(heavily_gapped)[4])

    # Nothing valid
    for sequence in (
        "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
        "------------------------------------",
    ):
        assert isnan(scorer.score_string_with_gaps(sequence)).all()

    # Too short
    for sequence in ("TTTT", "TTTT----"):
        assert isnan(scorer.score_string_with_gaps(sequence)).all()
| 54 | |
|---|
| 55 | |
|---|
def get_ctcf_rows():
    """Return the CTCF primary site motif as a list of count rows.

    The result is a freshly built list of 20 rows, each a list of four
    integers. The tests in this file pass it to ``from_rows`` together with
    the alphabet ``['A', 'C', 'G', 'T']``.
    """
    motif_rows = [
        [2620, 2052, 3013, 2314],
        [0, 3580, 1746, 4672],
        [2008, 1790, 4497, 1703],
        [3362, 0, 6637, 0],
        [0, 10000, 0, 0],
        [0, 10000, 0, 0],
        [7467, 0, 1310, 1222],
        [786, 4890, 4323, 0],
        [1179, 6288, 829, 1703],
        [10000, 0, 0, 0],
        [0, 0, 10000, 0],
        [4847, 0, 5152, 0],
        [0, 0, 6200, 3799],
        [0, 0, 10000, 0],
        [0, 0, 10000, 0],
        [1572, 7467, 0, 960],
        [3842, 0, 5545, 611],
        [0, 5895, 4104, 0],
        [1615, 4192, 1397, 2794],
        [3144, 3231, 3056, 567],
    ]
    return motif_rows