| 1 | from StringIO import StringIO |
|---|
| 2 | import transfac |
|---|
| 3 | from numpy import allclose |
|---|
| 4 | |
|---|
# Fixture text for test_reader: a TRANSFAC matrix table excerpt containing a
# version header followed by two records (ID V$MYOD_01 and ID V$E47_01), each
# introduced by a `//` line.  test_reader wraps this string in a StringIO and
# hands it to transfac.TransfacReader.
# NOTE(review): the first record's accession is the literal "a" and the first
# row label of the second matrix is "00" (followed by "02") -- presumably
# deliberate, since the test only checks the second record; confirm before
# "fixing" the data.
sample = """
VV TRANSFAC MATRIX TABLE, Rel.3.2 26-06-1997
XX
//
AC a
XX
ID V$MYOD_01
XX
DT 19.10.92 (created); ewi.
DT 16.10.95 (updated); ewi.
XX
NA MyoD
XX
DE myoblast determination gene product
XX
BF T00526; MyoD; Species: mouse, Mus musculus.
XX
P0 A C G T
01 100 200 200 0 S
02 200 100 200 0 R
03 300 0 100 100 A
04 0 500 0 0 C
05 500 0 0 0 A
06 0 0 400 100 G
07 0 100 400 0 G
08 0 0 0 500 T
09 0 0 500 0 G
10 0 100 200 200 K
11 0 200 0 300 Y
12 100 0 300 100 G
XX
BA 5 functional elements in 3 genes
XX
//
AC M00002
XX
ID V$E47_01
XX
DT 19.10.92 (created); ewi.
DT 16.10.95 (updated); ewi.
XX
NA E47
XX
DE E47
XX
BF T00207; E47; Species: human, Homo sapiens.
XX
P0 A C G T
00 400 400 300 0 N
02 200 500 400 0 S
03 300 200 400 200 N
04 200 0 900 0 G
05 0 1100 0 0 C
06 1100 0 0 0 A
07 0 0 1100 0 G
08 100 200 800 0 G
09 0 0 0 1100 T
10 0 0 1100 0 G
11 0 0 400 700 K
12 100 400 300 300 N
13 100 600 200 200 C
14 100 400 400 200 N
15 100 400 200 300 N
XX
BA 11 selected strong binding sites for E47, E47-MyoD, E12+MyoD
BA and (weak) for E12
XX
CC Group I in [903]; 5 sites selected in vitro for binding to E12N
CC (=N-terminally truncated E12); matrix corrected according to
CC the published sequences
XX
RN [1]
RA Sun X.-H., Baltimore D.
RT An inhibitory domain of E12 transcription factor prevents
RT DNA binding in E12 homodimers but not in E12 heterodimers
RL Cell 64:459-470 (1991).
XX
"""
|---|
| 83 | |
|---|
def test_reader():
    """Read the two-record `sample` fixture and spot-check the second motif.

    The fixture text is wrapped in a file-like object, every record yielded
    by transfac.TransfacReader is collected, and the second record's
    accession, date list and count matrix are compared with the values
    written in the fixture.
    """
    handle = StringIO(sample)
    reader = transfac.TransfacReader(handle)
    parsed = list(reader)
    assert len(parsed) == 2

    second = parsed[1]
    # Single-valued field: expected to match the record's AC line.
    assert second.accession == "M00002"
    # Multi-valued field: expected as one list entry per DT line, in order.
    assert second.dates == ['19.10.92 (created); ewi.', '16.10.95 (updated); ewi.']
    # Matrix: alphabet order from the P0 header, then the first row of counts.
    assert second.matrix.sorted_alphabet == ['A', 'C', 'G', 'T']
    assert allclose(second.matrix.values[0], [400, 400, 300, 0])
|---|