1 | from StringIO import StringIO |
---|
2 | import transfac |
---|
3 | from numpy import allclose |
---|
4 | |
---|
# Inline TRANSFAC-style matrix table text that test_reader wraps in a StringIO
# and feeds to transfac.TransfacReader. It contains two entries separated by
# "//" lines (AC a / V$MYOD_01 and AC M00002 / V$E47_01); test_reader expects
# two motifs and checks the accession, DT lines and first matrix row of the
# second one.
# NOTE(review): field and column spacing is kept exactly as it appears here --
# confirm it matches what the reader expects.
# NOTE(review): the second matrix's rows are labelled 00, 02, 03, ... (no 01);
# presumably intentional, verify against the upstream data.
sample = """
VV TRANSFAC MATRIX TABLE, Rel.3.2 26-06-1997
XX
//
AC a
XX
ID V$MYOD_01
XX
DT 19.10.92 (created); ewi.
DT 16.10.95 (updated); ewi.
XX
NA MyoD
XX
DE myoblast determination gene product
XX
BF T00526; MyoD; Species: mouse, Mus musculus.
XX
P0 A C G T
01 100 200 200 0 S
02 200 100 200 0 R
03 300 0 100 100 A
04 0 500 0 0 C
05 500 0 0 0 A
06 0 0 400 100 G
07 0 100 400 0 G
08 0 0 0 500 T
09 0 0 500 0 G
10 0 100 200 200 K
11 0 200 0 300 Y
12 100 0 300 100 G
XX
BA 5 functional elements in 3 genes
XX
//
AC M00002
XX
ID V$E47_01
XX
DT 19.10.92 (created); ewi.
DT 16.10.95 (updated); ewi.
XX
NA E47
XX
DE E47
XX
BF T00207; E47; Species: human, Homo sapiens.
XX
P0 A C G T
00 400 400 300 0 N
02 200 500 400 0 S
03 300 200 400 200 N
04 200 0 900 0 G
05 0 1100 0 0 C
06 1100 0 0 0 A
07 0 0 1100 0 G
08 100 200 800 0 G
09 0 0 0 1100 T
10 0 0 1100 0 G
11 0 0 400 700 K
12 100 400 300 300 N
13 100 600 200 200 C
14 100 400 400 200 N
15 100 400 200 300 N
XX
BA 11 selected strong binding sites for E47, E47-MyoD, E12+MyoD
BA and (weak) for E12
XX
CC Group I in [903]; 5 sites selected in vitro for binding to E12N
CC (=N-terminally truncated E12); matrix corrected according to
CC the published sequences
XX
RN [1]
RA Sun X.-H., Baltimore D.
RT An inhibitory domain of E12 transcription factor prevents
RT DNA binding in E12 homodimers but not in E12 heterodimers
RL Cell 64:459-470 (1991).
XX
"""
---|
83 | |
---|
def test_reader():
    """Read the module-level ``sample`` text with ``transfac.TransfacReader``
    and check the motif count plus selected fields of the second motif."""
    handle = StringIO(sample)
    parsed = list(transfac.TransfacReader(handle))
    assert len(parsed) == 2

    second = parsed[1]
    # Field that holds a single value
    assert second.accession == "M00002"
    # Field that holds one entry per DT line
    assert second.dates == [
        '19.10.92 (created); ewi.',
        '16.10.95 (updated); ewi.',
    ]
    # Matrix: alphabet order, then the first row of values
    matrix = second.matrix
    assert matrix.sorted_alphabet == ['A', 'C', 'G', 'T']
    assert allclose(matrix.values[0], [400, 400, 300, 0])
---|