1 | """ |
---|
2 | Tests for `bx.align.maf`. |
---|
3 | """ |
---|
4 | |
---|
5 | import unittest |
---|
6 | import sys |
---|
7 | import bx.align as align |
---|
8 | import bx.align.maf as maf |
---|
9 | |
---|
10 | from StringIO import StringIO |
---|
11 | |
---|
12 | # A simple MAF from the rat paper days |
---|
13 | test_maf = """##maf version=1 scoring=humor.v4 |
---|
14 | # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf |
---|
15 | # /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf |
---|
16 | |
---|
17 | a score=0.128 |
---|
18 | s human_hoxa 100 8 + 100257 ACA-TTACT |
---|
19 | s horse_hoxa 120 9 - 98892 ACAATTGCT |
---|
20 | s fugu_hoxa 88 7 + 90788 ACA--TGCT |
---|
21 | |
---|
22 | |
---|
23 | a score=0.071 |
---|
24 | s human_unc 9077 8 + 10998 ACAGTATT |
---|
25 | # Comment |
---|
26 | s horse_unc 4555 6 - 5099 ACA--ATT |
---|
27 | s fugu_unc 4000 4 + 4038 AC----TT |
---|
28 | """ |
---|
29 | |
---|
30 | # A more complicated MAF with synteny annotation and such |
---|
31 | test_maf_2 = """##maf version=1 scoring=autoMZ.v1 |
---|
32 | a score=3656.000000 |
---|
33 | s hg17.chr1 2005 34 + 245522847 TGTAACTTAATACCACAACCAGGCATAGGGG--AAA------------- |
---|
34 | s rheMac2.chr11 9625228 31 + 134511895 TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------ |
---|
35 | i rheMac2.chr11 C 0 I 1678 |
---|
36 | s panTro1.chr1 2014 34 + 229575298 TGTAACTTAATACCACAACCAGGCATGGGGG--AAA------------- |
---|
37 | i panTro1.chr1 C 0 C 0 |
---|
38 | s bosTau2.chr5 64972365 47 + 76426644 TCCAGCCATGTGTTGTGATCAG--CCAGGGGCTAAAGCCATGGCGGTAG |
---|
39 | i bosTau2.chr5 C 0 I 1462 |
---|
40 | s canFam2.chr27 45129665 31 + 48908698 TTTGACTCTGTGCTCTTATCAGGCCCAAGGG------------------ |
---|
41 | i canFam2.chr27 C 0 I 1664 |
---|
42 | e danRer3.chr18 2360867 428 + 50308305 I |
---|
43 | e oryCun1.scaffold_139397 643 1271 - 4771 I |
---|
44 | e loxAfr1.scaffold_5603 58454 1915 + 68791 I |
---|
45 | e echTel1.scaffold_212365 4641 1430 + 9822 I |
---|
46 | e echTel1.scaffold_212365 4641 1430 + 9822 I |
---|
47 | e rn3.chr4 29161032 1524 - 187371129 I |
---|
48 | e mm7.chr6 28091695 3290 - 149646834 I |
---|
49 | |
---|
50 | """ |
---|
51 | |
---|
52 | # A MAF to test slicing upon |
---|
53 | test_maf_3 = """##maf version=1 scoring=none |
---|
54 | a score=0 |
---|
55 | s apple 34 64 + 110 AGGGA---GTTCGTCACT------GTCGTAAGGGTTCAGA--CTGTCTATGTATACACAAGTTGTGTTGCA--ACCG |
---|
56 | s orange 19 61 - 100 AGGGATGCGTT--TCACTGCTATCGTCGTA----TTCAGACTTCG-CTATCT------GAGTTGT---GCATTACCG |
---|
57 | |
---|
58 | """ |
---|
59 | |
---|
60 | def test_reader(): |
---|
61 | |
---|
62 | reader = maf.Reader( StringIO( test_maf ) ) |
---|
63 | assert reader.attributes["version"] == "1" |
---|
64 | assert reader.attributes["scoring"] == "humor.v4" |
---|
65 | |
---|
66 | a = reader.next() |
---|
67 | assert a.score == 0.128 |
---|
68 | assert len( a.components ) == 3 |
---|
69 | check_component( a.components[0], "human_hoxa", 100, 8, "+", 100257, "ACA-TTACT" ) |
---|
70 | check_component( a.components[1], "horse_hoxa", 120, 9, "-", 98892, "ACAATTGCT" ) |
---|
71 | check_component( a.components[2], "fugu_hoxa", 88, 7, "+", 90788, "ACA--TGCT" ) |
---|
72 | |
---|
73 | a = reader.next() |
---|
74 | assert a.score == 0.071 |
---|
75 | assert len( a.components ) == 3 |
---|
76 | check_component( a.components[0], "human_unc", 9077, 8, "+", 10998, "ACAGTATT" ) |
---|
77 | check_component( a.components[1], "horse_unc", 4555, 6, "-", 5099, "ACA--ATT" ) |
---|
78 | check_component( a.components[2], "fugu_unc", 4000, 4, "+", 4038, "AC----TT" ) |
---|
79 | |
---|
80 | a = reader.next() |
---|
81 | assert a is None |
---|
82 | |
---|
83 | reader.close() |
---|
84 | |
---|
85 | def test_writer(): |
---|
86 | |
---|
87 | val = StringIO() |
---|
88 | writer = maf.Writer( val, { 'scoring':'foobar' } ) |
---|
89 | |
---|
90 | a = align.Alignment() |
---|
91 | a.score = 7009 |
---|
92 | |
---|
93 | a.components.append( align.Component( src="human_hoxa", start=100, size=9, strand="+", src_size=1000257, text="ACA-TTACT" ) ) |
---|
94 | a.components.append( align.Component( src="horse_hoxa", start=120, size=10, strand="-", src_size=98892, text="ACAATTGCT" ) ) |
---|
95 | |
---|
96 | check_component( a.components[0], "human_hoxa", 100, 9, "+", 1000257, "ACA-TTACT" ) |
---|
97 | check_component( a.components[1], "horse_hoxa", 120, 10, "-", 98892, "ACAATTGCT" ) |
---|
98 | |
---|
99 | writer.write( a ) |
---|
100 | |
---|
101 | assert val.getvalue() == """##maf version=1 scoring=foobar |
---|
102 | a score=7009 |
---|
103 | s human_hoxa 100 9 + 1000257 ACA-TTACT |
---|
104 | s horse_hoxa 120 10 - 98892 ACAATTGCT |
---|
105 | |
---|
106 | """ |
---|
107 | |
---|
108 | |
---|
109 | |
---|
110 | def test_slice(): |
---|
111 | |
---|
112 | a = align.Alignment() |
---|
113 | a.score = "7009" |
---|
114 | a.components.append( align.Component( src="human_hoxa", start=100, size=9, strand="+", src_size=100257, text="ACA-TTACT" ) ) |
---|
115 | a.components.append( align.Component( src="horse_hoxa", start=120, size=10, strand="-", src_size=98892, text="ACAATTGCT" ) ) |
---|
116 | |
---|
117 | b = a.slice_by_component( 0, 101, 105 ) |
---|
118 | |
---|
119 | check_component( b.components[0], src="human_hoxa", start=101, size=4, strand="+", src_size=100257, text="CA-TT" ) |
---|
120 | check_component( b.components[1], src="horse_hoxa", start=121, size=5, strand="-", src_size=98892, text ="CAATT" ) |
---|
121 | |
---|
122 | # test slicing with + strand src |
---|
123 | reader = maf.Reader( StringIO( test_maf_3 ) ) |
---|
124 | a = reader.next() |
---|
125 | b = a.slice_by_component( 0, 40, 62 ) |
---|
126 | check_component( b.components[0], src="apple", start=40, size=22, strand="+", src_size=110, text="TTCGTCACT------GTCGTAAGGGTTC" ) |
---|
127 | check_component( b.components[1], src="orange", start=28, size=22, strand="-", src_size=100, text="TT--TCACTGCTATCGTCGTA----TTC" ) |
---|
128 | |
---|
129 | # test slicing with - strand src |
---|
130 | b = a.slice_by_component( 1, 30, 68 ) |
---|
131 | check_component( b.components[0], src="apple", start=46, size=41, strand="+", src_size=110, text="ACT------GTCGTAAGGGTTCAGA--CTGTCTATGTATACACAAGTTG" ) |
---|
132 | check_component( b.components[1], src="orange", start=32, size=38, strand="-", src_size=100, text="ACTGCTATCGTCGTA----TTCAGACTTCG-CTATCT------GAGTTG" ) |
---|
133 | |
---|
134 | a = reader.next() |
---|
135 | assert a is None |
---|
136 | |
---|
137 | |
---|
138 | def test_with_synteny(): |
---|
139 | reader = maf.Reader( StringIO( test_maf_2 ), parse_e_rows=True ) |
---|
140 | |
---|
141 | a = reader.next() |
---|
142 | check_component( a.components[0], "hg17.chr1", 2005, 34, "+", 245522847, "TGTAACTTAATACCACAACCAGGCATAGGGG--AAA-------------") |
---|
143 | check_component( a.components[1], "rheMac2.chr11", 9625228, 31, "+", 134511895, "TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------") |
---|
144 | print a.components[1].synteny_left |
---|
145 | assert a.components[1].synteny_left == ( maf.MAF_CONTIG_STATUS, 0 ) |
---|
146 | assert a.components[1].synteny_right == ( maf.MAF_INSERT_STATUS, 1678 ) |
---|
147 | |
---|
148 | rat = a.get_component_by_src_start( "rn3." ) |
---|
149 | check_component( rat, "rn3.chr4", 29161032, 1524, "-", 187371129, None ) |
---|
150 | assert rat.synteny_empty == maf.MAF_INSERT_STATUS |
---|
151 | |
---|
152 | def test_write_with_synteny(): |
---|
153 | reader = maf.Reader( StringIO( test_maf_2 ), parse_e_rows=True ) |
---|
154 | a = reader.next() |
---|
155 | val = StringIO() |
---|
156 | writer = maf.Writer( val, { 'scoring':'foobar' } ) |
---|
157 | writer.write( a ) |
---|
158 | actual = val.getvalue() |
---|
159 | expected = """##maf version=1 scoring=foobar |
---|
160 | a score=3656.0 |
---|
161 | s hg17.chr1 2005 34 + 245522847 TGTAACTTAATACCACAACCAGGCATAGGGG--AAA------------- |
---|
162 | s rheMac2.chr11 9625228 31 + 134511895 TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------ |
---|
163 | i rheMac2.chr11 C 0 I 1678 |
---|
164 | s panTro1.chr1 2014 34 + 229575298 TGTAACTTAATACCACAACCAGGCATGGGGG--AAA------------- |
---|
165 | i panTro1.chr1 C 0 C 0 |
---|
166 | s bosTau2.chr5 64972365 47 + 76426644 TCCAGCCATGTGTTGTGATCAG--CCAGGGGCTAAAGCCATGGCGGTAG |
---|
167 | i bosTau2.chr5 C 0 I 1462 |
---|
168 | s canFam2.chr27 45129665 31 + 48908698 TTTGACTCTGTGCTCTTATCAGGCCCAAGGG------------------ |
---|
169 | i canFam2.chr27 C 0 I 1664 |
---|
170 | e danRer3.chr18 2360867 428 + 50308305 I |
---|
171 | e oryCun1.scaffold_139397 643 1271 - 4771 I |
---|
172 | e loxAfr1.scaffold_5603 58454 1915 + 68791 I |
---|
173 | e echTel1.scaffold_212365 4641 1430 + 9822 I |
---|
174 | e echTel1.scaffold_212365 4641 1430 + 9822 I |
---|
175 | e rn3.chr4 29161032 1524 - 187371129 I |
---|
176 | e mm7.chr6 28091695 3290 - 149646834 I |
---|
177 | |
---|
178 | """ |
---|
179 | print actual |
---|
180 | print "---" |
---|
181 | print expected |
---|
182 | assert actual == expected |
---|
183 | |
---|
184 | def check_component( c, src, start, size, strand, src_size, text ): |
---|
185 | assert c.src == src |
---|
186 | assert c.start == start |
---|
187 | assert c.size == size |
---|
188 | assert c.strand == strand |
---|
189 | assert c.src_size == src_size |
---|
190 | assert c.text == text |
---|