[3] | 1 | """ |
---|
| 2 | Tests for `bx.align.maf`. |
---|
| 3 | """ |
---|
| 4 | |
---|
| 5 | import unittest |
---|
| 6 | import sys |
---|
| 7 | import bx.align as align |
---|
| 8 | import bx.align.maf as maf |
---|
| 9 | |
---|
| 10 | from StringIO import StringIO |
---|
| 11 | |
---|
| 12 | # A simple MAF from the rat paper days |
---|
| 13 | test_maf = """##maf version=1 scoring=humor.v4 |
---|
| 14 | # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf |
---|
| 15 | # /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf |
---|
| 16 | |
---|
| 17 | a score=0.128 |
---|
| 18 | s human_hoxa 100 8 + 100257 ACA-TTACT |
---|
| 19 | s horse_hoxa 120 9 - 98892 ACAATTGCT |
---|
| 20 | s fugu_hoxa 88 7 + 90788 ACA--TGCT |
---|
| 21 | |
---|
| 22 | |
---|
| 23 | a score=0.071 |
---|
| 24 | s human_unc 9077 8 + 10998 ACAGTATT |
---|
| 25 | # Comment |
---|
| 26 | s horse_unc 4555 6 - 5099 ACA--ATT |
---|
| 27 | s fugu_unc 4000 4 + 4038 AC----TT |
---|
| 28 | """ |
---|
| 29 | |
---|
| 30 | # A more complicated MAF with synteny annotation and such |
---|
| 31 | test_maf_2 = """##maf version=1 scoring=autoMZ.v1 |
---|
| 32 | a score=3656.000000 |
---|
| 33 | s hg17.chr1 2005 34 + 245522847 TGTAACTTAATACCACAACCAGGCATAGGGG--AAA------------- |
---|
| 34 | s rheMac2.chr11 9625228 31 + 134511895 TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------ |
---|
| 35 | i rheMac2.chr11 C 0 I 1678 |
---|
| 36 | s panTro1.chr1 2014 34 + 229575298 TGTAACTTAATACCACAACCAGGCATGGGGG--AAA------------- |
---|
| 37 | i panTro1.chr1 C 0 C 0 |
---|
| 38 | s bosTau2.chr5 64972365 47 + 76426644 TCCAGCCATGTGTTGTGATCAG--CCAGGGGCTAAAGCCATGGCGGTAG |
---|
| 39 | i bosTau2.chr5 C 0 I 1462 |
---|
| 40 | s canFam2.chr27 45129665 31 + 48908698 TTTGACTCTGTGCTCTTATCAGGCCCAAGGG------------------ |
---|
| 41 | i canFam2.chr27 C 0 I 1664 |
---|
| 42 | e danRer3.chr18 2360867 428 + 50308305 I |
---|
| 43 | e oryCun1.scaffold_139397 643 1271 - 4771 I |
---|
| 44 | e loxAfr1.scaffold_5603 58454 1915 + 68791 I |
---|
| 45 | e echTel1.scaffold_212365 4641 1430 + 9822 I |
---|
| 46 | e echTel1.scaffold_212365 4641 1430 + 9822 I |
---|
| 47 | e rn3.chr4 29161032 1524 - 187371129 I |
---|
| 48 | e mm7.chr6 28091695 3290 - 149646834 I |
---|
| 49 | |
---|
| 50 | """ |
---|
| 51 | |
---|
| 52 | # A MAF to test slicing upon |
---|
| 53 | test_maf_3 = """##maf version=1 scoring=none |
---|
| 54 | a score=0 |
---|
| 55 | s apple 34 64 + 110 AGGGA---GTTCGTCACT------GTCGTAAGGGTTCAGA--CTGTCTATGTATACACAAGTTGTGTTGCA--ACCG |
---|
| 56 | s orange 19 61 - 100 AGGGATGCGTT--TCACTGCTATCGTCGTA----TTCAGACTTCG-CTATCT------GAGTTGT---GCATTACCG |
---|
| 57 | |
---|
| 58 | """ |
---|
| 59 | |
---|
| 60 | def test_reader(): |
---|
| 61 | |
---|
| 62 | reader = maf.Reader( StringIO( test_maf ) ) |
---|
| 63 | assert reader.attributes["version"] == "1" |
---|
| 64 | assert reader.attributes["scoring"] == "humor.v4" |
---|
| 65 | |
---|
| 66 | a = reader.next() |
---|
| 67 | assert a.score == 0.128 |
---|
| 68 | assert len( a.components ) == 3 |
---|
| 69 | check_component( a.components[0], "human_hoxa", 100, 8, "+", 100257, "ACA-TTACT" ) |
---|
| 70 | check_component( a.components[1], "horse_hoxa", 120, 9, "-", 98892, "ACAATTGCT" ) |
---|
| 71 | check_component( a.components[2], "fugu_hoxa", 88, 7, "+", 90788, "ACA--TGCT" ) |
---|
| 72 | |
---|
| 73 | a = reader.next() |
---|
| 74 | assert a.score == 0.071 |
---|
| 75 | assert len( a.components ) == 3 |
---|
| 76 | check_component( a.components[0], "human_unc", 9077, 8, "+", 10998, "ACAGTATT" ) |
---|
| 77 | check_component( a.components[1], "horse_unc", 4555, 6, "-", 5099, "ACA--ATT" ) |
---|
| 78 | check_component( a.components[2], "fugu_unc", 4000, 4, "+", 4038, "AC----TT" ) |
---|
| 79 | |
---|
| 80 | a = reader.next() |
---|
| 81 | assert a is None |
---|
| 82 | |
---|
| 83 | reader.close() |
---|
| 84 | |
---|
| 85 | def test_writer(): |
---|
| 86 | |
---|
| 87 | val = StringIO() |
---|
| 88 | writer = maf.Writer( val, { 'scoring':'foobar' } ) |
---|
| 89 | |
---|
| 90 | a = align.Alignment() |
---|
| 91 | a.score = 7009 |
---|
| 92 | |
---|
| 93 | a.components.append( align.Component( src="human_hoxa", start=100, size=9, strand="+", src_size=1000257, text="ACA-TTACT" ) ) |
---|
| 94 | a.components.append( align.Component( src="horse_hoxa", start=120, size=10, strand="-", src_size=98892, text="ACAATTGCT" ) ) |
---|
| 95 | |
---|
| 96 | check_component( a.components[0], "human_hoxa", 100, 9, "+", 1000257, "ACA-TTACT" ) |
---|
| 97 | check_component( a.components[1], "horse_hoxa", 120, 10, "-", 98892, "ACAATTGCT" ) |
---|
| 98 | |
---|
| 99 | writer.write( a ) |
---|
| 100 | |
---|
| 101 | assert val.getvalue() == """##maf version=1 scoring=foobar |
---|
| 102 | a score=7009 |
---|
| 103 | s human_hoxa 100 9 + 1000257 ACA-TTACT |
---|
| 104 | s horse_hoxa 120 10 - 98892 ACAATTGCT |
---|
| 105 | |
---|
| 106 | """ |
---|
| 107 | |
---|
| 108 | |
---|
| 109 | |
---|
| 110 | def test_slice(): |
---|
| 111 | |
---|
| 112 | a = align.Alignment() |
---|
| 113 | a.score = "7009" |
---|
| 114 | a.components.append( align.Component( src="human_hoxa", start=100, size=9, strand="+", src_size=100257, text="ACA-TTACT" ) ) |
---|
| 115 | a.components.append( align.Component( src="horse_hoxa", start=120, size=10, strand="-", src_size=98892, text="ACAATTGCT" ) ) |
---|
| 116 | |
---|
| 117 | b = a.slice_by_component( 0, 101, 105 ) |
---|
| 118 | |
---|
| 119 | check_component( b.components[0], src="human_hoxa", start=101, size=4, strand="+", src_size=100257, text="CA-TT" ) |
---|
| 120 | check_component( b.components[1], src="horse_hoxa", start=121, size=5, strand="-", src_size=98892, text ="CAATT" ) |
---|
| 121 | |
---|
| 122 | # test slicing with + strand src |
---|
| 123 | reader = maf.Reader( StringIO( test_maf_3 ) ) |
---|
| 124 | a = reader.next() |
---|
| 125 | b = a.slice_by_component( 0, 40, 62 ) |
---|
| 126 | check_component( b.components[0], src="apple", start=40, size=22, strand="+", src_size=110, text="TTCGTCACT------GTCGTAAGGGTTC" ) |
---|
| 127 | check_component( b.components[1], src="orange", start=28, size=22, strand="-", src_size=100, text="TT--TCACTGCTATCGTCGTA----TTC" ) |
---|
| 128 | |
---|
| 129 | # test slicing with - strand src |
---|
| 130 | b = a.slice_by_component( 1, 30, 68 ) |
---|
| 131 | check_component( b.components[0], src="apple", start=46, size=41, strand="+", src_size=110, text="ACT------GTCGTAAGGGTTCAGA--CTGTCTATGTATACACAAGTTG" ) |
---|
| 132 | check_component( b.components[1], src="orange", start=32, size=38, strand="-", src_size=100, text="ACTGCTATCGTCGTA----TTCAGACTTCG-CTATCT------GAGTTG" ) |
---|
| 133 | |
---|
| 134 | a = reader.next() |
---|
| 135 | assert a is None |
---|
| 136 | |
---|
| 137 | |
---|
| 138 | def test_with_synteny(): |
---|
| 139 | reader = maf.Reader( StringIO( test_maf_2 ), parse_e_rows=True ) |
---|
| 140 | |
---|
| 141 | a = reader.next() |
---|
| 142 | check_component( a.components[0], "hg17.chr1", 2005, 34, "+", 245522847, "TGTAACTTAATACCACAACCAGGCATAGGGG--AAA-------------") |
---|
| 143 | check_component( a.components[1], "rheMac2.chr11", 9625228, 31, "+", 134511895, "TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------") |
---|
| 144 | print a.components[1].synteny_left |
---|
| 145 | assert a.components[1].synteny_left == ( maf.MAF_CONTIG_STATUS, 0 ) |
---|
| 146 | assert a.components[1].synteny_right == ( maf.MAF_INSERT_STATUS, 1678 ) |
---|
| 147 | |
---|
| 148 | rat = a.get_component_by_src_start( "rn3." ) |
---|
| 149 | check_component( rat, "rn3.chr4", 29161032, 1524, "-", 187371129, None ) |
---|
| 150 | assert rat.synteny_empty == maf.MAF_INSERT_STATUS |
---|
| 151 | |
---|
| 152 | def test_write_with_synteny(): |
---|
| 153 | reader = maf.Reader( StringIO( test_maf_2 ), parse_e_rows=True ) |
---|
| 154 | a = reader.next() |
---|
| 155 | val = StringIO() |
---|
| 156 | writer = maf.Writer( val, { 'scoring':'foobar' } ) |
---|
| 157 | writer.write( a ) |
---|
| 158 | actual = val.getvalue() |
---|
| 159 | expected = """##maf version=1 scoring=foobar |
---|
| 160 | a score=3656.0 |
---|
| 161 | s hg17.chr1 2005 34 + 245522847 TGTAACTTAATACCACAACCAGGCATAGGGG--AAA------------- |
---|
| 162 | s rheMac2.chr11 9625228 31 + 134511895 TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------ |
---|
| 163 | i rheMac2.chr11 C 0 I 1678 |
---|
| 164 | s panTro1.chr1 2014 34 + 229575298 TGTAACTTAATACCACAACCAGGCATGGGGG--AAA------------- |
---|
| 165 | i panTro1.chr1 C 0 C 0 |
---|
| 166 | s bosTau2.chr5 64972365 47 + 76426644 TCCAGCCATGTGTTGTGATCAG--CCAGGGGCTAAAGCCATGGCGGTAG |
---|
| 167 | i bosTau2.chr5 C 0 I 1462 |
---|
| 168 | s canFam2.chr27 45129665 31 + 48908698 TTTGACTCTGTGCTCTTATCAGGCCCAAGGG------------------ |
---|
| 169 | i canFam2.chr27 C 0 I 1664 |
---|
| 170 | e danRer3.chr18 2360867 428 + 50308305 I |
---|
| 171 | e oryCun1.scaffold_139397 643 1271 - 4771 I |
---|
| 172 | e loxAfr1.scaffold_5603 58454 1915 + 68791 I |
---|
| 173 | e echTel1.scaffold_212365 4641 1430 + 9822 I |
---|
| 174 | e echTel1.scaffold_212365 4641 1430 + 9822 I |
---|
| 175 | e rn3.chr4 29161032 1524 - 187371129 I |
---|
| 176 | e mm7.chr6 28091695 3290 - 149646834 I |
---|
| 177 | |
---|
| 178 | """ |
---|
| 179 | print actual |
---|
| 180 | print "---" |
---|
| 181 | print expected |
---|
| 182 | assert actual == expected |
---|
| 183 | |
---|
| 184 | def check_component( c, src, start, size, strand, src_size, text ): |
---|
| 185 | assert c.src == src |
---|
| 186 | assert c.start == start |
---|
| 187 | assert c.size == size |
---|
| 188 | assert c.strand == strand |
---|
| 189 | assert c.src_size == src_size |
---|
| 190 | assert c.text == text |
---|