1 | """ |
---|
2 | Classes for reading and writing motif data. |
---|
3 | """ |
---|
4 | |
---|
5 | from bx.motif.pwm import FrequencyMatrix |
---|
6 | |
---|
class TransfacMotif( object ):
    """
    Plain container for the fields of one motif record.

    Every attribute starts out as None. `TransfacReader.parse_record` fills
    in the ones present in a record, using the prefix -> attribute mapping
    in `transfac_actions`.
    """

    def __init__( self ):
        # Single-line fields ("AC", "ID", "NA")
        self.accession = None
        self.id = None
        self.name = None
        # One list entry per "DT" / "BF" line
        self.dates = None
        self.binding_factors = None
        # Multi-line text blocks ("DE", "BA", "CC"), joined with newlines
        self.description = None
        self.basis = None
        self.comment = None
        # Matrix built from the "P0" header line and the numbered rows after it
        self.matrix = None
        # CREAD-format fields. "TY" is stored under `type`; it is initialised
        # here like every other field so that code which reads all known
        # attributes (e.g. `TransfacWriter.write`) does not fail with an
        # AttributeError on motifs that have no "TY" line.
        self.type = None
        # Dictionary built from "AT" key=value lines
        self.attributes = None
        # One list entry per "BS" line
        self.sites = None
---|
21 | |
---|
# Maps each two-character line prefix to a ( storage action, attribute name )
# pair. The action names are interpreted by `TransfacReader.parse_record` and
# `TransfacWriter.write`; the attribute name is where the value lives on the
# motif object.
transfac_actions = dict( [
    ( "AC", ( "store_single", "accession" ) ),
    ( "ID", ( "store_single", "id" ) ),
    ( "DT", ( "store_single_list", "dates" ) ),
    ( "NA", ( "store_single", "name" ) ),
    ( "DE", ( "store_block", "description" ) ),
    ( "BF", ( "store_single_list", "binding_factors" ) ),
    ( "BA", ( "store_block", "basis" ) ),
    ( "CC", ( "store_block", "comment" ) ),
    ( "P0", ( "store_matrix", "matrix" ) ),
    # For CREAD format files
    ( "TY", ( "store_single", "type" ) ),
    ( "AT", ( "store_single_key_value", "attributes" ) ),
    ( "BS", ( "store_single_list", "sites" ) ),
] )
---|
37 | |
---|
class TransfacReader( object ):
    """
    Reads motifs in TRANSFAC format.

    `input` is any iterable of text lines (an open file, a list of strings,
    ...). The reader is itself an iterator that yields one `TransfacMotif`
    per record; a record ends at a line starting with "//" or at the end of
    the input.
    """

    parse_actions = transfac_actions

    def __init__( self, input ):
        self.input = iter( input )
        self.input_exhausted = False

    def as_dict( self, key="id" ):
        """
        Return a dictionary containing all remaining motifs, using `key`
        as the dictionary key.
        """
        rval = {}
        for motif in self:
            rval[ getattr( motif, key ) ] = motif
        return rval

    def __iter__( self ):
        return self

    def next( self ):
        """
        Return the next motif, skipping records that produce none. Raises
        StopIteration (from `next_motif`) once the input is exhausted.
        """
        rval = self.next_motif()
        while rval is None:
            rval = self.next_motif()
        return rval

    # Python 3 looks up `__next__` rather than `next`
    __next__ = next

    def next_motif( self ):
        """
        Read one record from the input and parse it.

        Returns the motif, or None if the record had no non-blank lines or
        `parse_record` rejected it. Raises StopIteration if the input was
        already exhausted by a previous call.
        """
        if self.input_exhausted:
            raise StopIteration
        # Accumulate lines until either the end of record indicator "//" is
        # encountered or the input is exhausted.
        lines = []
        while True:
            try:
                line = next( self.input )
            except StopIteration:
                self.input_exhausted = True
                break
            if line.startswith( "//" ):
                break
            # `strip` (rather than `isspace`) also rejects the empty string,
            # which shows up when the caller passes lines without newlines
            if line.strip():
                lines.append( line )
        if lines:
            return self.parse_record( lines )
        return None

    def parse_record( self, lines ):
        """
        Parse a TRANSFAC record out of `lines` and return a motif.

        Lines whose prefix has no entry in `parse_actions` are ignored.
        Returns None when the record set none of ID, AC or NA.
        """
        # Break lines up into [ prefix, rest ] pairs
        split_lines = []
        for line in lines:
            fields = line.rstrip( "\r\n" ).split( None, 1 )
            if not fields:
                # Blank line, nothing to parse
                continue
            if len( fields ) == 1:
                fields.append( "" )
            split_lines.append( fields )
        lines = split_lines
        # Fill in motif from lines
        motif = TransfacMotif()
        current_line = 0
        while current_line < len( lines ):
            prefix, rest = lines[ current_line ]
            # No action for this prefix, just ignore the line
            if prefix not in self.parse_actions:
                current_line += 1
                continue
            action = self.parse_actions[ prefix ]
            kind = action[0]
            key = action[1]
            if kind == "store_single":
                # Store a single line value
                setattr( motif, key, rest )
                current_line += 1
            elif kind == "store_single_list":
                # Add a single line value to a list
                if not getattr( motif, key, None ):
                    setattr( motif, key, [] )
                getattr( motif, key ).append( rest )
                current_line += 1
            elif kind == "store_single_key_value":
                # Add a single "key=value" line to a dictionary
                k, v = rest.strip().split( '=', 1 )
                if not getattr( motif, key, None ):
                    setattr( motif, key, {} )
                getattr( motif, key )[k] = v
                current_line += 1
            elif kind == "store_block":
                # Store a block of text: consecutive lines sharing this prefix
                value = []
                while current_line < len( lines ) and lines[ current_line ][0] == prefix:
                    value.append( lines[ current_line ][1] )
                    current_line += 1
                setattr( motif, key, "\n".join( value ) )
            elif kind == "store_matrix":
                current_line = self._parse_matrix( motif, key, lines, current_line )
            else:
                # Unknown action kind: skip the line so the loop always advances
                current_line += 1
        # Only return a motif if we saw at least ID or AC or NA
        if motif.id or motif.accession or motif.name:
            return motif
        return None

    def _parse_matrix( self, motif, key, lines, current_line ):
        """
        Parse the matrix whose header is `lines[ current_line ]`, store it on
        `motif` under `key`, and return the index of the first line after it.
        """
        # First line is alphabet
        alphabet = lines[ current_line ][1].split()
        alphabet_size = len( alphabet )
        rows = []
        current_line += 1
        # Next lines are the rows of the matrix (we allow 0 rows)
        while current_line < len( lines ):
            prefix, rest = lines[ current_line ]
            # Prefix should be a two digit 0 padded row number
            if not prefix.isdigit():
                break
            # The first `alphabet_size` fields are the row values. TRANSFAC
            # includes an extra column with the IUPAC code; it is ignored.
            values = rest.split()
            rows.append( [ float( v ) for v in values[:alphabet_size] ] )
            current_line += 1
        setattr( motif, key, FrequencyMatrix.from_rows( alphabet, rows ) )
        return current_line
---|
172 | |
---|
class TransfacWriter( object ):
    """
    Writes motifs in TRANSFAC format.

    `output` is any object with a `write` method (e.g. an open file). Each
    call to `write` emits one complete record, terminated by a "//" line.
    """

    actions = transfac_actions

    # Order in which prefixes are written: the order `transfac_actions`
    # declares them in. Iterating the dictionary directly would make the
    # field order depend on dictionary ordering.
    field_order = ( "AC", "ID", "DT", "NA", "DE", "BF", "BA", "CC", "P0",
                    "TY", "AT", "BS" )

    def __init__( self, output ):
        self.output = output

    def _ordered_prefixes( self ):
        """
        Return the prefixes in `actions`: known ones in `field_order` order,
        followed by any others in sorted order.
        """
        known = [ p for p in self.field_order if p in self.actions ]
        extra = sorted( p for p in self.actions if p not in self.field_order )
        return known + extra

    def write( self, motif ):
        """
        Write `motif` to the output as a single record.

        Attributes that are None (or missing from `motif`) are skipped. Each
        field written is followed by an "XX" separator line, and the record
        ends with "//".
        """
        output = self.output
        for prefix in self._ordered_prefixes():
            entry = self.actions[ prefix ]
            action = entry[0]
            key = entry[1]
            # Default to None so a motif lacking an attribute is treated the
            # same as one that never had the field set
            value = getattr( motif, key, None )
            if value is None:
                continue
            if action == "store_single":
                output.write( "%s   %s\n" % ( prefix, value ) )
            elif action == "store_single_list":
                for v in value:
                    output.write( "%s   %s\n" % ( prefix, v ) )
            elif action == "store_single_key_value":
                for k, v in value.items():
                    output.write( "%s   %s\n" % ( prefix, "%s=%s" % ( k, v ) ) )
            elif action == "store_block":
                for line in value.split( "\n" ):
                    output.write( "%s   %s\n" % ( prefix, line ) )
            elif action == "store_matrix":
                matrix = value
                # Header line lists the alphabet, each symbol right-justified
                header = " ".join( [ s.rjust( 6 ) for s in matrix.alphabet ] )
                output.write( "%s   %s\n" % ( prefix, header ) )
                # One row per matrix position, numbered from 01
                for i in range( matrix.width ):
                    cells = [ str( matrix.values[ i, matrix.char_to_index[ ord( s ) ] ] ).rjust( 6 )
                              for s in matrix.alphabet ]
                    output.write( "%02d   %s\n" % ( i + 1, " ".join( cells ) ) )
            else:
                # Unknown action kind: nothing written, so no separator either
                continue
            output.write( "XX\n" )
        # End of record marker goes to the same stream as the record itself
        output.write( "//\n" )
---|