[3] | 1 | """ |
---|
| 2 | Classes for reading and writing motif data. |
---|
| 3 | """ |
---|
| 4 | |
---|
| 5 | from bx.motif.pwm import FrequencyMatrix |
---|
| 6 | |
---|
class TransfacMotif( object ):
    """
    Container for the fields of a single TRANSFAC (or CREAD) motif record.

    Every attribute starts out as `None`. The attribute names are the ones
    referenced by the `transfac_actions` table, which decides which line
    prefix fills which attribute.
    """

    def __init__( self ):
        # Single line values ("AC", "ID", "NA")
        self.accession = None
        self.id = None
        self.name = None
        # Lists with one entry per line ("DT", "BF")
        self.dates = None
        self.binding_factors = None
        # Multi-line text blocks ("DE", "BA", "CC")
        self.description = None
        self.basis = None
        self.comment = None
        # Matrix built from the "P0" header and the numbered rows after it
        self.matrix = None
        # CREAD extensions: "TY" single value, "AT" key/value pairs, "BS" list.
        # `type` must exist even when a record has no "TY" line, otherwise
        # code that walks `transfac_actions` with getattr() fails with an
        # AttributeError.
        self.type = None
        self.attributes = None
        self.sites = None
---|
| 21 | |
---|
# Maps each two character line prefix to ( action, attribute ): `action` names
# how lines with that prefix are handled and `attribute` is the name of the
# motif attribute they are stored in.
transfac_actions = dict(
    ( prefix, ( action, attribute ) )
    for prefix, action, attribute in (
        ( "AC", "store_single", "accession" ),
        ( "ID", "store_single", "id" ),
        ( "DT", "store_single_list", "dates" ),
        ( "NA", "store_single", "name" ),
        ( "DE", "store_block", "description" ),
        ( "BF", "store_single_list", "binding_factors" ),
        ( "BA", "store_block", "basis" ),
        ( "CC", "store_block", "comment" ),
        ( "P0", "store_matrix", "matrix" ),
        # For CREAD format files
        ( "TY", "store_single", "type" ),
        ( "AT", "store_single_key_value", "attributes" ),
        ( "BS", "store_single_list", "sites" ),
    )
)
---|
| 37 | |
---|
class TransfacReader( object ):
    """
    Reads motifs in TRANSFAC format.

    The reader is an iterator over the motifs found in `input`, which may be
    any iterable of lines (an open file, a list of strings, ...). Records are
    terminated by a line starting with "//"; records that carry none of ID,
    AC or NA are skipped.
    """

    # Maps line prefix -> ( action, motif attribute name )
    parse_actions = transfac_actions

    def __init__( self, input ):
        """
        Create a reader over `input`, an iterable yielding lines of text.
        """
        self.input = iter( input )
        self.input_exhausted = False

    def as_dict( self, key="id" ):
        """
        Return a dictionary containing all remaining motifs, using `key`
        as the dictionary key.

        `key` is the name of a motif attribute; if several motifs share the
        same value the last one read wins.
        """
        rval = {}
        for motif in self:
            rval[ getattr( motif, key ) ] = motif
        return rval

    def __iter__( self ):
        return self

    def next( self ):
        """
        Return the next motif, raising `StopIteration` when the input has
        been consumed.
        """
        # `next_motif` returns None for empty or unidentified records, so
        # keep reading until a motif shows up or StopIteration propagates.
        rval = self.next_motif()
        while rval is None:
            rval = self.next_motif()
        return rval

    # Python 3 spelling of the iterator protocol
    __next__ = next

    def next_motif( self ):
        """
        Read one record from the input and parse it.

        Returns the parsed motif, or None when the record was empty or could
        not be identified. Raises `StopIteration` once the input has been
        exhausted by a previous call.
        """
        if self.input_exhausted:
            raise StopIteration
        # Accumulate lines until either the end of record indicator "//" is
        # encountered or the input is exhausted.
        lines = []
        while True:
            try:
                line = next( self.input )
            except StopIteration:
                self.input_exhausted = True
                break
            if line.startswith( "//" ):
                break
            # Skip blank lines. `strip()` also rejects the empty string,
            # which `isspace()` does not and which cannot be split into a
            # prefix later on.
            if line.strip():
                lines.append( line )
        if lines:
            return self.parse_record( lines )
        return None

    def parse_record( self, lines ):
        """
        Parse a TRANSFAC record out of `lines` and return a motif.

        Lines whose prefix has no entry in `parse_actions` are ignored.
        Returns None when the record has none of ID, AC or NA. Raises
        `ValueError` for a key/value line that has no "=".
        """
        # Break each line up into [ prefix, rest ]
        split_lines = []
        for line in lines:
            fields = line.rstrip( "\r\n" ).split( None, 1 )
            if not fields:
                # Blank line, nothing to parse
                continue
            if len( fields ) == 1:
                fields.append( "" )
            split_lines.append( fields )
        lines = split_lines
        # Fill in motif from lines
        motif = TransfacMotif()
        current_line = 0
        while current_line < len( lines ):
            prefix, rest = lines[ current_line ]
            # No action for this prefix, just ignore the line
            if prefix not in self.parse_actions:
                current_line += 1
                continue
            action = self.parse_actions[ prefix ]
            kind = action[0]
            key = action[1]
            if kind == "store_single":
                # Store a single line value
                setattr( motif, key, rest )
                current_line += 1
            elif kind == "store_single_list":
                # Add a single line value to a list
                if not getattr( motif, key, None ):
                    setattr( motif, key, [] )
                getattr( motif, key ).append( rest )
                current_line += 1
            elif kind == "store_single_key_value":
                # Add a single "key=value" line to a dictionary
                k, sep, v = rest.strip().partition( '=' )
                if not sep:
                    raise ValueError( "Expected 'key=value' after prefix %r, got %r" % ( prefix, rest ) )
                if not getattr( motif, key, None ):
                    setattr( motif, key, {} )
                getattr( motif, key )[k] = v
                current_line += 1
            elif kind == "store_block":
                # Store a block of text: all consecutive lines sharing this prefix
                block = []
                while current_line < len( lines ) and lines[ current_line ][0] == prefix:
                    block.append( lines[ current_line ][1] )
                    current_line += 1
                setattr( motif, key, "\n".join( block ) )
            elif kind == "store_matrix":
                # First line is the alphabet, following lines are the rows
                alphabet = rest.split()
                rows, current_line = self._read_matrix_rows( lines, current_line + 1, len( alphabet ) )
                setattr( motif, key, FrequencyMatrix.from_rows( alphabet, rows ) )
            else:
                # Unknown action kind: skip the line so the loop always advances
                current_line += 1
        # Only return a motif if we saw at least ID or AC or NA
        if motif.id or motif.accession or motif.name:
            return motif
        return None

    def _read_matrix_rows( self, lines, start, alphabet_size ):
        """
        Collect matrix rows from `lines` beginning at index `start`.

        Returns ( rows, index ) where `rows` is a list of lists of floats and
        `index` is the position of the first line that is not a matrix row.
        Zero rows are allowed.
        """
        rows = []
        index = start
        while index < len( lines ):
            row_prefix, row_rest = lines[ index ]
            # Row lines have an all-digit prefix (the row number)
            if not row_prefix.isdigit():
                break
            # The first `alphabet_size` fields are the row values; any extra
            # column (TRANSFAC adds the IUPAC code) is ignored.
            values = row_rest.split()
            rows.append( [ float( v ) for v in values[:alphabet_size] ] )
            index += 1
        return rows, index
---|
| 172 | |
---|
class TransfacWriter( object ):
    """
    Writes motifs in TRANSFAC format.

    `output` is any object with a `write` method. Fields are written in the
    iteration order of `actions`, each followed by an "XX" separator line.
    """

    # Maps line prefix -> ( action, motif attribute name )
    actions = transfac_actions

    def __init__( self, output ):
        """
        Create a writer that sends records to `output`.
        """
        self.output = output

    def _write_line( self, prefix, text ):
        """
        Write one `prefix   text` line (three spaces between the two parts).
        """
        self.output.write( "%s   %s\n" % ( prefix, text ) )

    def write( self, motif ):
        """
        Write `motif` as a single record terminated by a "//" line.

        Attributes that are None, or that the motif does not have at all,
        are left out of the record.
        """
        for prefix, actions in self.actions.items():
            action = actions[0]
            key = actions[1]
            # Tolerate motifs lacking an attribute instead of raising
            value = getattr( motif, key, None )
            if value is None:
                continue
            if action == "store_single":
                self._write_line( prefix, value )
            elif action == "store_single_list":
                for v in value:
                    self._write_line( prefix, v )
            elif action == "store_single_key_value":
                for k, v in value.items():
                    self._write_line( prefix, "%s=%s" % ( k, v ) )
            elif action == "store_block":
                for line in value.split( "\n" ):
                    self._write_line( prefix, line )
            elif action == "store_matrix":
                matrix = value
                # Header line lists the alphabet, one right justified column per symbol
                self._write_line( prefix, " ".join( [ s.rjust(6) for s in matrix.alphabet ] ) )
                # One line per matrix position, prefixed by its 1-based, 0 padded number
                for i in range( matrix.width ):
                    row = [ str( matrix.values[i,matrix.char_to_index[ord(s)]] ).rjust(6) for s in matrix.alphabet ]
                    self._write_line( "%02d" % ( i + 1 ), " ".join( row ) )
            else:
                # Unknown action: nothing is written, not even a separator
                continue
            self.output.write( "XX\n" )
        # The end of record marker belongs in the output stream, not on stdout
        self.output.write( "//\n" )
---|