| 1 | import pkg_resources |
|---|
| 2 | from StringIO import StringIO |
|---|
| 3 | from string import Template |
|---|
| 4 | from numpy import * |
|---|
| 5 | |
|---|
| 6 | PAD = 2 |
|---|
| 7 | |
|---|
| 8 | # Colors from rgb.txt, |
|---|
| 9 | DNA_DEFAULT_COLORS = { |
|---|
| 10 | 'A': "0.00 1.00 0.00", # green |
|---|
| 11 | 'C': "0.00 0.00 1.00", # blue |
|---|
| 12 | 'G': "1.00 0.65 0.00", # orange red |
|---|
| 13 | 'T': "1.00 0.00 0.00" # red |
|---|
| 14 | } |
|---|
| 15 | |
|---|
| 16 | # Template is adapted from Jim Kent's lib/dnaMotif.pss to support aritrary |
|---|
| 17 | # alphabets. |
|---|
| 18 | TEMPLATE = "template.ps" |
|---|
| 19 | |
|---|
| 20 | def freqs_to_heights( matrix ): |
|---|
| 21 | """ |
|---|
| 22 | Calculate logo height using the method of: |
|---|
| 23 | |
|---|
| 24 | Schneider TD, Stephens RM. "Sequence logos: a new way to display consensus |
|---|
| 25 | sequences." Nucleic Acids Res. 1990 Oct 25;18(20):6097-100. |
|---|
| 26 | """ |
|---|
| 27 | # Columns are sequence positions, rows are symbol counts/frequencies |
|---|
| 28 | f = matrix.values.transpose() |
|---|
| 29 | n, m = f.shape |
|---|
| 30 | # Ensure normalized |
|---|
| 31 | f = f / sum( f, axis=0 ) |
|---|
| 32 | # Shannon entropy (the where replaces 0 with 1 so that '0 log 0 == 0') |
|---|
| 33 | H = - sum( f * log2( where( f, f, 1 ) ), axis=0 ) |
|---|
| 34 | # Height |
|---|
| 35 | return transpose( f * ( log2( n ) - H ) ) |
|---|
| 36 | |
|---|
| 37 | def eps_logo( matrix, base_width, height, colors=DNA_DEFAULT_COLORS ): |
|---|
| 38 | """ |
|---|
| 39 | Return an EPS document containing a sequence logo for matrix where each |
|---|
| 40 | bases is shown as a column of `base_width` points and the total logo |
|---|
| 41 | height is `height` points. If `colors` is provided it is a mapping from |
|---|
| 42 | characters to rgb color strings. |
|---|
| 43 | """ |
|---|
| 44 | alphabet = matrix.sorted_alphabet |
|---|
| 45 | rval = StringIO() |
|---|
| 46 | # Read header ans substitute in width / height |
|---|
| 47 | header = Template( pkg_resources.resource_string( __name__, "template.ps" ) ) |
|---|
| 48 | rval.write( header.substitute( bounding_box_width = ceil( base_width * matrix.width ) + PAD, |
|---|
| 49 | bounding_box_height = ceil( height ) + PAD ) ) |
|---|
| 50 | # Determine heights |
|---|
| 51 | heights = freqs_to_heights( matrix ) |
|---|
| 52 | height_scale = height / log2( len( alphabet ) ) |
|---|
| 53 | # Draw each "row" of the matrix |
|---|
| 54 | for i, row in enumerate( heights ): |
|---|
| 55 | x = ( i * base_width ) |
|---|
| 56 | y = 0 |
|---|
| 57 | for j, base_height in enumerate( row ): |
|---|
| 58 | char = alphabet[j] |
|---|
| 59 | page_height = height_scale * base_height |
|---|
| 60 | # print matrix.alphabet[j], base_height, height_scale, page_height |
|---|
| 61 | if page_height > 1: |
|---|
| 62 | # Draw letter |
|---|
| 63 | rval.write( "%s setrgbcolor\n" % colors.get( char, '0 0 0' ) ) |
|---|
| 64 | rval.write( "%3.2f " % x ) |
|---|
| 65 | rval.write( "%3.2f " % y ) |
|---|
| 66 | rval.write( "%3.2f " % ( x + base_width ) ) |
|---|
| 67 | rval.write( "%3.2f " % ( y + page_height ) ) |
|---|
| 68 | rval.write( "(%s) textInBox\n" % char ) |
|---|
| 69 | y += page_height |
|---|
| 70 | rval.write( "showpage" ) |
|---|
| 71 | return rval.getvalue() |
|---|