1 | import pkg_resources |
---|
2 | from StringIO import StringIO |
---|
3 | from string import Template |
---|
4 | from numpy import * |
---|
5 | |
---|
6 | PAD = 2 |
---|
7 | |
---|
8 | # Colors from rgb.txt, |
---|
9 | DNA_DEFAULT_COLORS = { |
---|
10 | 'A': "0.00 1.00 0.00", # green |
---|
11 | 'C': "0.00 0.00 1.00", # blue |
---|
12 | 'G': "1.00 0.65 0.00", # orange red |
---|
13 | 'T': "1.00 0.00 0.00" # red |
---|
14 | } |
---|
15 | |
---|
16 | # Template is adapted from Jim Kent's lib/dnaMotif.pss to support aritrary |
---|
17 | # alphabets. |
---|
18 | TEMPLATE = "template.ps" |
---|
19 | |
---|
20 | def freqs_to_heights( matrix ): |
---|
21 | """ |
---|
22 | Calculate logo height using the method of: |
---|
23 | |
---|
24 | Schneider TD, Stephens RM. "Sequence logos: a new way to display consensus |
---|
25 | sequences." Nucleic Acids Res. 1990 Oct 25;18(20):6097-100. |
---|
26 | """ |
---|
27 | # Columns are sequence positions, rows are symbol counts/frequencies |
---|
28 | f = matrix.values.transpose() |
---|
29 | n, m = f.shape |
---|
30 | # Ensure normalized |
---|
31 | f = f / sum( f, axis=0 ) |
---|
32 | # Shannon entropy (the where replaces 0 with 1 so that '0 log 0 == 0') |
---|
33 | H = - sum( f * log2( where( f, f, 1 ) ), axis=0 ) |
---|
34 | # Height |
---|
35 | return transpose( f * ( log2( n ) - H ) ) |
---|
36 | |
---|
37 | def eps_logo( matrix, base_width, height, colors=DNA_DEFAULT_COLORS ): |
---|
38 | """ |
---|
39 | Return an EPS document containing a sequence logo for matrix where each |
---|
40 | bases is shown as a column of `base_width` points and the total logo |
---|
41 | height is `height` points. If `colors` is provided it is a mapping from |
---|
42 | characters to rgb color strings. |
---|
43 | """ |
---|
44 | alphabet = matrix.sorted_alphabet |
---|
45 | rval = StringIO() |
---|
46 | # Read header ans substitute in width / height |
---|
47 | header = Template( pkg_resources.resource_string( __name__, "template.ps" ) ) |
---|
48 | rval.write( header.substitute( bounding_box_width = ceil( base_width * matrix.width ) + PAD, |
---|
49 | bounding_box_height = ceil( height ) + PAD ) ) |
---|
50 | # Determine heights |
---|
51 | heights = freqs_to_heights( matrix ) |
---|
52 | height_scale = height / log2( len( alphabet ) ) |
---|
53 | # Draw each "row" of the matrix |
---|
54 | for i, row in enumerate( heights ): |
---|
55 | x = ( i * base_width ) |
---|
56 | y = 0 |
---|
57 | for j, base_height in enumerate( row ): |
---|
58 | char = alphabet[j] |
---|
59 | page_height = height_scale * base_height |
---|
60 | # print matrix.alphabet[j], base_height, height_scale, page_height |
---|
61 | if page_height > 1: |
---|
62 | # Draw letter |
---|
63 | rval.write( "%s setrgbcolor\n" % colors.get( char, '0 0 0' ) ) |
---|
64 | rval.write( "%3.2f " % x ) |
---|
65 | rval.write( "%3.2f " % y ) |
---|
66 | rval.write( "%3.2f " % ( x + base_width ) ) |
---|
67 | rval.write( "%3.2f " % ( y + page_height ) ) |
---|
68 | rval.write( "(%s) textInBox\n" % char ) |
---|
69 | y += page_height |
---|
70 | rval.write( "showpage" ) |
---|
71 | return rval.getvalue() |
---|