root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/motif/pwm.py

リビジョン 3, 5.4 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Classes for working with position specific matrices.
3"""
4
5from numpy import *
6from copy import copy
7
8import _pwm
9
10class BaseMatrix( object ):
11    """
12    Base class for position specific matrices.
13    """
14    def __init__( self, alphabet=None, sorted_alphabet=None,
15                  char_to_index=None, values=None ):
16        self.alphabet = alphabet
17        self.sorted_alphabet = sorted_alphabet
18        self.char_to_index = char_to_index
19        self.values = values
20
21    @classmethod
22    def from_rows( Class, alphabet, rows ):
23        """
24        Create a new matrix for a sequence over alphabet `alphabet` taking
25        values from `rows` which is a list whose length is the width of the
26        matrix, and whose elements are lists of values associated with each
27        character (in the order those characters appear in alphabet).
28        """
29        # Sorted alphabet
30        sorted_alphabet = sorted( alphabet )
31        # Character to index mapping (initialized to -1)
32        char_to_index = zeros( (256), int16 ) - 1
33        for i, ch  in enumerate( sorted_alphabet ):
34            char_to_index[ ord(ch) ] = i
35        # Array
36        values = zeros( ( len( rows) , len( alphabet ) ), float32 )
37        for i, row in enumerate( rows ):
38            assert len( row ) == len( alphabet )
39            for ch, val in zip( alphabet, row ):
40                values[i, char_to_index[ord(ch)]] = val
41        # Matrix
42        matrix = Class()
43        matrix.alphabet = alphabet
44        matrix.sorted_alphabet = sorted_alphabet
45        matrix.char_to_index = char_to_index
46        matrix.values = values
47        return matrix
48     
49    @classmethod
50    def create_from_other( Class, other, values=None ):
51        """
52        Create a new Matrix with attributes taken from `other` but with the
53        values taken from `values` if provided
54        """
55        m = Class()
56        m.alphabet = other.alphabet
57        m.sorted_alphabet = other.sorted_alphabet
58        m.char_to_index = other.char_to_index
59        if values is not None:
60            m.values = values
61        else:
62            m.values = other.values
63        return m
64           
65    @property
66    def width( self ):
67        """
68        Return the width (size along the sequence axis) of this matrix.
69        """
70        return self.values.shape[0]
71
72    def reverse_complement( self ):
73        """
74        Create the reverse complement of this matrix. The result probably
75        only makese sense if the alphabet is that of DNA ('A','C','G','T').
76        """
77        rval = copy( self )
78        # Conveniently enough, reversing rows and columns is exactly what we
79        # want, since this results in A swapping with T and C swapping with G.
80        rval.values = self.values[::-1,::-1].copy()
81        return rval
82
83class FrequencyMatrix( BaseMatrix ):
84    """
85    A position specific count/frequency matrix.
86    """
87       
88    DEFAULT_CORRECTION = 0.0000000001
89    """
90    Default value to use for correcting when dealing with counts of zero,
91    chosen to produce scoring matrices that are the same as produced by CREAD.
92    """
93       
94    def to_logodds_scoring_matrix( self, background=None, correction=DEFAULT_CORRECTION ):
95        """
96        Create a standard logodds scoring matrix.
97        """
98        alphabet_size = len( self.alphabet )
99        if background is None:
100            background = ones( alphabet_size, float32 ) / alphabet_size
101        # Row totals as a one column array
102        totals = sum( self.values, 1 )[:,newaxis]
103        values = log2( maximum( self.values, correction ) ) \
104               - log2( totals ) \
105               - log2( maximum( background, correction ) )
106        return ScoringMatrix.create_from_other( self, values.astype( float32 ) )
107
108    def to_stormo_scoring_matrix( self, background=None ):
109        """
110        Create a scoring matrix from this count matrix using the method from:
111
112        Hertz, G.Z. and G.D. Stormo (1999). Identifying DNA and protein patterns with statistically
113        significant alignments of multiple sequences. Bioinformatics 15(7): 563-577.
114        """
115        alphabet_size = len( self.alphabet )
116        if background is None:
117            background = ones( alphabet_size, float32 ) / alphabet_size
118        # Row totals as a one column array
119        totals = sum( self.values, 1 )[:,newaxis]
120        values = log2( self.values + background ) \
121               - log2( totals + 1 ) - log2( background )
122        return ScoringMatrix.create_from_other( self, values.astype( float32 ) )
123       
124class ScoringMatrix( BaseMatrix ):
125    """
126    A position specific matrix containing values that are suitable for
127    scoring a sequence.
128    """
129       
130    def score_string( self, string ):
131        """
132        Score each valid position in `string` using this scoring matrix.
133        Positions which were not scored are set to nan.
134        """
135        rval = zeros( len( string ), float32 )
136        rval[:] = nan
137        _pwm.score_string( self.values, self.char_to_index, string, rval )
138        return rval
139       
140    def score_string_with_gaps( self, string ):
141        """
142        Score each valid position in `string` using this scoring matrix.
143        Positions which were not scored are set to nan. Gap characters are
144        ignored (matrices score across them).
145        """
146        rval = zeros( len( string ), float32 )
147        rval[:] = nan
148        _pwm.score_string_with_gaps( self.values, self.char_to_index, string, rval )
149        return rval
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。