root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/seq/seq.py

リビジョン 3, 4.8 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Classes to support "biological sequence" files.
3
4:Author: Bob Harris (rsharris@bx.psu.edu)
5"""
6
# DNA reverse complement table
#
# DNA_COMP is a 256-character string indexed by character code, suitable for
# passing to str.translate().  Each nucleotide/ambiguity code below (in either
# case) and the gap character "-" is mapped to its complement; every other
# character code is mapped to a blank.

_DNA_COMP_PAIRS = dict(zip(
    "ABCDGHKMNRSTVWXY" "abcdghkmnrstvwxy" "-",
    "TVGHCDMKNYSABWXR" "tvghcdmknysabwxr" "-"))

DNA_COMP = "".join(_DNA_COMP_PAIRS.get(chr(code), " ") for code in range(256))
13
class SeqFile(object):
    """
    A biological sequence is a sequence of bytes or characters.  Usually these
    represent DNA (A,C,G,T), proteins, or some variation of those.

    This base class serves the sequence from `text`;  subclasses either fill
    in `text` and `length` (e.g. via set_text()) or override get().

    instance attributes:

        file:    file object containing the sequence
        revcomp: whether gets from this sequence should be reverse-complemented
                 False => no reverse complement
                 True  => (same as "-5'")
                 "maf" => (same as "-5'")
                 "+5'" => minus strand is from plus strand's 5' end (same as "-3'")
                 "+3'" => minus strand is from plus strand's 3' end (same as "-5'")
                 "-5'" => minus strand is from its 5' end (as per MAF file format)
                 "-3'" => minus strand is from its 3' end (as per genome browser,
                          but with origin-zero)
                 (the constructor stores the aliases as "-5'" or "-3'")
        name:    usually a species and/or chromosome name (e.g. "mule.chr5");  if
                 the file contains a name, that overrides this one
        gap:     gap character that aligners should use for gaps in this sequence
        text:    the sequence characters, or None until they have been set
        length:  number of characters in the sequence
    """

    def __init__(self, file=None, revcomp=False, name="", gap=None):
        self.file = file

        # Collapse the accepted aliases onto the two schemes get() understands;
        # anything else (including False) is stored untouched.
        if revcomp in (True, "+3'", "maf"):
            self.revcomp = "-5'"
        elif revcomp == "+5'":
            self.revcomp = "-3'"
        else:
            self.revcomp = revcomp

        self.name = name
        self.gap = "-" if gap is None else gap

        self.text   = None  # (subclasses fill in text and
        self.length = 0     #  length or they must override get())

    def close(self):
        """
        Close the underlying file and drop the reference to it.  An
        AssertionError is generated if there is no file to close.
        """
        assert self.file is not None
        self.file.close()
        self.file = None

    def extract_name(self, line):
        """
        Return the first whitespace-delimited word of `line`, or "" if the
        line has no words or is not something that can be split (e.g. None).
        """
        try:
            return line.split()[0]
        except (AttributeError, IndexError):
            # Only the "nothing to extract" cases are absorbed here;  anything
            # else (e.g. KeyboardInterrupt) is allowed to propagate.
            return ""

    def set_text(self, text):
        """Install `text` as the sequence and update `length` to match."""
        self.text   = text
        self.length = len(text)

    def __str__(self):
        text = ""
        if self.name is not None:
            text += self.name + " "
        text += self.get(0, self.length)
        return text

    def get(self, start, length):
        """
        Fetch subsequence starting at position `start` with length `length`.
        This method is picky about parameters, the requested interval must
        have non-negative length and fit entirely inside the sequence,
        the returned string will contain exactly 'length' characters, or an
        AssertionError will be generated.

        When `revcomp` is set, the result is reverse-complemented.  For "-3'"
        the interval is taken as given;  for "-5'" `start` is counted from
        the other end of the sequence.
        """
        # Check parameters
        assert length >= 0, "Length must be non-negative (got %d)" % length
        assert start >= 0, "Start must be non-negative (got %d)" % start
        assert start + length <= self.length, \
            "Interval beyond end of sequence (%s..%s > %s)" % ( start, start + length, self.length )
        # Fetch sequence and reverse complement if necessary
        if not self.revcomp:
            return self.raw_fetch(start, length)
        if self.revcomp == "-3'":
            return self.reverse_complement(self.raw_fetch(start, length))
        assert self.revcomp == "-5'", "unrecognized reverse complement scheme"
        # Mirror the interval so that it is measured from the far end
        start = self.length - (start + length)
        return self.reverse_complement(self.raw_fetch(start, length))

    def raw_fetch(self, start, length):
        """Return `length` characters of `text` beginning at `start`, as stored."""
        return self.text[start:start+length]

    def reverse_complement(self, text):
        """Return `text` complemented through DNA_COMP and then reversed."""
        return text.translate(DNA_COMP)[::-1]
102
103
class SeqReader(object):
    """
    Base class for objects that iterate over all sequences in a file, in
    order.  The constructor arguments are simply recorded on the instance.
    """

    def __init__(self, file, revcomp=False, name="", gap=None):
        # Nothing has been read yet;  this class itself never changes the
        # count (presumably subclasses advance it as they read -- confirm).
        self.seqs_read = 0

        self.gap     = gap
        self.name    = name
        self.revcomp = revcomp
        self.file    = file

    def close(self):
        """Close the file this reader was given."""
        self.file.close()

    def __iter__(self):
        """Return a SeqReaderIter wrapped around this reader."""
        return SeqReaderIter(self)

    def next(self):
        """
        Placeholder that returns None.  Subclasses should override this
        method and return the next sequence (of type SeqFile or a subclass)
        read from self.file.
        """
        return None
123
124
class SeqReaderIter(object):
    """
    Iterator over a reader object.  Each step calls the reader's next()
    method;  iteration ends the first time that call returns a false value
    (such as None).
    """

    def __init__(self, reader):
        self.reader = reader

    def __iter__(self):
        return self

    def next(self):
        """Return the reader's next item, or raise StopIteration when it has none."""
        v = self.reader.next()
        if not v:
            raise StopIteration
        return v

    # Python 3 looks for __next__ rather than next;  providing both lets
    # "for seq in reader" work under either version.
    __next__ = next
134
135
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。