root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/seq/fasta.py

リビジョン 3, 3.4 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Classes to support FASTA files.
3
4:Author: Bob Harris (rsharris@bx.psu.edu)
5
6A FASTA file contains multiple sequences.  Each sequence is usually DNA.
7
8A typical FASTA file::
9
10   >mule
11   TAATACCCCGGATATATGTCCTCACATAGTTCGAGGTCGAGAAAAATGAC
12   TTCCCACCAAGTGGACTCAGCTCGAGTAAACGCCAACGATACGTCCATTA
13   GGTGTGTGCCgcaactagtcggacccgttgtgacggaaacaggtccccgc
14   caagtcacacgggcatgtcatggacTCTCGATCGTTCATCGCCTTCTTGG
15   GTACCGCAGCCGCAATTAAGCCGTGTCTTCTTCCCCCTTCAAACGGGAAT
16   CGTGTCGACTTCTTAGGAGCAGNNNNNNNNNNCTAACTCCAGAG
17   >donkey
18   TAATACCCCGGATATATGTCTTAACATAGTTCCAGGTCGAGAAGAATGAC
19   TTGCCACCAAGTGGACTCAGATTCAGTCAACGCGAACGATAAGTCCATTA
20   GGTGTGTACCgcaactagtgggacccgttgtgacggaaacaggtcaccgc
21   caagtcacacgtgcatgtcatgtacTCTCGATCGTTTATCGCCTTCTTGG
22   GTACCGCAGCCGAAATTAAGCCGTGTCTTCTTCCCACTTCAAACGGGAAT
23   CGTGTCGACTTTACAGGAACAGNNNNNNNNNNATAACGCCAGAG
24    ... more sequences
25
26Typical use:
27
28        for seq in bx.seq.fasta.FastaReader(sys.stdin):
29                print seq.name
30                print seq.get(0,seq.length)
31
32"""
33
34from bx.seq.seq import SeqFile,SeqReader
35import sys, string
36
37class FastaFile(SeqFile):
38
39    def __init__(self, file, revcomp=False, name="", gap=None, lookahead=None, contig=None):
40        SeqFile.__init__(self,file,revcomp,name,gap)
41        self.lookahead = lookahead
42        if (contig == None): contig = 1
43        assert (contig >= 1), "contig %d is not legal" % contig
44
45        # nota bene: certainly not the most efficient or elegant implementation
46
47        currContig = 1
48        while (True):
49            if (self.lookahead != None):
50                (line,self.lookahead) = (self.lookahead,None)
51            else:
52                line = self.file.readline()
53            if (line == ""): break
54            if not line:
55                break
56            if (line.startswith(">")):
57                if (self.text != None):
58                    if (currContig == contig):
59                        self.lookahead = line # (next sequence header)
60                        break
61                    currContig += 1
62                self.name = self.extract_name(line[1:])
63                self.text = []
64                continue
65            line = line.split() # (remove whitespace)
66            if (self.text == None): self.text = line # (allows headerless fasta)
67            else:                   self.text.extend(line)
68        assert (currContig == contig), \
69            "contig %d is not legal (file contains only %d)" % (contig,currContig)
70        if (self.text != None):
71            self.text   = "".join(self.text)
72            self.length = len(self.text)
73
74class FastaReader(SeqReader):
75   
76    def __init__(self, file, revcomp=False, name="", gap=None):
77        SeqReader.__init__(self,file,revcomp,name,gap)
78        self.lookahead = None
79
80    def next(self):
81        seq = FastaFile(self.file,self.revcomp,self.name,self.gap,self.lookahead)
82        if (seq.text == None): return
83        self.lookahead = seq.lookahead
84        self.seqs_read += 1
85        return seq
86
87
88class FastaWriter(object):
89
90    def __init__(self,file,columns=50):
91        self.file    = file
92        self.columns = columns
93
94    def write(self,seq):
95        print >>self.file,">%s" % seq.name
96        text = seq.text
97        if (self.columns != None) and (self.columns > 0):
98            text = "\n".join([text[ix:ix+self.columns] \
99                              for ix in range(0,len(text),self.columns)])
100        print >>self.file,text
101
102    def close(self):
103        assert (self.file != None)
104        self.file.close()
105        self.file = None
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。