1 | """ |
---|
2 | Support for "biological sequence" files. |
---|
3 | |
---|
4 | :Author: Bob Harris (rsharris@bx.psu.edu) |
---|
5 | |
---|
6 | See seq.py for more information |
---|
7 | """ |
---|
8 | |
---|
9 | import struct |
---|
10 | import fasta, nib, qdna |
---|
11 | |
---|
# DNA reverse complement table

def _build_dna_comp():
    """Build the 256-entry complement table, indexed by character code."""
    # Every character that is not listed below complements to a space.
    table = [" "] * 256
    # The gap character is its own complement.
    table[ord("-")] = "-"
    # Nucleotide codes and their complements, position by position.
    bases = "ABCDGHKMNRSTVWXY"
    comps = "TVGHCDMKNYSABWXR"
    for base, comp in zip(bases, comps):
        table[ord(base)] = comp
        table[ord(base.lower())] = comp.lower()
    return "".join(table)


# Built programmatically rather than written as a literal: the table is
# indexed by character code, so its layout must not depend on the exact
# amount of padding whitespace surviving in the source text.
DNA_COMP = _build_dna_comp()


def reverse_complement(text):
    """
    Return the reverse complement of the sequence `text`.

    Each character is replaced through DNA_COMP (upper and lower case
    nucleotide codes map to their complement, "-" maps to itself, and any
    other character in the table's range maps to a space), then the
    result is reversed.
    """
    return text.translate(DNA_COMP)[::-1]
---|
21 | |
---|
22 | |
---|
def seq_file(file, format=None, revcomp=False, name="", gap=None, contig=None):
    """
    Create a sequence-file object for `file` in the given format.

    :param file: open file object; when `format` is None it is passed to
        infer_format() to determine the format.
    :param format: "fasta", "nib" or "qdna"; None means infer it.
    :param revcomp: passed through to the format-specific class.
    :param name: passed through to the format-specific class.
    :param gap: passed through to the format-specific class.
    :param contig: passed through to fasta.FastaFile only; not accepted
        for any other known format.
    :return: a fasta.FastaFile, nib.NibFile or qdna.QdnaFile instance.
    :raises ValueError: if `contig` is given for a non-fasta format, or if
        the format is unknown or could not be inferred.
    """
    if format is None:
        format = infer_format(file)
    if contig is not None and format not in ("fasta", None):
        # String exceptions are rejected by the interpreter, so a real
        # exception class is raised to keep the message visible.
        raise ValueError("Contigs are not supported for format %s" % format)

    if format == "fasta":
        return fasta.FastaFile(file, revcomp=revcomp, name=name, gap=gap, contig=contig)
    if format == "nib":
        return nib.NibFile(file, revcomp=revcomp, name=name, gap=gap)
    if format == "qdna":
        return qdna.QdnaFile(file, revcomp=revcomp, name=name, gap=gap)

    # Unknown format: omit the format name entirely when none was inferred.
    if format is None:
        described = ""
    else:
        described = " " + str(format)
    # Not every file-like object has a name; fall back to the object itself
    # so the lookup cannot mask the real error.
    source = getattr(file, "name", file)
    raise ValueError("Unknown sequence format%s in %s" % (described, source))
---|
34 | |
---|
35 | |
---|
def seq_reader(file, format=None, revcomp=False, name="", gap=None):
    """
    Create a sequence reader for `file` in the given format.

    :param file: open file object; when `format` is None it is passed to
        infer_format() to determine the format.
    :param format: "fasta", "nib" or "qdna"; None means infer it.
    :param revcomp: passed through to the format-specific reader.
    :param name: passed through to the format-specific reader.
    :param gap: passed through to the format-specific reader.
    :return: a fasta.FastaReader, nib.NibReader or qdna.QdnaReader.
    :raises ValueError: if the format is unknown or could not be inferred.
    """
    if format is None:
        format = infer_format(file)

    if format == "fasta":
        return fasta.FastaReader(file, revcomp=revcomp, name=name, gap=gap)
    if format == "nib":
        return nib.NibReader(file, revcomp=revcomp, name=name, gap=gap)
    if format == "qdna":
        return qdna.QdnaReader(file, revcomp=revcomp, name=name, gap=gap)

    # String exceptions are rejected by the interpreter, so a real
    # exception class is raised to keep the message visible.
    raise ValueError("Unknown sequence format %s" % format)
---|
42 | |
---|
43 | |
---|
def seq_writer(outfile, format=None, name=""):
    """
    Create a sequence writer for `outfile` in the given format.

    Unlike the reader factories, the format is never inferred here, so
    leaving `format` as None is reported as an unknown format.

    :param outfile: output file object handed to the writer class.
    :param format: "fasta", "nib" or "qdna".
    :param name: accepted but not used by this function.
    :return: a fasta.FastaWriter, nib.NibWriter or qdna.QdnaWriter.
    :raises ValueError: if `format` is not one of the known formats.
    """
    if format == "fasta":
        return fasta.FastaWriter(outfile)
    if format == "nib":
        return nib.NibWriter(outfile)
    if format == "qdna":
        return qdna.QdnaWriter(outfile)

    # String exceptions are rejected by the interpreter, so a real
    # exception class is raised to keep the message visible.
    raise ValueError("Unknown sequence format %s" % format)
---|
49 | |
---|
50 | |
---|
def infer_format(file):
    """
    Guess the sequence format of `file` from its leading bytes.

    The first four bytes are read as a big-endian unsigned integer and
    compared with the nib and qdna magic numbers (native and swapped).
    If neither matches, a leading ">" identifies the file as fasta.

    :param file: open file object supporting read() and seek(); it is
        repositioned to offset 0 before returning.
    :return: "nib", "qdna", "fasta", or None if nothing matched.
    """
    header = file.read(4)
    format = None

    # A file shorter than four bytes cannot carry a magic number; skip the
    # unpack instead of letting struct.error escape.
    if len(header) == 4:
        magic = struct.unpack(">L", header)[0]
        if magic in (nib.NIB_MAGIC_NUMBER, nib.NIB_MAGIC_NUMBER_SWAP):
            format = "nib"
        elif magic in (qdna.qdnaMagic, qdna.qdnaMagicSwap):
            format = "qdna"

    # Compare against both bytes and str so the check works whether read()
    # returned bytes (Python 3 binary stream) or str (Python 2).
    if format is None and header[:1] in (b">", ">"):
        format = "fasta"

    # Rewind so the caller's reader starts from the beginning of the file.
    file.seek(0)
    return format
---|
64 | |
---|