| 1 | """ |
|---|
| 2 | Support for "biological sequence" files. |
|---|
| 3 | |
|---|
| 4 | :Author: Bob Harris (rsharris@bx.psu.edu) |
|---|
| 5 | |
|---|
| 6 | See seq.py for more information |
|---|
| 7 | """ |
|---|
| 8 | |
|---|
| 9 | import struct |
|---|
| 10 | import fasta, nib, qdna |
|---|
| 11 | |
|---|
# DNA reverse complement table


def _build_dna_comp():
    """Build the 256-entry translation table used by reverse_complement()."""
    # Every character without a defined complement translates to a space.
    table = [" "] * 256
    # The gap character is its own complement.
    table[ord("-")] = "-"
    # Nucleotide and IUPAC ambiguity codes paired with their complements;
    # the same pairing is applied to the lower-case letters.
    for base, comp in zip("ABCDGHKMNRSTVWXY", "TVGHCDMKNYSABWXR"):
        table[ord(base)] = comp
        table[ord(base.lower())] = comp.lower()
    return "".join(table)


# Indexed by character code: DNA_COMP[ord(c)] is the complement of c.
# Generated rather than written as a literal so that the table is always
# exactly 256 characters long regardless of how the source is reformatted.
DNA_COMP = _build_dna_comp()


def reverse_complement(text):
    """
    Return the reverse complement of the DNA string `text`.

    Each character is replaced by its entry in DNA_COMP (case is preserved,
    "-" stays "-", characters with no complement become a space) and the
    result is then reversed.
    """
    return text.translate(DNA_COMP)[::-1]
|---|
| 21 | |
|---|
| 22 | |
|---|
def seq_file(file, format=None, revcomp=False, name="", gap=None, contig=None):
    """
    Create the sequence-file object matching `format` for an open file.

    :param file: open file object holding the sequence data
    :param format: "fasta", "nib" or "qdna"; when None the format is
        determined by calling infer_format(file)
    :param revcomp: passed through unchanged to the format-specific class
    :param name: passed through unchanged to the format-specific class
    :param gap: passed through unchanged to the format-specific class
    :param contig: passed through to fasta.FastaFile; only the fasta
        format accepts it
    :returns: a fasta.FastaFile, nib.NibFile or qdna.QdnaFile instance
    :raises ValueError: if `contig` is given for a non-fasta format, or if
        the format is unknown or could not be inferred
    """
    if format is None:
        format = infer_format(file)
    if contig is not None and format not in ("fasta", None):
        raise ValueError("Contigs are not supported for format %s" % format)

    if format == "fasta":
        return fasta.FastaFile(file, revcomp=revcomp, name=name, gap=gap, contig=contig)
    if format == "nib":
        return nib.NibFile(file, revcomp=revcomp, name=name, gap=gap)
    if format == "qdna":
        return qdna.QdnaFile(file, revcomp=revcomp, name=name, gap=gap)

    # format is still None when inference failed; omit it from the message.
    if format is None:
        suffix = ""
    else:
        suffix = " " + format
    # Not every file-like object has a .name attribute; fall back to the
    # object itself so the intended error is still reported.
    source = getattr(file, "name", file)
    raise ValueError("Unknown sequence format%s in %s" % (suffix, source))
|---|
| 34 | |
|---|
| 35 | |
|---|
def seq_reader(file, format=None, revcomp=False, name="", gap=None):
    """
    Create the sequence reader matching `format` for an open file.

    :param file: open file object holding the sequence data
    :param format: "fasta", "nib" or "qdna"; when None the format is
        determined by calling infer_format(file)
    :param revcomp: passed through unchanged to the format-specific reader
    :param name: passed through unchanged to the format-specific reader
    :param gap: passed through unchanged to the format-specific reader
    :returns: a fasta.FastaReader, nib.NibReader or qdna.QdnaReader instance
    :raises ValueError: if the format is unknown or could not be inferred
    """
    if format is None:
        format = infer_format(file)

    if format == "fasta":
        return fasta.FastaReader(file, revcomp=revcomp, name=name, gap=gap)
    if format == "nib":
        return nib.NibReader(file, revcomp=revcomp, name=name, gap=gap)
    if format == "qdna":
        return qdna.QdnaReader(file, revcomp=revcomp, name=name, gap=gap)
    raise ValueError("Unknown sequence format %s" % format)
|---|
| 42 | |
|---|
| 43 | |
|---|
def seq_writer(outfile, format=None, name=""):
    """
    Create the sequence writer matching `format` for an output file.

    :param outfile: output file object handed to the format-specific writer
    :param format: "fasta", "nib" or "qdna"; it is not inferred, so it must
        be given explicitly
    :param name: accepted for interface compatibility; not used here
    :returns: a fasta.FastaWriter, nib.NibWriter or qdna.QdnaWriter instance
    :raises ValueError: if `format` is not one of the supported names
    """
    if format == "fasta":
        return fasta.FastaWriter(outfile)
    if format == "nib":
        return nib.NibWriter(outfile)
    if format == "qdna":
        return qdna.QdnaWriter(outfile)
    raise ValueError("Unknown sequence format %s" % format)
|---|
| 49 | |
|---|
| 50 | |
|---|
def infer_format(file):
    """
    Guess the sequence format of an open, seekable file from its first bytes.

    The first four bytes are compared, as a big-endian unsigned integer,
    against the nib and qdna magic numbers (both byte orders).  If neither
    matches, a leading ">" identifies the file as fasta.

    :param file: file object supporting read() and seek(); the read is
        assumed to start at the beginning of the file
    :returns: "nib", "qdna", "fasta", or None if the format is not recognized
    :side effect: the file is rewound to offset 0 before returning
    """
    header = file.read(4)
    if not isinstance(header, bytes):
        # Text-mode handle: convert to bytes so the comparisons below work
        # the same way as for a binary handle.
        header = header.encode("latin-1", "replace")

    format = None
    # Files shorter than four bytes cannot carry a magic number; skipping
    # the unpack avoids a struct.error on empty or tiny inputs.
    if len(header) == 4:
        magic = struct.unpack(">L", header)[0]
        if magic in (nib.NIB_MAGIC_NUMBER, nib.NIB_MAGIC_NUMBER_SWAP):
            format = "nib"
        elif magic in (qdna.qdnaMagic, qdna.qdnaMagicSwap):
            format = "qdna"

    if format is None and header[:1] == b">":
        format = "fasta"

    # Leave the file positioned at the start for whoever reads it next.
    file.seek(0)
    return format
|---|
| 64 | |
|---|