1 | """ |
---|
2 | Classes to support nib files. |
---|
3 | |
---|
4 | :Author: James Taylor (james@bx.psu.edu), Bob Harris (rsharris@bx.psu.edu) |
---|
5 | |
---|
6 | A nib sequence is a sequence of DNA, using the 10 character alphabet A,C,G,T,N |
---|
7 | (upper and lower case). The file is packed as 4 bits per character. |
---|
8 | |
---|
9 | nib file format |
---|
10 | --------------- |
---|
11 | |
---|
Fields can be in big- or little-endian format; they must match the endianness
of the magic number.
---|
14 | |
---|
============ =========== ======================================================
offset 0x00: 6B E9 3D 3A big endian magic number (3A 3D E9 6B => little endian)
offset 0x04: xx xx xx xx length of data sequence (counted in characters)
offset 0x08: ...         data sequence; most significant nybble in each
                         byte is first in sequence
============ =========== ======================================================
---|
21 | """ |
---|
22 | |
---|
23 | from __future__ import division |
---|
24 | |
---|
25 | from bx.seq.seq import SeqFile,SeqReader |
---|
26 | import sys, struct, string, math |
---|
27 | |
---|
28 | import _nib |
---|
29 | |
---|
# Magic number as it reads when the first header field is decoded big-endian.
NIB_MAGIC_NUMBER = 0x6BE93D3A
# The same four bytes in reverse order; this is what a big-endian decode
# yields when the file was written little-endian.
NIB_MAGIC_NUMBER_SWAP = 0x3A3DE96B
# Size, in bytes, of the magic-number field at the start of the file.
NIB_MAGIC_SIZE = 4
# Size, in bytes, of the sequence-length field that follows the magic number.
NIB_LENGTH_SIZE = 4
---|
34 | |
---|
class NibFile(SeqFile):
    """Sequence file backed by a nib file (DNA packed at 4 bits per character).

    The constructor reads the 8-byte header (magic number, then sequence
    length) from ``file`` and records the byte order and the sequence length.
    """

    def __init__(self, file, revcomp=False, name="", gap=None):
        """Read and validate the nib header from ``file``.

        ``file`` must be positioned at the start of the nib data and provide
        ``read``; ``raw_fetch`` additionally needs ``seek`` on ``self.file``.
        ``revcomp``, ``name`` and ``gap`` are passed through unchanged to
        ``SeqFile.__init__``.

        Raises ``ValueError`` if the magic number matches neither byte order.
        """
        SeqFile.__init__(self, file, revcomp, name, gap)

        # Decode the magic number as big-endian first; if the file is
        # little-endian the value shows up byte-swapped instead.
        self.byte_order = ">"
        magic = struct.unpack(">L", file.read(NIB_MAGIC_SIZE))[0]
        if magic != NIB_MAGIC_NUMBER:
            if magic == NIB_MAGIC_NUMBER_SWAP:
                self.byte_order = "<"
            else:
                # A bare string is not a valid exception object, so raise a
                # real exception carrying the same message.
                raise ValueError("Not a NIB file")
        self.magic = magic
        # The length field uses the same byte order as the magic number.
        length_format = "%sL" % self.byte_order
        self.length = struct.unpack(length_format, file.read(NIB_LENGTH_SIZE))[0]

    def raw_fetch(self, start, length):
        """Return ``length`` characters of the sequence beginning at ``start``.

        ``start`` is a 0-based character offset.  The interval must lie inside
        the sequence; zero is accepted for both ``start`` and ``length``.
        The packed bytes covering the interval are read from the file and
        handed to ``_nib.translate_raw_data``, whose result is returned.

        Raises ``AssertionError`` if the interval is invalid.
        """
        # Check parameters
        assert start >= 0, "Start must be greater than 0"
        assert length >= 0, "Length must be greater than 0"
        assert start + length <= self.length, "Interval beyond end of sequence"
        # Two characters are packed per byte, so character i lives in data
        # byte floor(i / 2).  Read every byte touched by the interval.
        block_start = int(math.floor(start / 2))
        block_end = int(math.floor((start + length - 1) / 2))
        block_len = block_end + 1 - block_start
        # Data bytes begin right after the magic-number and length fields.
        self.file.seek(NIB_MAGIC_SIZE + NIB_LENGTH_SIZE + block_start)
        raw = self.file.read(block_len)
        # Unpack compressed block into a character string and return
        return _nib.translate_raw_data(raw, start, length)
---|
61 | |
---|
class NibReader(SeqReader):
    """Reader that hands out the one sequence contained in a nib file."""

    def __init__(self, file, revcomp=False, name="", gap=None):
        """Forward all arguments unchanged to ``SeqReader.__init__``."""
        SeqReader.__init__(self, file, revcomp, name, gap)

    def next(self):
        """Return a ``NibFile`` on the first call and ``None`` afterwards."""
        if self.seqs_read == 0:
            # Build the sequence before counting it, so a failed header read
            # leaves the counter untouched.
            nib_seq = NibFile(self.file, self.revcomp, self.name, self.gap)
            self.seqs_read = self.seqs_read + 1
            return nib_seq
        # nib files have just one sequence
        return None
---|
72 | |
---|
73 | |
---|
class NibWriter(object):
    """Placeholder writer for nib files; writing is not implemented yet."""

    def __init__(self, file):
        """Remember ``file`` as the output stream; nothing is written here."""
        self.file = file

    def write(self, seq):
        """Always raise ``NotImplementedError``; ``seq`` is not used.

        An explicit raise is used rather than ``assert False`` because
        assertions are removed under ``python -O``, which would make this
        call appear to succeed while writing nothing.
        """
        raise NotImplementedError("NibWriter.write() is not implemented yet")

    def close(self):
        """Close the underlying file object."""
        self.file.close()
---|
84 | |
---|