root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/qv_to_bqv.py

リビジョン 3, 2.1 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4Convert a qual (qv) file to several BinnedArray files for fast seek.
5This script takes approximately 4 seconds per 1 million base pairs.
6
7The input format is fasta style quality -- fasta headers followed by
8whitespace separated integers.
9
10usage: %prog qual_file output_file
11"""
12
13import string
14import psyco_full
15import sys
16from binned_array import *
17from bx.cookbook import *
18import fileinput
19
20def main():
21    args = sys.argv[1:]
22    try:
23        qual_file = args[ 0 ]
24        output_file = args[ 1 ]
25    except:
26        print "usage: qual_file output_file"
27        sys.exit()
28
29    qual = fileinput.FileInput( qual_file )
30    outfile = None
31    outbin = None
32    base_count = 0
33    mega_count = 0
34
35    for line in qual:
36        line = line.rstrip("\r\n")
37        if line.startswith(">"):
38            # close old
39            if outbin and outfile:
40                print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
41                outbin.finish()
42                outfile.close()
43            # start new file
44            region = line.lstrip(">")
45            outfname = output_file + "." + region + ".bqv"
46            print "Writing region " + region + " to file " + outfname
47            outfile = open( outfname , "wb")
48            outbin = BinnedArrayWriter(outfile, typecode='b', default=0)
49            base_count = 0
50            mega_count = 0
51        else:
52            if outfile and outbin:
53                nums = line.split()
54                for val in nums:
55                    outval = int(val)
56                    assert outval <= 255 and outval >= 0
57                    outbin.write(outval)
58                    base_count += 1
59                if (mega_count * 1000000) <= base_count:
60                    sys.stdout.write(str(mega_count)+" ")
61                    sys.stdout.flush()
62                    mega_count = base_count // 1000000 + 1
63    if outbin and outfile:
64        print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
65        outbin.finish()
66        outfile.close()
67
68if __name__ == "__main__":
69    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。