root/galaxy-central/tools/regVariation/qv_to_bqv.py

リビジョン 2, 3.1 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

  • 属性 svn:executable の設定値 *
行番号 
1#!/usr/bin/env python
2
3"""
4Adapted from bx/scripts/qv_to_bqv.py
5
6Convert a qual (qv) file to several BinnedArray files for fast seek.
7This script takes approximately 4 seconds per 1 million base pairs.
8
9The input format is fasta style quality -- fasta headers followed by
10whitespace separated integers.
11
12usage: %prog qual_file output_file
13"""
14
15import pkg_resources
16pkg_resources.require( "bx-python" )
17pkg_resources.require( "numpy" )
18import string
19import psyco_full
20import sys, re, os, tempfile
21from bx.binned_array import BinnedArrayWriter
22from bx.cookbook import *
23import fileinput
24
25def load_scores_ba_dir( dir ):
26    """
27    Return a dict-like object (keyed by chromosome) that returns
28    FileBinnedArray objects created from "key.ba" files in `dir`
29    """
30    return FileBinnedArrayDir( dir )
31
32def main():
33    args = sys.argv[1:]
34    try:
35        qual_file_dir = args[0]
36        #mydir="/home/gua110/Desktop/chimp_quality_scores/chr22.qa"
37        mydir="/home/gua110/Desktop/rhesus_quality_scores/rheMac2.qual.qv"
38        qual_file_dir = mydir.replace(mydir.split("/")[-1], "")
39        output_file = args[ 1 ]
40        fo = open(output_file,"w")
41    except:
42        print "usage: qual_file output_file"
43        sys.exit()
44   
45    tmpfile = tempfile.NamedTemporaryFile()
46    cmdline = "ls " + qual_file_dir + "*.qa | cat >> " + tmpfile.name
47    os.system (cmdline)
48    for qual_file in tmpfile.readlines():
49        qual = fileinput.FileInput( qual_file.strip() )
50        outfile = None
51        outbin = None
52        base_count = 0
53        mega_count = 0
54   
55        for line in qual:
56            line = line.rstrip("\r\n")
57            if line.startswith(">"):
58                # close old
59                if outbin and outfile:
60                    print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
61                    outbin.finish()
62                    outfile.close()
63                # start new file
64                region = line.lstrip(">")
65                #outfname = output_file + "." + region + ".bqv" #CHANGED
66                outfname = qual_file.strip() + ".bqv"
67                print >>fo, "Writing region " + region + " to file " + outfname
68                outfile = open( outfname , "wb")
69                outbin = BinnedArrayWriter(outfile, typecode='b', default=0)
70                base_count = 0
71                mega_count = 0
72            else:
73                if outfile and outbin:
74                    nums = line.split()
75                    for val in nums:
76                        outval = int(val)
77                        assert outval <= 255 and outval >= 0
78                        outbin.write(outval)
79                        base_count += 1
80                    if (mega_count * 1000000) <= base_count:
81                        sys.stdout.write(str(mega_count)+" ")
82                        sys.stdout.flush()
83                        mega_count = base_count // 1000000 + 1
84        if outbin and outfile:
85            print "\nFinished region " + region + " at " + str(base_count) + " base pairs."
86            outbin.finish()
87            outfile.close()
88
89if __name__ == "__main__":
90    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。