root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/mask_quality.py @ 3

リビジョン 3, 3.0 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4Masks an AXT or MAF file based on quality (from a binned_array) and
5outputs AXT or MAF.
6
7Binned array form of quality scores can be generated with `qv_to_bqv.py`.
8
9usage: %prog input output
10    -i, --input=N: Format of input (axt or maf)
11    -o, --output=N: Format of output (axt or maf)
12    -m, --mask=N: Character to use as mask character
13    -q, --quality=N: Min quality allowed
14    -t, --type=N: base_pair or nqs
15    -l, --list=N: colon seperated list of species,len_file[,qualityfile].
16"""
17
18import sys
19import bx.align.axt
20import bx.align.maf
21import bx.binned_array
22from bx.cookbook import doc_optparse
23import fileinput
24from bx.align.sitemask.quality import *
25
26def main():
27   
28    options, args = doc_optparse.parse( __doc__ )
29    try:
30        inputformat = options.input
31        outputformat = options.output
32        mask = options.mask
33        minqual = int(options.quality)
34        qtype = options.type
35        speciesAndLens = options.list
36        inputfile = args[0]
37        outputfile = args[1]
38    except:
39        doc_optparse.exception()
40
41    outstream = open( outputfile, "w" )
42    instream = open( inputfile, "r" )
43
44    qualfiles = {}
45
46    # read lens
47    specieslist = speciesAndLens.split(":")
48    species_to_lengths = {}
49   
50    for entry in specieslist:
51        fields = entry.split(",")
52        lenstream = fileinput.FileInput( fields[1] )
53        lendict = dict()
54        for line in lenstream:
55            region = line.split()
56            lendict[region[0]] = int(region[1])
57        species_to_lengths[fields[0]] = lendict
58        if len(fields) >= 3:
59            qualfiles[fields[0]] = fields[2]
60
61    specieslist = map( lambda(a): a.split(":")[0], specieslist )
62   
63    # open quality binned_arrays
64    reader = None
65    writer = None
66   
67    if inputformat == "axt":
68        # load axt
69        if len(specieslist) != 2:
70            print "AXT is pairwise only."
71            sys.exit()
72        reader = bx.align.axt.Reader(instream, species1=specieslist[0], \
73                                     species2=specieslist[1], \
74                                     species_to_lengths = species_to_lengths)
75    elif outputformat == "maf":
76        # load maf
77        reader = bx.align.maf.Reader(instream, species_to_lengths=species_to_lengths)
78
79    if outputformat == "axt":
80        # setup axt
81        if len(specieslist) != 2:
82            print "AXT is pairwise only."
83            sys.exit()
84        writer = bx.align.axt.Writer(outstream, attributes=reader.attributes)
85    elif outputformat == "maf":
86        # setup maf
87        writer = bx.align.maf.Writer(outstream, attributes=reader.attributes)
88
89    qualfilter = Simple( mask=mask, qualspecies = species_to_lengths, \
90                         qualfiles = qualfiles, minqual = minqual, cache=50 )
91
92    qualfilter.run( reader, writer.write )
93
94    print "For "+str(qualfilter.total)+" base pairs, "+str(qualfilter.masked)+" base pairs were masked."
95    print str(float(qualfilter.masked)/float(qualfilter.total) * 100)+"%"
96   
97if __name__ == "__main__":
98    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。