1 | #!/usr/bin/python2.6 |
---|
2 | |
---|
3 | """ |
---|
4 | Masks an AXT or MAF file based on quality (from a binned_array) and |
---|
5 | outputs AXT or MAF. |
---|
6 | |
---|
7 | Binned array form of quality scores can be generated with `qv_to_bqv.py`. |
---|
8 | |
---|
9 | usage: %prog input output |
---|
10 | -i, --input=N: Format of input (axt or maf) |
---|
11 | -o, --output=N: Format of output (axt or maf) |
---|
12 | -m, --mask=N: Character to use as mask character |
---|
13 | -q, --quality=N: Min quality allowed |
---|
14 | -t, --type=N: base_pair or nqs |
---|
15 | -l, --list=N: colon seperated list of species,len_file[,qualityfile]. |
---|
16 | """ |
---|
17 | |
---|
18 | import sys |
---|
19 | import bx.align.axt |
---|
20 | import bx.align.maf |
---|
21 | import bx.binned_array |
---|
22 | from bx.cookbook import doc_optparse |
---|
23 | import fileinput |
---|
24 | from bx.align.sitemask.quality import * |
---|
25 | |
---|
26 | def main(): |
---|
27 | |
---|
28 | options, args = doc_optparse.parse( __doc__ ) |
---|
29 | try: |
---|
30 | inputformat = options.input |
---|
31 | outputformat = options.output |
---|
32 | mask = options.mask |
---|
33 | minqual = int(options.quality) |
---|
34 | qtype = options.type |
---|
35 | speciesAndLens = options.list |
---|
36 | inputfile = args[0] |
---|
37 | outputfile = args[1] |
---|
38 | except: |
---|
39 | doc_optparse.exception() |
---|
40 | |
---|
41 | outstream = open( outputfile, "w" ) |
---|
42 | instream = open( inputfile, "r" ) |
---|
43 | |
---|
44 | qualfiles = {} |
---|
45 | |
---|
46 | # read lens |
---|
47 | specieslist = speciesAndLens.split(":") |
---|
48 | species_to_lengths = {} |
---|
49 | |
---|
50 | for entry in specieslist: |
---|
51 | fields = entry.split(",") |
---|
52 | lenstream = fileinput.FileInput( fields[1] ) |
---|
53 | lendict = dict() |
---|
54 | for line in lenstream: |
---|
55 | region = line.split() |
---|
56 | lendict[region[0]] = int(region[1]) |
---|
57 | species_to_lengths[fields[0]] = lendict |
---|
58 | if len(fields) >= 3: |
---|
59 | qualfiles[fields[0]] = fields[2] |
---|
60 | |
---|
61 | specieslist = map( lambda(a): a.split(":")[0], specieslist ) |
---|
62 | |
---|
63 | # open quality binned_arrays |
---|
64 | reader = None |
---|
65 | writer = None |
---|
66 | |
---|
67 | if inputformat == "axt": |
---|
68 | # load axt |
---|
69 | if len(specieslist) != 2: |
---|
70 | print "AXT is pairwise only." |
---|
71 | sys.exit() |
---|
72 | reader = bx.align.axt.Reader(instream, species1=specieslist[0], \ |
---|
73 | species2=specieslist[1], \ |
---|
74 | species_to_lengths = species_to_lengths) |
---|
75 | elif outputformat == "maf": |
---|
76 | # load maf |
---|
77 | reader = bx.align.maf.Reader(instream, species_to_lengths=species_to_lengths) |
---|
78 | |
---|
79 | if outputformat == "axt": |
---|
80 | # setup axt |
---|
81 | if len(specieslist) != 2: |
---|
82 | print "AXT is pairwise only." |
---|
83 | sys.exit() |
---|
84 | writer = bx.align.axt.Writer(outstream, attributes=reader.attributes) |
---|
85 | elif outputformat == "maf": |
---|
86 | # setup maf |
---|
87 | writer = bx.align.maf.Writer(outstream, attributes=reader.attributes) |
---|
88 | |
---|
89 | qualfilter = Simple( mask=mask, qualspecies = species_to_lengths, \ |
---|
90 | qualfiles = qualfiles, minqual = minqual, cache=50 ) |
---|
91 | |
---|
92 | qualfilter.run( reader, writer.write ) |
---|
93 | |
---|
94 | print "For "+str(qualfilter.total)+" base pairs, "+str(qualfilter.masked)+" base pairs were masked." |
---|
95 | print str(float(qualfilter.masked)/float(qualfilter.total) * 100)+"%" |
---|
96 | |
---|
97 | if __name__ == "__main__": |
---|
98 | main() |
---|