root/galaxy-central/tools/regVariation/substitution_rates.py @ 2

リビジョン 2, 3.9 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2#guruprasad Ananda
3"""
4Estimates substitution rates from pairwise alignments using JC69 model.
5"""
6
7from galaxy import eggs
8from galaxy.tools.util.galaxyops import *
9from galaxy.tools.util import maf_utilities
10import bx.align.maf
11import sys, fileinput
12
13def stop_err(msg):
14    sys.stderr.write(msg)
15    sys.exit()
16
17if len(sys.argv) < 3:
18        stop_err("Incorrect number of arguments.")   
19   
20inp_file = sys.argv[1]
21out_file = sys.argv[2]
22fout = open(out_file, 'w')
23int_file = sys.argv[3]
24if int_file != "None":     #The user has specified an interval file
25    dbkey_i = sys.argv[4]
26    chr_col_i, start_col_i, end_col_i, strand_col_i = parse_cols_arg( sys.argv[5] )
27
28
29def rateEstimator(block):
30    global alignlen, mismatches
31
32    src1 = block.components[0].src
33    sequence1 = block.components[0].text
34    start1 = block.components[0].start
35    end1 = block.components[0].end
36    len1 = int(end1)-int(start1)
37    len1_withgap = len(sequence1)
38    mismatch = 0.0
39   
40    for seq in range (1,len(block.components)):
41        src2 = block.components[seq].src
42        sequence2 = block.components[seq].text
43        start2 = block.components[seq].start
44        end2 = block.components[seq].end
45        len2 = int(end2)-int(start2)
46        for nt in range(len1_withgap):
47            if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': #Not a gap or masked character
48                if sequence1[nt].upper() != sequence2[nt].upper():
49                    mismatch += 1
50   
51    if int_file == "None": 
52        p = mismatch/min(len1,len2)
53        print >>fout, "%s\t%s\t%s\t%s\t%s\t%s\t%d\t%d\t%.4f" %(src1,start1,end1,src2,start2,end2,min(len1,len2),mismatch,p)
54    else:
55        mismatches += mismatch
56        alignlen += min(len1,len2)
57             
58def main():
59    skipped = 0
60    not_pairwise = 0
61   
62    if int_file == "None":
63        try:
64            maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
65        except:
66            stop_err("Your MAF file appears to be malformed.")
67        print >>fout, "#Seq1\tStart1\tEnd1\tSeq2\tStart2\tEnd2\tL\tN\tp"
68        for block in maf_reader:
69            if len(block.components) != 2:
70                not_pairwise += 1
71                continue
72            try:
73                rateEstimator(block)
74            except:
75                skipped += 1
76    else:
77        index, index_filename = maf_utilities.build_maf_index( inp_file, species = [dbkey_i] )
78        if index is None:
79            print >> sys.stderr, "Your MAF file appears to be malformed."
80            sys.exit()
81        win = NiceReaderWrapper( fileinput.FileInput( int_file ),
82                                chrom_col=chr_col_i,
83                                start_col=start_col_i,
84                                end_col=end_col_i,
85                                strand_col=strand_col_i,
86                                fix_strand=True)
87        species=None
88        mincols = 0
89        global alignlen, mismatches
90       
91        for interval in win:
92            alignlen = 0
93            mismatches = 0.0
94            src = "%s.%s" % ( dbkey_i, interval.chrom )
95            for block in maf_utilities.get_chopped_blocks_for_region( index, src, interval, species, mincols ):
96                if len(block.components) != 2:
97                    not_pairwise += 1
98                    continue
99                try:
100                    rateEstimator(block)
101                except:
102                    skipped += 1
103            if alignlen:
104                p = mismatches/alignlen
105            else:
106                p = 'NA'
107            interval.fields.append(str(alignlen))
108            interval.fields.append(str(mismatches))
109            interval.fields.append(str(p))
110            print >>fout, "\t".join(interval.fields)   
111            #num_blocks += 1
112   
113    if not_pairwise:
114        print "Skipped %d non-pairwise blocks" %(not_pairwise)
115    if skipped:
116        print "Skipped %d blocks as invalid" %(skipped)
117if __name__ == "__main__":
118    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。