root/galaxy-central/tools/regVariation/featureCounter.py

リビジョン 2, 5.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2#Guruprasad Ananda
3"""
4Calculate count and coverage of one query on another, and append the Coverage and counts to
5the last four columns as bases covered, percent coverage, number of completely present features, number of partially present/overlapping features.
6
7usage: %prog bed_file_1 bed_file_2 out_file
8    -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
9    -2, --cols2=N,N,N,N: Columns for chr, start, end, strand in second file
10"""
11from galaxy import eggs
12import pkg_resources
13pkg_resources.require( "bx-python" )
14import sys, traceback, fileinput
15from warnings import warn
16from bx.intervals.io import *
17from bx.cookbook import doc_optparse
18from bx.intervals.operations import quicksect
19from galaxy.tools.util.galaxyops import *
20
21assert sys.version_info[:2] >= ( 2, 4 )
22
23def stop_err(msg):
24    sys.stderr.write(msg)
25    sys.exit()
26
27def counter(node, start, end):
28    global full, partial
29    if node.start <= start and node.maxend > start:
30        if node.end >= end or (node.start == start and end > node.end > start):
31            full += 1
32        elif end > node.end > start:
33            partial += 1
34        if node.left and node.left.maxend > start:
35            counter(node.left, start, end)
36        if node.right:
37            counter(node.right, start, end)
38    elif start < node.start < end:
39        if node.end <= end:
40            full += 1
41        else:
42            partial += 1
43        if node.left and node.left.maxend > start:
44            counter(node.left, start, end)
45        if node.right:
46            counter(node.right, start, end)
47    else:
48        if node.left:
49            counter(node.left, start, end)
50
51def count_coverage( readers, comments=True ):
52    primary = readers[0]
53    secondary = readers[1]
54    secondary_copy = readers[2]
55   
56    rightTree = quicksect.IntervalTree()
57    for item in secondary:
58        if type( item ) is GenomicInterval:
59            rightTree.insert( item, secondary.linenum, item.fields )
60   
61    bitsets = secondary_copy.binned_bitsets()
62       
63    global full, partial
64   
65    for interval in primary:
66        if type( interval ) is Header:
67            yield interval
68        if type( interval ) is Comment and comments:
69            yield interval
70        elif type( interval ) == GenomicInterval:
71            chrom = interval.chrom
72            start = int(interval.start)
73            end = int(interval.end)
74            full = 0
75            partial = 0
76            if chrom not in bitsets:
77                bases_covered = 0
78                percent = 0.0
79                full = 0
80                partial = 0
81            else:
82                bases_covered = bitsets[ chrom ].count_range( start, end-start )
83                if (end - start) == 0:
84                    percent = 0
85                else:
86                    percent = float(bases_covered) / float(end - start)
87                if bases_covered:
88                    root = rightTree.chroms[chrom]    #root node for the chrom tree
89                    counter(root, start, end)
90            interval.fields.append(str(bases_covered))
91            interval.fields.append(str(percent))
92            interval.fields.append(str(full))
93            interval.fields.append(str(partial))
94            yield interval
95   
96def main():
97    options, args = doc_optparse.parse( __doc__ )
98   
99    try:
100        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
101        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )     
102        in1_fname, in2_fname, out_fname = args
103    except:
104        stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
105   
106    g1 = NiceReaderWrapper( fileinput.FileInput( in1_fname ),
107                            chrom_col=chr_col_1,
108                            start_col=start_col_1,
109                            end_col=end_col_1,
110                            strand_col=strand_col_1,
111                            fix_strand=True )
112    g2 = NiceReaderWrapper( fileinput.FileInput( in2_fname ),
113                            chrom_col=chr_col_2,
114                            start_col=start_col_2,
115                            end_col=end_col_2,
116                            strand_col=strand_col_2,
117                            fix_strand=True )
118    g2_copy = NiceReaderWrapper( fileinput.FileInput( in2_fname ),
119                                 chrom_col=chr_col_2,
120                                 start_col=start_col_2,
121                                 end_col=end_col_2,
122                                 strand_col=strand_col_2,
123                                 fix_strand=True )
124   
125
126    out_file = open( out_fname, "w" )
127
128    try:
129        for line in count_coverage([g1,g2,g2_copy]):
130            if type( line ) is GenomicInterval:
131                out_file.write( "%s\n" % "\t".join( line.fields ) )
132            else:
133                out_file.write( "%s\n" % line )
134    except ParseError, exc:
135        out_file.close()
136        fail( str( exc ) )
137
138    out_file.close()
139
140    if g1.skipped > 0:
141        print skipped( g1, filedesc=" of 1st dataset" )
142    if g2.skipped > 0:
143        print skipped( g2, filedesc=" of 2nd dataset" )
144    elif g2_copy.skipped > 0:
145        print skipped( g2_copy, filedesc=" of 2nd dataset" )
146       
147if __name__ == "__main__":
148    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。