root/galaxy-central/lib/galaxy/datatypes/converters/interval_to_coverage.py @ 2

リビジョン 2, 6.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2"""
3Converter to generate 3 (or 4) column base-pair coverage from an interval file.
4
5usage: %prog bed_file out_file
6    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in interval file
7    -2, --cols2=N,N,N,N: Columns for chrom, start, end, strand in coverage file
8"""
9import sys
10from galaxy import eggs
11import pkg_resources; pkg_resources.require( "bx-python" )
12from bx.intervals import io
13from bx.cookbook import doc_optparse
14import psyco_full
15import commands
16import os
17from os import environ
18import tempfile
19from bisect import bisect
20
21INTERVAL_METADATA = ('chromCol',
22                     'startCol',
23                     'endCol',
24                     'strandCol',)
25
26COVERAGE_METADATA = ('chromCol',
27                     'positionCol',
28                     'forwardCol',
29                     'reverseCol',)
30
31def main( interval, coverage ):
32    """
33    Uses a sliding window of partitions to count coverages.
34    Every interval record adds its start and end to the partitions.  The result
35    is a list of partitions, or every position that has a (maybe) different
36    number of basepairs covered.  We don't worry about merging because we pop
37    as the sorted intervals are read in.  As the input start positions exceed
38    the partition positions in partitions, coverages are kicked out in bulk.
39    """
40    partitions = []
41    forward_covs = []
42    reverse_covs = []
43    offset = 0
44    chrom = None
45    lastchrom = None
46    for record in interval:
47        chrom = record.chrom
48        if lastchrom and not lastchrom == chrom and partitions:
49            for partition in xrange(0, len(partitions)-1):
50                forward = forward_covs[partition]
51                reverse = reverse_covs[partition]
52                if forward+reverse > 0:
53                    coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]),
54                                   forward=forward, reverse=reverse)
55            partitions = []
56            forward_covs = []
57            reverse_covs = []
58           
59        start_index = bisect(partitions, record.start)
60        forward = int(record.strand == "+")
61        reverse = int(record.strand == "-")
62        forward_base = 0
63        reverse_base = 0
64        if start_index > 0:
65            forward_base = forward_covs[start_index-1]
66            reverse_base = reverse_covs[start_index-1]
67        partitions.insert(start_index, record.start)
68        forward_covs.insert(start_index, forward_base)
69        reverse_covs.insert(start_index, reverse_base)
70        end_index = bisect(partitions, record.end)
71        for index in xrange(start_index, end_index):
72            forward_covs[index] += forward
73            reverse_covs[index] += reverse
74        partitions.insert(end_index, record.end)
75        forward_covs.insert(end_index, forward_covs[end_index-1] - forward )
76        reverse_covs.insert(end_index, reverse_covs[end_index-1] - reverse )
77       
78        if partitions:
79            for partition in xrange(0, start_index):
80                forward = forward_covs[partition]
81                reverse = reverse_covs[partition]
82                if forward+reverse > 0:
83                    coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]),
84                                   forward=forward, reverse=reverse)
85            partitions = partitions[start_index:]
86            forward_covs = forward_covs[start_index:]
87            reverse_covs = reverse_covs[start_index:]
88       
89        lastchrom = chrom
90       
91    # Finish the last chromosome
92    if partitions:
93        for partition in xrange(0, len(partitions)-1):
94            forward = forward_covs[partition]
95            reverse = reverse_covs[partition]
96            if forward+reverse > 0:
97                coverage.write(chrom=chrom, position=xrange(partitions[partition],partitions[partition+1]),
98                               forward=forward, reverse=reverse)
99   
100class CoverageWriter( object ):
101    def __init__( self, out_stream=None, chromCol=0, positionCol=1, forwardCol=2, reverseCol=3 ):
102        self.out_stream = out_stream
103        self.reverseCol = reverseCol
104        self.nlines = 0
105        positions = {str(chromCol):'%(chrom)s',
106                     str(positionCol):'%(position)d',
107                     str(forwardCol):'%(forward)d',
108                     str(reverseCol):'%(reverse)d'}
109        if reverseCol < 0:
110            self.template = "%(0)s\t%(1)s\t%(2)s\n" % positions
111        else:
112            self.template = "%(0)s\t%(1)s\t%(2)s\t%(3)s\n" % positions
113           
114    def write(self, **kwargs ):
115        if self.reverseCol < 0: kwargs['forward'] += kwargs['reverse']
116        posgen = kwargs['position']
117        for position in posgen:
118            kwargs['position'] = position
119            self.out_stream.write(self.template % kwargs)
120
121    def close(self):
122        self.out_stream.flush()
123        self.out_stream.close()
124       
125if __name__ == "__main__":
126    options, args = doc_optparse.parse( __doc__ )
127    try:
128        chr_col_1, start_col_1, end_col_1, strand_col_1 = [int(x)-1 for x in options.cols1.split(',')]
129        chr_col_2, position_col_2, forward_col_2, reverse_col_2 = [int(x)-1 for x in options.cols2.split(',')]     
130        in_fname, out_fname = args
131    except:
132        doc_optparse.exception()
133
134    # Sort through a tempfile first
135    temp_file = tempfile.NamedTemporaryFile(mode="r")
136    environ['LC_ALL'] = 'POSIX'
137    commandline = "sort -f -n -k %d -k %d -k %d -o %s %s" % (chr_col_1+1,start_col_1+1,end_col_1+1, temp_file.name, in_fname)
138    errorcode, stdout = commands.getstatusoutput(commandline)
139
140    coverage = CoverageWriter( out_stream = open(out_fname, "a"),
141                               chromCol = chr_col_2, positionCol = position_col_2,
142                               forwardCol = forward_col_2, reverseCol = reverse_col_2, )
143    temp_file.seek(0)
144    interval = io.NiceReaderWrapper( temp_file,
145                                     chrom_col=chr_col_1,
146                                     start_col=start_col_1,
147                                     end_col=end_col_1,
148                                     strand_col=strand_col_1,
149                                     fix_strand=True )
150    main( interval, coverage )
151    temp_file.close()
152    coverage.close()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。