root/galaxy-central/tools/new_operations/gops_complement.py

リビジョン 2, 3.0 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2"""
3Complement regions.
4
5usage: %prog in_file out_file
6    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in file
7    -l, --lengths=N: Filename of .len file for species (chromosome lengths)
8    -a, --all: Complement all chromosomes (Genome-wide complement)
9"""
10from galaxy import eggs
11import pkg_resources
12pkg_resources.require( "bx-python" )
13import sys, traceback, fileinput
14from warnings import warn
15from bx.intervals import *
16from bx.intervals.io import *
17from bx.intervals.operations.complement import complement
18from bx.intervals.operations.subtract import subtract
19from bx.cookbook import doc_optparse
20from galaxy.tools.util.galaxyops import *
21
22assert sys.version_info[:2] >= ( 2, 4 )
23
24def main():
25    allchroms = False
26    upstream_pad = 0
27    downstream_pad = 0
28
29    options, args = doc_optparse.parse( __doc__ )
30    try:
31        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
32        lengths = options.lengths
33        if options.all: allchroms = True
34        in_fname, out_fname = args
35    except:
36        doc_optparse.exception()
37
38    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
39                            chrom_col=chr_col_1,
40                            start_col=start_col_1,
41                            end_col=end_col_1,
42                            strand_col=strand_col_1,
43                            fix_strand=True )
44
45    lens = dict()
46    chroms = list()
47    # dbfile is used to determine the length of each chromosome.  The lengths
48    # are added to the lens dict and passed copmlement operation code in bx.
49    dbfile = fileinput.FileInput( lengths )
50   
51    if dbfile:
52        if not allchroms:
53            try:
54                for line in dbfile:
55                    fields = line.split("\t")
56                    lens[fields[0]] = int(fields[1])
57            except:
58                # assume LEN doesn't exist or is corrupt somehow
59                pass
60        elif allchroms:
61            try:
62                for line in dbfile:
63                    fields = line.split("\t")
64                    end = int(fields[1])
65                    chroms.append("\t".join([fields[0],"0",str(end)]))
66            except:
67                pass
68
69    # Safety...if the dbfile didn't exist and we're on allchroms, then
70    # default to generic complement
71    if allchroms and len(chroms) == 0:
72        allchroms = False
73
74    if allchroms:
75        chromReader = GenomicIntervalReader(chroms)
76        generator = subtract([chromReader, g1])
77    else:
78        generator = complement(g1, lens)
79
80    out_file = open( out_fname, "w" )
81
82    try:
83        for interval in generator:
84            if type( interval ) is GenomicInterval:
85                out_file.write( "%s\n" % "\t".join( interval ) )
86            else:
87                out_file.write( "%s\n" % interval )
88    except ParseError, exc:
89        out_file.close()
90        fail( "Invalid file format: %s" % str( exc ) )
91
92    out_file.close()
93
94    if g1.skipped > 0:
95        print skipped( g1, filedesc="" )
96
97if __name__ == "__main__":
98    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。