root/galaxy-central/tools/new_operations/gops_subtract.py @ 2

リビジョン 2, 3.3 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2"""
3Find regions of first interval file that do not overlap regions in a second
4interval file. Interval files can either be BED or GFF format.
5
6usage: %prog interval_file_1 interval_file_2 out_file
7    -1, --cols1=N,N,N,N: Columns for start, end, strand in first file
8    -2, --cols2=N,N,N,N: Columns for start, end, strand in second file
9    -m, --mincols=N: Require this much overlap (default 1bp)
10    -p, --pieces: just print pieces of second set (after padding)
11    -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
12    -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
13"""
14from galaxy import eggs
15import pkg_resources
16pkg_resources.require( "bx-python" )
17import sys, traceback, fileinput
18from warnings import warn
19from bx.intervals import *
20from bx.intervals.io import *
21from bx.intervals.operations.subtract import *
22from bx.cookbook import doc_optparse
23from galaxy.tools.util.galaxyops import *
24from galaxy.tools.util.gff_util import *
25
26assert sys.version_info[:2] >= ( 2, 4 )
27
28def main():
29    mincols = 1
30    upstream_pad = 0
31    downstream_pad = 0
32
33    options, args = doc_optparse.parse( __doc__ )
34    try:
35        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
36        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )     
37        if options.mincols: mincols = int( options.mincols )
38        pieces = bool( options.pieces )
39        in1_gff_format = bool( options.gff1 )
40        in2_gff_format = bool( options.gff2 )
41        in_fname, in2_fname, out_fname = args
42    except:
43        doc_optparse.exception()
44
45    # Set readers to handle either GFF or default format.
46    if in1_gff_format:
47        in1_reader_wrapper = GFFReaderWrapper
48    else:
49        in1_reader_wrapper = NiceReaderWrapper
50    if in2_gff_format:
51        in2_reader_wrapper = GFFReaderWrapper
52    else:
53        in2_reader_wrapper = NiceReaderWrapper
54       
55    g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),
56                            chrom_col=chr_col_1,
57                            start_col=start_col_1,
58                            end_col=end_col_1,
59                            strand_col=strand_col_1,
60                            fix_strand=True )
61    g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),
62                            chrom_col=chr_col_2,
63                            start_col=start_col_2,
64                            end_col=end_col_2,
65                            strand_col=strand_col_2,
66                            fix_strand=True )
67
68    out_file = open( out_fname, "w" )
69
70    try:
71        for line in subtract( [g1,g2], pieces=pieces, mincols=mincols ):
72            if type( line ) is GenomicInterval:
73                if in1_gff_format:
74                    line = convert_bed_coords_to_gff( line )
75                out_file.write( "%s\n" % "\t".join( line.fields ) )
76            else:
77                out_file.write( "%s\n" % line )
78    except ParseError, exc:
79        out_file.close()
80        fail( "Invalid file format: %s" % str( exc ) )
81
82    out_file.close()
83
84    if g1.skipped > 0:
85        print skipped( g1, filedesc=" of 2nd dataset" )
86    if g2.skipped > 0:
87        print skipped( g2, filedesc=" of 1st dataset" )
88
89if __name__ == "__main__":
90    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。