root/galaxy-central/tools/new_operations/gops_join.py

リビジョン 2, 2.8 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2"""
3Join two sets of intervals using their overlap as the key.
4
5usage: %prog bed_file_1 bed_file_2 out_file
6    -1, --cols1=N,N,N,N: Columns for start, end, strand in first file
7    -2, --cols2=N,N,N,N: Columns for start, end, strand in second file
8    -m, --mincols=N: Require this much overlap (default 1bp)
9    -f, --fill=N: none, right, left, both
10"""
11from galaxy import eggs
12import pkg_resources
13pkg_resources.require( "bx-python" )
14import sys, traceback, fileinput
15from warnings import warn
16from bx.intervals import *
17from bx.intervals.io import *
18from bx.intervals.operations.join import *
19from bx.cookbook import doc_optparse
20from galaxy.tools.util.galaxyops import *
21
22assert sys.version_info[:2] >= ( 2, 4 )  # Refuse to run on interpreters older than Python 2.4 (note: asserts are skipped under -O).
23
def main():
    """Join two interval files on overlapping regions and write the result.

    Options and positional arguments are parsed from the module docstring
    by ``doc_optparse``.  Three positional arguments are expected: the first
    input file, the second input file and the output file.

    Each input is wrapped in a ``NiceReaderWrapper`` configured with the
    chromosome/start/end/strand columns taken from ``--cols1``/``--cols2``.
    Every item produced by ``join`` is written to the output file as one
    line: lists are tab-joined, anything else is written as-is.

    A ``ParseError`` or ``MemoryError`` raised during the join is reported
    through ``fail`` after the output file has been closed.  When a reader's
    ``skipped`` counter is positive, the summary returned by ``skipped()``
    is printed for that dataset.
    """
    mincols = 1

    options, args = doc_optparse.parse( __doc__ )
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
        if options.mincols:
            mincols = int( options.mincols )
        # "both" enables filling on each side; any other value (or no value)
        # enables only the side it names, if any.
        leftfill = options.fill in ( "left", "both" )
        rightfill = options.fill in ( "right", "both" )
        in_fname, in2_fname, out_fname = args
    except Exception:
        # Exception rather than a bare except, so that interpreter-exit and
        # keyboard-interrupt signals are not reported as usage errors.
        # NOTE(review): doc_optparse.exception() is presumably expected to
        # terminate the script; if it returns, the names above may be unbound.
        doc_optparse.exception()

    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
                            chrom_col=chr_col_1,
                            start_col=start_col_1,
                            end_col=end_col_1,
                            strand_col=strand_col_1,
                            fix_strand=True )
    g2 = NiceReaderWrapper( fileinput.FileInput( in2_fname ),
                            chrom_col=chr_col_2,
                            start_col=start_col_2,
                            end_col=end_col_2,
                            strand_col=strand_col_2,
                            fix_strand=True )

    out_file = open( out_fname, "w" )
    error_message = None

    # Nested try blocks: the combined try/except/finally form is not
    # available on the oldest interpreter this script accepts (2.4).
    try:
        try:
            for outfields in join( g1, g2, mincols=mincols, rightfill=rightfill, leftfill=leftfill ):
                if isinstance( outfields, list ):
                    out_file.write( "%s\n" % "\t".join( outfields ) )
                else:
                    out_file.write( "%s\n" % outfields )
        except ParseError:
            # sys.exc_info() is used instead of an exception-binding clause
            # so that the same source parses on both old and new interpreters.
            error_message = "Invalid file format: %s" % str( sys.exc_info()[1] )
        except MemoryError:
            error_message = "Input datasets were too large to complete the join operation."
    finally:
        # Always release the output handle, including on unexpected errors.
        out_file.close()

    # Report the failure only after the output file has been closed,
    # matching the original close-then-fail ordering.
    if error_message is not None:
        fail( error_message )

    if g1.skipped > 0:
        print( skipped( g1, filedesc=" of 1st dataset" ) )
    if g2.skipped > 0:
        print( skipped( g2, filedesc=" of 2nd dataset" ) )
80
81if __name__ == "__main__":  # Run the join only when this file is executed as a script, not when imported.
82    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。