root/galaxy-central/tools/new_operations/gops_concat.py

リビジョン 2, 2.7 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2"""
3Concatenate two bed files.  The concatenated files are returned in the
4same format as the first.  If --sameformat is specified, then all
5columns will be treated as the same, and all fields will be saved,
6although the output will be trimmed to match the primary input.  In
7addition, if --sameformat is specified, missing fields will be padded
8with a period(.).
9
10usage: %prog in_file_1 in_file_2 out_file
11    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
12    -2, --cols2=N,N,N,N: Columns for chrom, start, end, strand in second file
13    -s, --sameformat: All files are precisely the same format.
14"""
15from galaxy import eggs
16import pkg_resources
17pkg_resources.require( "bx-python" )
18import sys, traceback, fileinput
19from warnings import warn
20from bx.intervals import *
21from bx.intervals.io import *
22from bx.intervals.operations.concat import *
23from bx.cookbook import doc_optparse
24from galaxy.tools.util.galaxyops import *
25
26assert sys.version_info[:2] >= ( 2, 4 )
27
def main():
    """Concatenate two interval files and write the result to an output file.

    Options and positional arguments are parsed from the module docstring
    with ``doc_optparse.parse``.  The three positional arguments are the
    first input path, the second input path and the output path;
    ``--cols1`` / ``--cols2`` give the chrom, start, end and strand columns
    of each input, and ``--sameformat`` is forwarded to ``concat``.

    Each item yielded by ``concat`` is written as one line: interval
    objects are written as their tab-joined ``fields``, anything else is
    written via string formatting.  After writing, a summary is printed for
    each input whose reader reports ``skipped > 0``.

    A ``ParseError`` raised while concatenating is reported through
    ``fail`` with an "Invalid file format" message.  The output file is
    closed on every exit path.
    """
    sameformat = False

    options, args = doc_optparse.parse( __doc__ )
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
        if options.sameformat:
            sameformat = True
        in_file_1, in_file_2, out_fname = args
    except Exception:
        # Any failure while interpreting the command line (bad column spec,
        # wrong number of positional arguments) is handed to doc_optparse.
        # NOTE(review): presumably doc_optparse.exception() terminates the
        # process; if it returns, the names above are unbound -- confirm.
        doc_optparse.exception()

    g1 = NiceReaderWrapper( fileinput.FileInput( in_file_1 ),
                            chrom_col=chr_col_1,
                            start_col=start_col_1,
                            end_col=end_col_1,
                            fix_strand=True )

    g2 = NiceReaderWrapper( fileinput.FileInput( in_file_2 ),
                            chrom_col=chr_col_2,
                            start_col=start_col_2,
                            end_col=end_col_2,
                            strand_col=strand_col_2,
                            fix_strand=True )

    # The first reader only gets an explicit strand column when one was
    # supplied (non-negative); otherwise the reader's own default is kept.
    # NOTE(review): the second reader always receives strand_col_2 -- the
    # asymmetry is preserved as-is; confirm whether it is intentional.
    if strand_col_1 >= 0:
        g1.strand_col = strand_col_1

    out_file = open( out_fname, "w" )

    # Nested try/finally (rather than a combined try/except/finally or a
    # ``with`` block) keeps this valid on the Python 2.4 minimum the module
    # asserts, while guaranteeing the handle is released on any exception.
    try:
        try:
            for line in concat( [g1, g2], sameformat=sameformat ):
                if isinstance( line, GenomicInterval ):
                    out_file.write( "%s\n" % "\t".join( line.fields ) )
                else:
                    out_file.write( "%s\n" % line )
        except ParseError:
            # sys.exc_info() is used instead of "except X, e" / "except X as e"
            # so the handler parses on both old and new interpreters.
            exc = sys.exc_info()[1]
            fail( "Invalid file format: %s" % str( exc ) )
    finally:
        out_file.close()

    # Report how many lines each reader had to skip, if any.
    if g1.skipped > 0:
        print( skipped( g1, filedesc=" of 1st dataset" ) )
    if g2.skipped > 0:
        print( skipped( g2, filedesc=" of 2nd dataset" ) )
76       
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。