root/galaxy-central/tools/ngs_rna/cuffdiff_wrapper.py

リビジョン 2, 7.6 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2
3import optparse, os, shutil, subprocess, sys, tempfile
4
5def stop_err( msg ):
6    sys.stderr.write( "%s\n" % msg )
7    sys.exit()
8
9def __main__():
10    #Parse Command Line
11    parser = optparse.OptionParser()
12   
13    # Cuffdiff options.
14    parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
15    parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
16    parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \
17                                                                                For, example, for paired end runs with fragments selected at 300bp, \
18                                                                                where each end is 50bp, you should set -r to be 200. The default is 45bp.')
19    parser.add_option( '-Q', '--min-mapqual', dest='min_mapqual', help='Instructs Cufflinks to ignore alignments with a SAM mapping quality lower than this number. The default is 0.' )
20    parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' )
21    parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' )
22
23    # Advanced Options:
24    parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
25    parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
26   
27    # Wrapper / Galaxy options.
28    parser.add_option( '-A', '--inputA', dest='inputA', help='A transcript GTF file produced by cufflinks, cuffcompare, or other source.')
29    parser.add_option( '-1', '--input1', dest='input1', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
30    parser.add_option( '-2', '--input2', dest='input2', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
31
32    parser.add_option( "--isoforms_fpkm_tracking_output", dest="isoforms_fpkm_tracking_output" )
33    parser.add_option( "--genes_fpkm_tracking_output", dest="genes_fpkm_tracking_output" )
34    parser.add_option( "--cds_fpkm_tracking_output", dest="cds_fpkm_tracking_output" )
35    parser.add_option( "--tss_groups_fpkm_tracking_output", dest="tss_groups_fpkm_tracking_output" )
36    parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" )
37    parser.add_option( "--genes_exp_output", dest="genes_exp_output" )
38    parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" )
39    parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" )
40    parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" )
41    parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
42    parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" )
43   
44    (options, args) = parser.parse_args()
45   
46    # Make temp directory for output.
47    tmp_output_dir = tempfile.mkdtemp()
48   
49    # Build command.
50   
51    # Base.
52    cmd = "cuffdiff"
53   
54    # Add options.
55    if options.inner_dist_std_dev:
56        cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) )
57    if options.num_threads:
58        cmd += ( " -p %i" % int ( options.num_threads ) )
59    if options.inner_mean_dist:
60        cmd += ( " -m %i" % int ( options.inner_mean_dist ) )
61    if options.min_mapqual:
62        cmd += ( " -Q %i" % int ( options.min_mapqual ) )
63    if options.min_alignment_count:
64        cmd += ( " -c %i" % int ( options.min_alignment_count ) )
65    if options.FDR:
66        cmd += ( " --FDR %f" % float( options.FDR ) )
67    if options.num_importance_samples:
68        cmd += ( " --num-importance-samples %i" % int ( options.num_importance_samples ) )
69    if options.max_mle_iterations:
70        cmd += ( " --max-mle-iterations %i" % int ( options.max_mle_iterations ) )
71           
72    # Add inputs.
73    cmd += " " + options.inputA + " " + options.input1 + " " + options.input2
74
75    # Run command.
76    try:
77        tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
78        tmp_stderr = open( tmp_name, 'wb' )
79        proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_output_dir, stderr=tmp_stderr.fileno() )
80        returncode = proc.wait()
81        tmp_stderr.close()
82       
83        # Get stderr, allowing for case where it's very large.
84        tmp_stderr = open( tmp_name, 'rb' )
85        stderr = ''
86        buffsize = 1048576
87        try:
88            while True:
89                stderr += tmp_stderr.read( buffsize )
90                if not stderr or len( stderr ) % buffsize != 0:
91                    break
92        except OverflowError:
93            pass
94        tmp_stderr.close()
95       
96        # Error checking.
97        if returncode != 0:
98            raise Exception, stderr
99           
100        # check that there are results in the output file
101        if len( open( tmp_output_dir + "/isoforms.fpkm_tracking", 'rb' ).read().strip() ) == 0:
102            raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
103    except Exception, e:
104        stop_err( 'Error running cuffdiff. ' + str( e ) )
105
106       
107    # Copy output files from tmp directory to specified files.
108    try:
109        try:
110            shutil.copyfile( tmp_output_dir + "/isoforms.fpkm_tracking", options.isoforms_fpkm_tracking_output )
111            shutil.copyfile( tmp_output_dir + "/genes.fpkm_tracking", options.genes_fpkm_tracking_output )
112            shutil.copyfile( tmp_output_dir + "/cds.fpkm_tracking", options.cds_fpkm_tracking_output )
113            shutil.copyfile( tmp_output_dir + "/tss_groups.fpkm_tracking", options.tss_groups_fpkm_tracking_output )
114            shutil.copyfile( tmp_output_dir + "/0_1_isoform_exp.diff", options.isoforms_exp_output )
115            shutil.copyfile( tmp_output_dir + "/0_1_gene_exp.diff", options.genes_exp_output )
116            shutil.copyfile( tmp_output_dir + "/0_1_tss_group_exp.diff", options.tss_groups_exp_output )
117            shutil.copyfile( tmp_output_dir + "/0_1_splicing.diff", options.splicing_diff_output )
118            shutil.copyfile( tmp_output_dir + "/0_1_cds.diff", options.cds_diff_output )
119            shutil.copyfile( tmp_output_dir + "/0_1_cds_exp.diff", options.cds_diff_output )
120            shutil.copyfile( tmp_output_dir + "/0_1_promoters.diff", options.promoters_diff_output )   
121        except Exception, e:
122            stop_err( 'Error in cuffdiff:\n' + str( e ) )
123    finally:
124        # Clean up temp dirs
125        if os.path.exists( tmp_output_dir ):
126            shutil.rmtree( tmp_output_dir )
127
128if __name__=="__main__": __main__()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。