[2] | 1 | #Dan Blankenberg
|
---|
| 2 | #Filters a MAF file according to the provided code file, which is generated in maf_filter.xml <configfiles>
|
---|
| 3 | #Also allows filtering by number of columns in a block, and limiting output species
|
---|
| 4 | import sys, os, shutil
|
---|
| 5 | from galaxy import eggs
|
---|
| 6 | import pkg_resources; pkg_resources.require( "bx-python" )
|
---|
| 7 | import bx.align.maf
|
---|
| 8 | from galaxy.tools.util import maf_utilities
|
---|
| 9 |
|
---|
def main():
    """Filter the blocks of a MAF file and write the kept blocks to a new MAF.

    Positional command-line arguments, consumed from ``sys.argv`` in order:

    1. script_file: Python code file executed once per candidate block. It
       sees the block as ``maf_block`` and must set ``ret_val`` to a true
       value for the block to be kept.
    2. maf_file: path of the input MAF.
    3. out_file: path of the output MAF.
    4. additional_files_path: directory that receives a copy of the script
       file, saved as ``debug.txt``.
    5. species: option string handed to
       ``maf_utilities.parse_species_option``; a non-empty result limits
       the species written to the output.
    6. min_size: smallest ``text_size`` a block may have (inclusive).
    7. max_size: largest ``text_size`` a block may have (inclusive); a
       value below 1 disables the upper bound.
    8. min_species_per_block: minimum number of components a block must
       still have after species limiting.
    9. exclude_incomplete_blocks: when non-zero, blocks with fewer
       components than the expected number of species are dropped.
    10. comma-separated list whose item count is used as the expected
        number of species; only read when argument 5 yields no species.

    A one-line summary of kept versus total blocks is printed to stdout.
    On missing/invalid arguments, or when the MAF files cannot be opened,
    a message is written to stderr and the process exits with status 1.
    """
    # Read command line arguments
    try:
        script_file = sys.argv.pop(1)
        maf_file = sys.argv.pop(1)
        out_file = sys.argv.pop(1)
        additional_files_path = sys.argv.pop(1)
        species = maf_utilities.parse_species_option(sys.argv.pop(1))
        min_size = int(sys.argv.pop(1))
        max_size = int(sys.argv.pop(1))
        if max_size < 1:
            # A non-positive maximum means "no upper limit".
            max_size = sys.maxsize
        min_species_per_block = int(sys.argv.pop(1))
        exclude_incomplete_blocks = int(sys.argv.pop(1))
        if species:
            num_species = len(species)
        else:
            num_species = len(sys.argv.pop(1).split(','))
    except Exception:
        # 'Exception' (not a bare except) so SystemExit/KeyboardInterrupt
        # are no longer swallowed by the usage message.
        sys.stderr.write("One or more arguments is missing.\nUsage: maf_filter.py maf_filter_file input_maf output_maf path_to_save_debug species_to_keep\n")
        sys.exit(1)

    # Open input and output MAF files
    try:
        maf_reader = bx.align.maf.Reader(open(maf_file, 'r'))
        maf_writer = bx.align.maf.Writer(open(out_file, 'w'))
    except Exception:
        sys.stderr.write("Your MAF file appears to be malformed.\n")
        sys.exit(1)

    total_blocks = 0
    blocks_kept = 0
    try:
        # Save script file for debugging/verification info later
        if not os.path.isdir(additional_files_path):
            os.mkdir(additional_files_path)
        shutil.copy(script_file, os.path.join(additional_files_path, 'debug.txt'))

        # Compile the filter once instead of re-reading and re-parsing the
        # script file for every block.
        # NOTE(review): the script is executed as arbitrary Python code; it
        # must only ever come from a trusted generator.
        with open(script_file) as script_handle:
            filter_code = compile(script_handle.read(), script_file, 'exec')

        # Loop through blocks, running filter on each
        # 'maf_block' and 'ret_val' are used/shared in the provided code file
        # 'ret_val' should be set to True if the block is to be kept
        # Counting from 1 keeps "no blocks" (0) distinct from "one block" (1).
        for total_blocks, maf_block in enumerate(maf_reader, 1):
            if not min_size <= maf_block.text_size <= max_size:
                continue
            local = {'maf_block': maf_block, 'ret_val': False}
            exec(filter_code, {}, local)
            if not local['ret_val']:
                continue
            # Species limiting must be done after filters as filters could
            # be run on non-requested output species
            if species:
                maf_block = maf_block.limit_to_species(species)
            num_components = len(maf_block.components)
            if num_components < min_species_per_block:
                continue
            if exclude_incomplete_blocks and num_components < num_species:
                continue
            maf_writer.write(maf_block)
            blocks_kept += 1
    finally:
        # Release both files even when the filter script or a block fails.
        maf_writer.close()
        maf_reader.close()

    if total_blocks == 0:
        print("Your file contains no valid maf_blocks.")
    else:
        print('Kept %s of %s blocks (%.2f%%).' % (blocks_kept, total_blocks, float(blocks_kept) / float(total_blocks) * 100.0))
| 63 |
|
---|
# Script entry point: run the filter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()