[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | """ |
---|
| 4 | Reads a list of intervals and a maf. Produces a new maf containing the |
---|
| 5 | blocks or parts of blocks in the original that overlapped the intervals. |
---|
| 6 | |
---|
| 7 | If a MAF file, not UID, is provided the MAF file is indexed before being processed. |
---|
| 8 | |
---|
| 9 | NOTE: If two intervals overlap the same block it will be written twice. |
---|
| 10 | |
---|
| 11 | usage: %prog maf_file [options] |
---|
| 12 | -d, --dbkey=d: Database key, ie hg17 |
---|
| 13 | -c, --chromCol=c: Column of Chr |
---|
| 14 | -s, --startCol=s: Column of Start |
---|
| 15 | -e, --endCol=e: Column of End |
---|
| 16 | -S, --strandCol=S: Column of Strand |
---|
| 17 | -t, --mafType=t: Type of MAF source to use |
---|
| 18 | -m, --mafFile=m: Path of source MAF file, if not using cached version |
---|
| 19 | -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version |
---|
| 20 | -i, --interval_file=i: Input interval file |
---|
| 21 | -o, --output_file=o: Output MAF file |
---|
| 22 | -p, --species=p: Species to include in output |
---|
| 23 | -P, --split_blocks_by_species=P: Split blocks by species |
---|
| 24 | -r, --remove_all_gap_columns=r: Remove all Gap columns |
---|
| 25 | -l, --indexLocation=l: Override default maf_index.loc file |
---|
| 26 | -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc ) |
---|
| 27 | """ |
---|
| 28 | |
---|
| 29 | #Dan Blankenberg |
---|
| 30 | from galaxy import eggs |
---|
| 31 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
| 32 | from bx.cookbook import doc_optparse |
---|
| 33 | import bx.align.maf |
---|
| 34 | import bx.intervals.io |
---|
| 35 | from galaxy.tools.util import maf_utilities |
---|
| 36 | import sys |
---|
| 37 | |
---|
| 38 | assert sys.version_info[:2] >= ( 2, 4 ) |
---|
| 39 | |
---|
| 40 | def __main__(): |
---|
| 41 | index = index_filename = None |
---|
| 42 | mincols = 0 |
---|
| 43 | |
---|
| 44 | #Parse Command Line |
---|
| 45 | options, args = doc_optparse.parse( __doc__ ) |
---|
| 46 | |
---|
| 47 | if options.dbkey: dbkey = options.dbkey |
---|
| 48 | else: dbkey = None |
---|
| 49 | if dbkey in [None, "?"]: |
---|
| 50 | maf_utilities.tool_fail( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." ) |
---|
| 51 | |
---|
| 52 | species = maf_utilities.parse_species_option( options.species ) |
---|
| 53 | |
---|
| 54 | if options.chromCol: chromCol = int( options.chromCol ) - 1 |
---|
| 55 | else: |
---|
| 56 | maf_utilities.tool_fail( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." ) |
---|
| 57 | |
---|
| 58 | if options.startCol: startCol = int( options.startCol ) - 1 |
---|
| 59 | else: |
---|
| 60 | maf_utilities.tool_fail( "Start column not set, click the pencil icon in the history item to set the metadata attributes." ) |
---|
| 61 | |
---|
| 62 | if options.endCol: endCol = int( options.endCol ) - 1 |
---|
| 63 | else: |
---|
| 64 | maf_utilities.tool_fail( "End column not set, click the pencil icon in the history item to set the metadata attributes." ) |
---|
| 65 | |
---|
| 66 | if options.strandCol: strandCol = int( options.strandCol ) - 1 |
---|
| 67 | else: |
---|
| 68 | strandCol = -1 |
---|
| 69 | |
---|
| 70 | if options.interval_file: interval_file = options.interval_file |
---|
| 71 | else: |
---|
| 72 | maf_utilities.tool_fail( "Input interval file has not been specified." ) |
---|
| 73 | |
---|
| 74 | if options.output_file: output_file = options.output_file |
---|
| 75 | else: |
---|
| 76 | maf_utilities.tool_fail( "Output file has not been specified." ) |
---|
| 77 | |
---|
| 78 | split_blocks_by_species = remove_all_gap_columns = False |
---|
| 79 | if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species': |
---|
| 80 | split_blocks_by_species = True |
---|
| 81 | if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns': |
---|
| 82 | remove_all_gap_columns = True |
---|
| 83 | else: |
---|
| 84 | remove_all_gap_columns = True |
---|
| 85 | #Finish parsing command line |
---|
| 86 | |
---|
| 87 | #Open indexed access to MAFs |
---|
| 88 | if options.mafType: |
---|
| 89 | if options.indexLocation: |
---|
| 90 | index = maf_utilities.maf_index_by_uid( options.mafType, options.indexLocation ) |
---|
| 91 | else: |
---|
| 92 | index = maf_utilities.maf_index_by_uid( options.mafType, options.mafIndexFile ) |
---|
| 93 | if index is None: |
---|
| 94 | maf_utilities.tool_fail( "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) ) |
---|
| 95 | elif options.mafFile: |
---|
| 96 | index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] ) |
---|
| 97 | if index is None: |
---|
| 98 | maf_utilities.tool_fail( "Your MAF file appears to be malformed." ) |
---|
| 99 | else: |
---|
| 100 | maf_utilities.tool_fail( "Desired source MAF type has not been specified." ) |
---|
| 101 | |
---|
| 102 | #Create MAF writter |
---|
| 103 | out = bx.align.maf.Writer( open(output_file, "w") ) |
---|
| 104 | |
---|
| 105 | #Iterate over input regions |
---|
| 106 | num_blocks = 0 |
---|
| 107 | num_regions = None |
---|
| 108 | for num_regions, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ): |
---|
| 109 | src = maf_utilities.src_merge( dbkey, region.chrom ) |
---|
| 110 | for block in index.get_as_iterator( src, region.start, region.end ): |
---|
| 111 | if split_blocks_by_species: |
---|
| 112 | blocks = [ new_block for new_block in maf_utilities.iter_blocks_split_by_species( block ) if maf_utilities.component_overlaps_region( new_block.get_component_by_src_start( dbkey ), region ) ] |
---|
| 113 | else: |
---|
| 114 | blocks = [ block ] |
---|
| 115 | for block in blocks: |
---|
| 116 | block = maf_utilities.chop_block_by_region( block, src, region ) |
---|
| 117 | if block is not None: |
---|
| 118 | if species is not None: |
---|
| 119 | block = block.limit_to_species( species ) |
---|
| 120 | block = maf_utilities.orient_block_by_region( block, src, region ) |
---|
| 121 | if remove_all_gap_columns: |
---|
| 122 | block.remove_all_gap_columns() |
---|
| 123 | out.write( block ) |
---|
| 124 | num_blocks += 1 |
---|
| 125 | |
---|
| 126 | #Close output MAF |
---|
| 127 | out.close() |
---|
| 128 | |
---|
| 129 | #remove index file if created during run |
---|
| 130 | maf_utilities.remove_temp_index_file( index_filename ) |
---|
| 131 | |
---|
| 132 | if num_blocks: |
---|
| 133 | print "%i MAF blocks extracted for %i regions." % ( num_blocks, ( num_regions + 1 ) ) |
---|
| 134 | elif num_regions is not None: |
---|
| 135 | print "No MAF blocks could be extracted for %i regions." % ( num_regions + 1 ) |
---|
| 136 | else: |
---|
| 137 | print "No valid regions have been provided." |
---|
| 138 | |
---|
| 139 | if __name__ == "__main__": __main__() |
---|