by specified attributes maf_filter.py $maf_filter_file $input1 $out_file1 $out_file1.files_path $species $min_size $max_size $min_species_per_block $exclude_incomplete_blocks ${input1.metadata.species}
## Cheetah template that renders the Python source of maf_block_pass_filter.
## The rendered function returns False as soon as one secondary-species
## constraint is violated and True otherwise.
##
## Only the operators listed in this mapping are ever spliced into the
## rendered code; any other submitted value is replaced by the default
## given to .get() at each use site.
#set $is_isnot_valid = {"==":"==", "!=":"!=", "in":"in", "not in":"not in"}
def maf_block_pass_filter( maf_block ):
#for $maf_filter in $maf_filters:
## A primary-species filter without secondary conditions renders no code.
#if $len( $maf_filter['species1_attributes']['filter_condition'] ) == 0:
#continue
#end if
## User-supplied text is string_escape-encoded at render time and decoded
## again when the rendered script runs -- presumably so that quotes in the
## input cannot terminate the triple-quoted literal (NOTE(review): confirm).
## The 'string_escape' codec exists only on Python 2.
    primary_component = maf_block.get_component_by_src_start( """$maf_filter['species1'].value.encode( 'string_escape' )""".decode( 'string_escape' ) )
    if primary_component is not None:
#if $maf_filter['species1_attributes']['species1_attribute_type'] == 'attribute_chr':
## Chromosome test: the text after the last "." of src is compared against
## the comma-separated list. The fallback operator must be valid Python, so
## it is 'in' (the former 'is in' rendered a SyntaxError).
        if primary_component.src.split( "." )[-1] $is_isnot_valid.get( $maf_filter['species1_attributes']['species1_is_isnot'].value.strip(), 'in' ) """$maf_filter['species1_attributes']['species1_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ).split( "," ):
#else
## Strand test: the strand is compared against a single value.
        if primary_component.strand $is_isnot_valid.get( $maf_filter['species1_attributes']['species1_is_isnot'].value.strip(), '==' ) """$maf_filter['species1_attributes']['species1_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ):
#end if
## Secondary constraints are only evaluated when the primary test matched.
## A secondary species that is absent from the block is skipped, not failed.
#for $filter_condition in $maf_filter['species1_attributes']['filter_condition']:
            secondary_component = maf_block.get_component_by_src_start( """$filter_condition['species2'].value.encode( 'string_escape' )""".decode( 'string_escape' ) )
#if $filter_condition['species2_attributes']['species2_attribute_type'] == 'attribute_chr':
            if secondary_component is not None:
                if not ( secondary_component.src.split( "." )[-1] $is_isnot_valid.get( $filter_condition['species2_attributes']['species2_is_isnot'].value.strip(), 'in' ) """$filter_condition['species2_attributes']['species2_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ).split( "," ) ):
                    return False
#else:
            if secondary_component is not None:
                if not ( secondary_component.strand $is_isnot_valid.get( $filter_condition['species2_attributes']['species2_is_isnot'].value.strip(), '==' ) """$filter_condition['species2_attributes']['species2_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ) ):
                    return False
#end if
#end for
#end for
    return True
ret_val = maf_block_pass_filter( maf_block )
This tool allows you to build complex filters to be applied to each alignment block of a MAF file. You can define restraints on species based upon chromosome and strand. You can specify comma separated lists of chromosomes where appropriate. .. class:: infomark For example, this tool is useful to restrict a set of alignments to only those blocks which contain alignments between chromosomes that are considered homologous. ----- .. class:: warningmark If a species is not found in a particular block, all filters on that species are ignored. ----- This tool allows the user to remove any undesired species from a MAF file. If no species are specified then all species will be kept. If species are specified, columns which contain only gaps are removed. The options for this are: * **Exclude blocks which have missing species** - suppose you want to restrict an 8-way alignment to human, mouse, and rat. The tool will first remove all other species. Next, if this option is set to **YES** the tool WILL NOT return MAF blocks, which do not include human, mouse, or rat. This means that all alignment blocks returned by the tool will have exactly three sequences in this example. 
* **Exclude blocks which have only one species** - if this option is set to **YES**, all single-sequence alignment blocks WILL NOT be returned. ----- You can also provide a size range and limit your output to the MAF blocks which fall within the specified range.