Context Navigation

flanking_features.py @ 2

リビジョン 2, 10.2 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central

Rev	行番号
[2]	1	#!/usr/bin/env python
	2	#By: Guruprasad Ananda
	3	"""
	4	Fetch closest up/downstream interval from features corresponding to every interval in primary
	5
	6	usage: %prog primary_file features_file out_file direction
	7	-1, --cols1=N,N,N,N: Columns for start, end, strand in first file
	8	-2, --cols2=N,N,N,N: Columns for start, end, strand in second file
	9	-G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval
	10	-H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval
	11	"""
	12	from galaxy import eggs
	13	import pkg_resources
	14	pkg_resources.require( "bx-python" )
	15	import sys, traceback, fileinput
	16	from warnings import warn
	17	from bx.cookbook import doc_optparse
	18	from galaxy.tools.util.galaxyops import *
	19	from bx.intervals.io import *
	20	from bx.intervals.operations import quicksect
	21	from galaxy.tools.util.gff_util import *
	22
	23	assert sys.version_info[:2] >= ( 2, 4 )
	24
	25	def get_closest_feature (node, direction, threshold_up, threshold_down, report_func_up, report_func_down):
	26	#direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases
	27	#threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand
	28	#threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand
	29	if direction == 1:
	30	if node.maxend < threshold_up:
	31	if node.end == node.maxend:
	32	report_func_up(node)
	33	elif node.right and node.left:
	34	if node.right.maxend == node.maxend:
	35	get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	36	elif node.left.maxend == node.maxend:
	37	get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	38	elif node.right and node.right.maxend == node.maxend:
	39	get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	40	elif node.left and node.left.maxend == node.maxend:
	41	get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	42	elif node.minend < threshold_up:
	43	if node.end < threshold_up:
	44	report_func_up(node)
	45	if node.left and node.right:
	46	if node.right.minend < threshold_up:
	47	get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	48	if node.left.minend < threshold_up:
	49	get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	50	elif node.left:
	51	if node.left.minend < threshold_up:
	52	get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	53	elif node.right:
	54	if node.right.minend < threshold_up:
	55	get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	56	elif direction == 0:
	57	if node.start > threshold_down:
	58	report_func_down(node)
	59	if node.left:
	60	get_closest_feature(node.left, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	61	else:
	62	if node.right:
	63	get_closest_feature(node.right, direction, threshold_up, threshold_down, report_func_up, report_func_down)
	64
	65	def proximal_region_finder(readers, region, comments=True):
	66	"""
	67	Returns an iterator that yields elements of the form [ <original_interval>, <closest_feature> ].
	68	Intervals are GenomicInterval objects.
	69	"""
	70	primary = readers[0]
	71	features = readers[1]
	72	either = False
	73	if region == 'Upstream':
	74	up, down = True, False
	75	elif region == 'Downstream':
	76	up, down = False, True
	77	else:
	78	up, down = True, True
	79	if region == 'Either':
	80	either = True
	81
	82	# Read features into memory:
	83	rightTree = quicksect.IntervalTree()
	84	for item in features:
	85	if type( item ) is GenomicInterval:
	86	rightTree.insert( item, features.linenum, item )
	87
	88	for interval in primary:
	89	if type( interval ) is Header:
	90	yield interval
	91	if type( interval ) is Comment and comments:
	92	yield interval
	93	elif type( interval ) == GenomicInterval:
	94	chrom = interval.chrom
	95	start = int(interval.start)
	96	end = int(interval.end)
	97	strand = interval.strand
	98	if chrom not in rightTree.chroms:
	99	continue
	100	else:
	101	root = rightTree.chroms[chrom] #root node for the chrom tree
	102	result_up = []
	103	result_down = []
	104	if (strand == '+' and up) or (strand == '-' and down):
	105	#upstream +ve strand and downstream -ve strand cases
	106	get_closest_feature (root, 1, start, None, lambda node: result_up.append( node ), None)
	107
	108	if (strand == '+' and down) or (strand == '-' and up):
	109	#downstream +ve strand and upstream -ve strand case
	110	get_closest_feature (root, 0, None, end-1, None, lambda node: result_down.append( node ))
	111
	112	if result_up:
	113	if len(result_up) > 1: #The results_up list has a list of intervals upstream to the given interval.
	114	ends = []
	115	for n in result_up:
	116	ends.append(n.end)
	117	res_ind = ends.index(max(ends)) #fetch the index of the closest interval i.e. the interval with the max end from the results_up list
	118	else:
	119	res_ind = 0
	120	if not(either):
	121	yield [ interval, result_up[res_ind].other ]
	122
	123	if result_down:
	124	if not(either):
	125	#The last element of result_down will be the closest element to the given interval
	126	yield [ interval, result_down[-1].other ]
	127
	128	if either and (result_up or result_down):
	129	iter_val = []
	130	if result_up and result_down:
	131	if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)):
	132	iter_val = [ interval, result_up[res_ind].other ]
	133	else:
	134	#The last element of result_down will be the closest element to the given interval
	135	iter_val = [ interval, result_down[-1].other ]
	136	elif result_up:
	137	iter_val = [ interval, result_up[res_ind].other ]
	138	elif result_down:
	139	#The last element of result_down will be the closest element to the given interval
	140	iter_val = [ interval, result_down[-1].other ]
	141	yield iter_val
	142
	143	def main():
	144	options, args = doc_optparse.parse( __doc__ )
	145	try:
	146	chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
	147	chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
	148	in1_gff_format = bool( options.gff1 )
	149	in2_gff_format = bool( options.gff2 )
	150	in_fname, in2_fname, out_fname, direction = args
	151	except:
	152	doc_optparse.exception()
	153
	154	# Set readers to handle either GFF or default format.
	155	if in1_gff_format:
	156	in1_reader_wrapper = GFFReaderWrapper
	157	else:
	158	in1_reader_wrapper = NiceReaderWrapper
	159	if in2_gff_format:
	160	in2_reader_wrapper = GFFReaderWrapper
	161	else:
	162	in2_reader_wrapper = NiceReaderWrapper
	163
	164	g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),
	165	chrom_col=chr_col_1,
	166	start_col=start_col_1,
	167	end_col=end_col_1,
	168	strand_col=strand_col_1,
	169	fix_strand=True )
	170	g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),
	171	chrom_col=chr_col_2,
	172	start_col=start_col_2,
	173	end_col=end_col_2,
	174	strand_col=strand_col_2,
	175	fix_strand=True )
	176
	177	# Find flanking features.
	178	out_file = open( out_fname, "w" )
	179	try:
	180	for result in proximal_region_finder([g1,g2], direction):
	181	if type( result ) is list:
	182	line, closest_feature = result
	183	# Need to join outputs differently depending on file types.
	184	if in1_gff_format:
	185	# Output is GFF with added attribute 'closest feature.'
	186
	187	# Invervals are in BED coordinates; need to convert to GFF.
	188	line = convert_bed_coords_to_gff( line )
	189	closest_feature = convert_bed_coords_to_gff( closest_feature )
	190
	191	# Replace double quotes with single quotes in closest feature's attributes.
	192	out_file.write( "%s closest_feature \"%s\" \n" %
	193	( "\t".join( line.fields ), \
	194	"\t".join( closest_feature.fields ).replace( "\"", "\\\"" )
	195	) )
	196	else:
	197	# Output is BED + closest feature fields.
	198	output_line_fields = []
	199	output_line_fields.extend( line.fields )
	200	output_line_fields.extend( closest_feature.fields )
	201	out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) )
	202	else:
	203	out_file.write( "%s\n" % result )
	204	except ParseError, exc:
	205	fail( "Invalid file format: %s" % str( exc ) )
	206
	207	print "Direction: %s" %(direction)
	208	if g1.skipped > 0:
	209	print skipped( g1, filedesc=" of 1st dataset" )
	210	if g2.skipped > 0:
	211	print skipped( g2, filedesc=" of 2nd dataset" )
	212
	213	if __name__ == "__main__":
	214	main()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/new_operations/flanking_features.py @ 2

異なるフォーマットでダウンロード: