Context Navigation

ucsc_gene_bed_to_exon_bed.py @ 2

リビジョン 2, 6.5 KB (コミッタ: hatakeyama, 15 年前)
import galaxy-central
属性 svn:executable の設定値 ``*

Rev	行番号
[2]	1	#!/usr/bin/env python
	2
	3	"""
	4	Read a table dump in the UCSC gene table format and print a tab separated
	5	list of intervals corresponding to requested features of each gene.
	6
	7	usage: ucsc_gene_bed_to_exon_bed.py [options]
	8
	9	options:
	10	-h, --help show this help message and exit
	11	-rREGION, --region=REGION
	12	Limit to region: one of coding, utr3, utr5, codon, intron, transcribed [default]
	13	-e, --exons Only print intervals overlapping an exon
	14	-i, --input=inputfile input file
	15	-o, --output=outputfile output file
	16	"""
	17
	18	import optparse, string, sys
	19
	20	assert sys.version_info[:2] >= ( 2, 4 )
	21
	22	def main():
	23
	24	# Parse command line
	25	parser = optparse.OptionParser( usage="%prog [options] " )
	26	parser.add_option( "-r", "--region", dest="region", default="transcribed",
	27	help="Limit to region: one of coding, utr3, utr5, transcribed [default]" )
	28	parser.add_option( "-e", "--exons", action="store_true", dest="exons",
	29	help="Only print intervals overlapping an exon" )
	30	parser.add_option( "-s", "--strand", action="store_true", dest="strand",
	31	help="Print strand after interval" )
	32	parser.add_option( "-i", "--input", dest="input", default=None,
	33	help="Input file" )
	34	parser.add_option( "-o", "--output", dest="output", default=None,
	35	help="Output file" )
	36	options, args = parser.parse_args()
	37	assert options.region in ( 'coding', 'utr3', 'utr5', 'transcribed', 'intron', 'codon' ), "Invalid region argument"
	38
	39	try:
	40	out_file = open (options.output,"w")
	41	except:
	42	print >> sys.stderr, "Bad output file."
	43	sys.exit(0)
	44
	45	try:
	46	in_file = open (options.input)
	47	except:
	48	print >> sys.stderr, "Bad input file."
	49	sys.exit(0)
	50
	51	print "Region:", options.region+";"
	52	"""print "Only overlap with Exons:",
	53	if options.exons:
	54	print "Yes"
	55	else:
	56	print "No"
	57	"""
	58
	59	# Read table and handle each gene
	60	for line in in_file:
	61	try:
	62	if line[0:1] == "#":
	63	continue
	64	# Parse fields from gene tabls
	65	fields = line.split( '\t' )
	66	chrom = fields[0]
	67	tx_start = int( fields[1] )
	68	tx_end = int( fields[2] )
	69	name = fields[3]
	70	strand = fields[5].replace(" ","_")
	71	cds_start = int( fields[6] )
	72	cds_end = int( fields[7] )
	73
	74	# Determine the subset of the transcribed region we are interested in
	75	if options.region == 'utr3':
	76	if strand == '-': region_start, region_end = tx_start, cds_start
	77	else: region_start, region_end = cds_end, tx_end
	78	elif options.region == 'utr5':
	79	if strand == '-': region_start, region_end = cds_end, tx_end
	80	else: region_start, region_end = tx_start, cds_start
	81	elif options.region == 'coding' or options.region == 'codon':
	82	region_start, region_end = cds_start, cds_end
	83	else:
	84	region_start, region_end = tx_start, tx_end
	85
	86	# If only interested in exons, print the portion of each exon overlapping
	87	# the region of interest, otherwise print the span of the region
	88	# options.exons is always TRUE
	89	if options.exons:
	90	exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) )
	91	exon_starts = map((lambda x: x + tx_start ), exon_starts)
	92	exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) )
	93	exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends);
	94
	95	#for Intron regions:
	96	if options.region == 'intron':
	97	i=0
	98	while i < len(exon_starts)-1:
	99	intron_starts = exon_ends[i]
	100	intron_ends = exon_starts[i+1]
	101	if strand: print_tab_sep(out_file, chrom, intron_starts, intron_ends, name, "0", strand )
	102	else: print_tab_sep(out_file, chrom, intron_starts, intron_ends )
	103	i+=1
	104	#for non-intron regions:
	105	else:
	106	shift = 0
	107	for start, end in zip( exon_starts, exon_ends ):
	108	start = max( start, region_start )
	109	end = min( end, region_end )
	110	if start < end:
	111	if options.region == 'codon':
	112	start += shift
	113	c_start = start
	114	while c_start+3 <= end:
	115	if strand:
	116	print_tab_sep(out_file, chrom, c_start, c_start+3, name, "0", strand )
	117	else:
	118	print_tab_sep(out_file, chrom, c_start, c_start+3)
	119	c_start += 3
	120	shift = (3 - ((end-start)%3))%3
	121	else:
	122	if strand:
	123	print_tab_sep(out_file, chrom, start, end, name, "0", strand )
	124	else:
	125	print_tab_sep(out_file, chrom, start, end )
	126	"""
	127	else:
	128	if options.region == 'codon':
	129	c_start = start
	130	c_end = end
	131	if c_start > c_end:
	132	t = c_start
	133	c_start = c_end
	134	c_end = t
	135	while c_start+3 <= c_end:
	136	if strand:
	137	print_tab_sep(out_file, chrom, c_start, c_start+3, name, "0", strand )
	138	else:
	139	print_tab_sep(out_file, chrom, c_start, c_start+3)
	140	c_start += 3
	141	else:
	142	if strand:
	143	print_tab_sep(out_file, chrom, region_start, region_end, name, "0", strand )
	144	else:
	145	print_tab_sep(out_file, chrom, region_start, region_end )
	146	"""
	147	except:
	148	continue
	149
	150	def print_tab_sep(out_file, *args ):
	151	"""Print items in `l` to stdout separated by tabs"""
	152	print >>out_file, string.join( [ str( f ) for f in args ], '\t' )
	153
	154	if __name__ == "__main__": main()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/filters/ucsc_gene_bed_to_exon_bed.py @ 2

異なるフォーマットでダウンロード: