Context Navigation

ucsc_gene_bed_to_exon_bed.py @ 2

リビジョン 2, 6.5 KB (コミッタ: hatakeyama, 15 年前)
import galaxy-central
属性 svn:executable の設定値 ``*

行番号
1	#!/usr/bin/env python
2
3	"""
4	Read a UCSC gene track exported in 12-column BED format and print a tab
5	separated list of intervals corresponding to requested features of each gene.
6
7	usage: ucsc_gene_bed_to_exon_bed.py [options]
8
9	options:
10	-h, --help show this help message and exit
11	-rREGION, --region=REGION
12	Limit to region: one of coding, utr3, utr5, codon, intron, transcribed [default]
13	-e, --exons Only print intervals overlapping an exon
14	-i, --input=inputfile input file
15	-o, --output=outputfile output file
16	"""
17
18	import optparse, string, sys
19
20	assert sys.version_info[:2] >= ( 2, 4 )
21
# Region names accepted by --region.
_VALID_REGIONS = ( 'coding', 'utr3', 'utr5', 'transcribed', 'intron', 'codon' )


def _region_bounds( region, strand, tx_start, tx_end, cds_start, cds_end ):
    """Return the (start, end) span of the gene selected by `region`.

    The UTR spans are swapped for minus-strand genes. Any region other than
    'utr3', 'utr5', 'coding' or 'codon' yields the whole transcribed span.
    """
    if region == 'utr3':
        if strand == '-':
            return tx_start, cds_start
        return cds_end, tx_end
    if region == 'utr5':
        if strand == '-':
            return cds_end, tx_end
        return tx_start, cds_start
    if region in ( 'coding', 'codon' ):
        return cds_start, cds_end
    return tx_start, tx_end


def _exon_blocks( fields, tx_start ):
    """Return absolute (start, end) pairs for the exon blocks of one record.

    Column 12 (fields[11]) holds block starts relative to `tx_start` and
    column 11 (fields[10]) holds block sizes; both are comma separated and may
    carry a trailing comma. Raises ValueError or IndexError on malformed input.
    """
    block_starts = [ int( s ) + tx_start for s in fields[11].rstrip( ',\n' ).split( ',' ) ]
    block_sizes = [ int( s ) for s in fields[10].rstrip( ',\n' ).split( ',' ) ]
    # zip() would silently drop unmatched entries, so reject the record instead.
    if len( block_starts ) != len( block_sizes ):
        raise ValueError( "block start and block size counts differ" )
    return [ ( start, start + size ) for start, size in zip( block_starts, block_sizes ) ]


def _gene_intervals( fields, region ):
    """Return the list of (start, end) intervals to report for one record.

    For 'intron' the gaps between consecutive exon blocks are returned. For
    'codon' each exon's overlap with the coding span is cut into 3-base
    windows, carrying the leftover phase into the next exon. For every other
    region the overlap of each exon block with the region span is returned.
    Raises ValueError or IndexError when a required column is missing or is
    not numeric.
    """
    tx_start = int( fields[1] )
    tx_end = int( fields[2] )
    strand = fields[5].replace( " ", "_" )
    cds_start = int( fields[6] )
    cds_end = int( fields[7] )
    exons = _exon_blocks( fields, tx_start )

    if region == 'intron':
        return [ ( exons[i][1], exons[i + 1][0] ) for i in range( len( exons ) - 1 ) ]

    region_start, region_end = _region_bounds( region, strand, tx_start, tx_end, cds_start, cds_end )
    intervals = []
    shift = 0
    for start, end in exons:
        # Clip the exon to the region of interest; skip exons outside it.
        start = max( start, region_start )
        end = min( end, region_end )
        if start >= end:
            continue
        if region == 'codon':
            # Skip the bases that complete the codon split by the previous exon.
            start += shift
            c_start = start
            while c_start + 3 <= end:
                intervals.append( ( c_start, c_start + 3 ) )
                c_start += 3
            shift = ( 3 - ( ( end - start ) % 3 ) ) % 3
        else:
            intervals.append( ( start, end ) )
    return intervals


def main():
    """Convert a UCSC BED12 gene file into a BED file of per-gene intervals.

    Reads the file given by --input, skips '#' comment lines and lines that
    cannot be parsed, and writes the intervals selected by --region to the
    file given by --output. Intervals are only written when --exons is given.
    A "Region: <name>;" summary line is written to stdout.
    """
    # Parse command line
    parser = optparse.OptionParser( usage="%prog [options] " )
    parser.add_option( "-r", "--region", dest="region", default="transcribed",
                       help="Limit to region: one of coding, utr3, utr5, codon, intron, transcribed [default]" )
    parser.add_option( "-e", "--exons", action="store_true", dest="exons",
                       help="Only print intervals overlapping an exon" )
    parser.add_option( "-s", "--strand", action="store_true", dest="strand",
                       help="Print strand after interval" )
    parser.add_option( "-i", "--input", dest="input", default=None,
                       help="Input file" )
    parser.add_option( "-o", "--output", dest="output", default=None,
                       help="Output file" )
    options, args = parser.parse_args()
    # Explicit check rather than assert, which is stripped under -O.
    if options.region not in _VALID_REGIONS:
        parser.error( "Invalid region argument" )

    # TypeError covers a missing -o/-i option (open(None)).
    # NOTE(review): exit status 0 on failure is kept for backward
    # compatibility; the error is only signalled on stderr.
    try:
        out_file = open( options.output, "w" )
    except ( IOError, OSError, TypeError ):
        sys.stderr.write( "Bad output file.\n" )
        sys.exit( 0 )

    try:
        try:
            in_file = open( options.input )
        except ( IOError, OSError, TypeError ):
            sys.stderr.write( "Bad input file.\n" )
            sys.exit( 0 )

        try:
            sys.stdout.write( "Region: %s;\n" % options.region )

            # Read table and handle each gene
            for line in in_file:
                if line[0:1] == "#":
                    continue
                # Only parse errors skip a line; I/O errors and interrupts
                # now propagate instead of being silently swallowed.
                try:
                    fields = line.split( '\t' )
                    chrom = fields[0]
                    name = fields[3]
                    strand = fields[5].replace( " ", "_" )
                    intervals = _gene_intervals( fields, options.region )
                except ( ValueError, IndexError ):
                    continue

                # Nothing is written unless --exons was requested.
                if not options.exons:
                    continue

                # NOTE(review): this tests the strand column, not
                # options.strand, so --strand has no effect; kept as-is to
                # preserve the output format.
                for start, end in intervals:
                    if strand:
                        print_tab_sep( out_file, chrom, start, end, name, "0", strand )
                    else:
                        print_tab_sep( out_file, chrom, start, end )
        finally:
            in_file.close()
    finally:
        out_file.close()
149
def print_tab_sep(out_file, *args ):
    """Write `args` to `out_file` as one tab separated line.

    Each item is converted with str(), the items are joined with tabs, and a
    single newline is appended. With no items, only the newline is written.

    out_file -- any object with a write() method accepting a string
    args     -- the values that make up the columns of the line
    """
    # str.join and write() behave the same on Python 2 and 3, unlike
    # string.join and the "print >>" statement.
    out_file.write( '\t'.join( [ str( f ) for f in args ] ) + '\n' )
153
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/filters/ucsc_gene_bed_to_exon_bed.py @ 2

異なるフォーマットでダウンロード: