Context Navigation

ucsc_gene_table_to_intervals.py @ 3

リビジョン 3, 3.4 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	#!/usr/bin/python2.6
2
3	"""
4	Read a table dump in the UCSC gene table format and print a tab separated
5	list of intervals corresponding to requested features of each gene.
6
7	usage: ucsc_gene_table_to_intervals.py [options] < gene_table.txt
8
9	options:
10	-h, --help show this help message and exit
11	-rREGION, --region=REGION
12	Limit to region: one of coding, utr3, utr5, transcribed [default]
13	-e, --exons Only print intervals overlapping an exon
14	"""
15
16	import optparse, string, sys
17
18	def main():
19	# Parse command line
20	parser = optparse.OptionParser( usage="%prog [options] < gene_table.txt" )
21	parser.add_option( "-r", "--region", dest="region", default="transcribed",
22	help="Limit to region: one of coding, utr3, utr5, transcribed [default]" )
23	parser.add_option( "-e", "--exons", action="store_true", dest="exons",
24	help="Only print intervals overlapping an exon" )
25	parser.add_option( "-s", "--strand", action="store_true", dest="strand",
26	help="Print strand after interval" )
27	parser.add_option( "-b", "--nobin", action="store_false", dest="discard_first_column", default=True,
28	help="file doesn't contain a 'bin' column (use this for pre-hg18 files)" )
29	options, args = parser.parse_args()
30	assert options.region in ( 'coding', 'utr3', 'utr5', 'transcribed' ), "Invalid region argument"
31
32	# Read table from stdin and handle each gene
33	for line in sys.stdin:
34
35	# Parse fields from gene tabls
36	fields = line.split( '\t' )
37	if (options.discard_first_column): fields.pop(0)
38	chrom = fields[1]
39	strand = fields[2]
40	tx_start = int( fields[3] )
41	tx_end = int( fields[4] )
42	cds_start = int( fields[5] )
43	cds_end = int( fields[6] )
44
45	# Determine the subset of the transcribed region we are interested in
46	if options.region == 'utr3':
47	if strand == '-': region_start, region_end = tx_start, cds_start
48	else: region_start, region_end = cds_end, tx_end
49	elif options.region == 'utr5':
50	if strand == '-': region_start, region_end = cds_end, tx_end
51	else: region_start, region_end = tx_start, cds_start
52	elif options.region == 'coding':
53	region_start, region_end = cds_start, cds_end
54	else:
55	region_start, region_end = tx_start, tx_end
56
57	# If only interested in exons, print the portion of each exon overlapping
58	# the region of interest, otherwise print the span of the region
59	if options.exons:
60	exon_starts = map( int, fields[8].rstrip( ',\n' ).split( ',' ) )
61	exon_ends = map( int, fields[9].rstrip( ',\n' ).split( ',' ) )
62	for start, end in zip( exon_starts, exon_ends ):
63	start = max( start, region_start )
64	end = min( end, region_end )
65	if start < end:
66	if strand: print_tab_sep( chrom, start, end, strand )
67	else: print_tab_sep( chrom, start, end )
68	else:
69	if strand: print_tab_sep( chrom, region_start, region_end, strand )
70	else: print_tab_sep( chrom, region_start, region_end )
71
72	def print_tab_sep( *args ):
73	"""Print items in `l` to stdout separated by tabs"""
74	print string.join( [ str( f ) for f in args ], '\t' )
75
76	if __name__ == "__main__": main()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/ucsc_gene_table_to_intervals.py @ 3

異なるフォーマットでダウンロード: