Context Navigation

get_scores_galaxy.py

リビジョン 2, 5.1 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central
属性 svn:executable の設定値 ``*

行番号
1	#!/usr/bin/env python
2
3	"""
4	usage: %prog data_file.h5 region_mapping.bed in_file out_file chrom_col start_col end_col [options]
5	-p, --perCol: standardize to lod per column
6	"""
7
8	from __future__ import division
9
10	import sys
11	from galaxy import eggs
12	from numpy import *
13	from tables import *
14
15	import pkg_resources; pkg_resources.require( "bx-python" )
16	from bx.cookbook import doc_optparse
17
18	from bx import intervals
19
20	# ignore wanrnings about NumArray flavor
21	from warnings import filterwarnings
22	from tables.exceptions import FlavorWarning
23	filterwarnings("ignore", category=FlavorWarning)
24
25	assert sys.version_info[:2] >= ( 2, 4 )
26
27	def stop_err( msg ):
28	sys.stderr.write(msg)
29	sys.exit()
30
31	def main():
32	# Parse command line
33	options, args = doc_optparse.parse( __doc__ )
34	try:
35	h5_fname = args[0]
36	mapping_fname = args[1]
37	in_fname = args[2]
38	out_fname = args[3]
39	chrom_col, start_col, end_col = map( lambda x: int( x ) - 1, args[4:7] )
40	per_col = bool( options.perCol )
41	except Exception, e:
42	doc_optparse.exception()
43
44	if h5_fname == 'None.h5':
45	stop_err( 'Invalid genome build, this tool currently only works with data from build hg17. Click the pencil icon in your history item to correct the build if appropriate.' )
46
47	# Open the h5 file
48	h5 = openFile( h5_fname, mode = "r" )
49	# Load intervals and names for the subregions
50	intersecters = {}
51	for i, line in enumerate( file( mapping_fname ) ):
52	line = line.rstrip( '\r\n' )
53	if line and not line.startswith( '#' ):
54	chr, start, end, name = line.split()[0:4]
55	if not intersecters.has_key( chr ):
56	intersecters[ chr ] = intervals.Intersecter()
57	intersecters[ chr ].add_interval( intervals.Interval( int( start ), int( end ), name ) )
58
59	# Find the subregion containing each input interval
60	skipped_lines = 0
61	first_invalid_line = 0
62	invalid_line = ''
63	out_file = open( out_fname, "w" )
64	warnings = []
65	warning = ''
66	for i, line in enumerate( file( in_fname ) ):
67	line = line.rstrip( '\r\n' )
68	if line.startswith( '#' ):
69	if i == 0:
70	out_file.write( "%s\tscore\n" % line )
71	else:
72	out_file.write( "%s\n" % line )
73	fields = line.split( "\t" )
74	try:
75	chr = fields[ chrom_col ]
76	start = int( fields[ start_col ] )
77	end = int( fields[ end_col ] )
78	except:
79	warning = "Invalid value for chrom, start or end column."
80	warnings.append( warning )
81	skipped_lines += 1
82	if not invalid_line:
83	first_invalid_line = i + 1
84	invalid_line = line
85	continue
86	# Find matching interval
87	try:
88	matches = intersecters[ chr ].find( start, end )
89	except:
90	warning = "'%s' is not a valid chrom value for the region. " %chr
91	warnings.append( warning )
92	skipped_lines += 1
93	if not invalid_line:
94	first_invalid_line = i + 1
95	invalid_line = line
96	continue
97	if not len( matches ) == 1:
98	warning = "Interval must match exactly one target region. "
99	warnings.append( warning )
100	skipped_lines += 1
101	if not invalid_line:
102	first_invalid_line = i + 1
103	invalid_line = line
104	continue
105	region = matches[0]
106	if not ( start >= region.start and end <= region.end ):
107	warning = "Interval must fall entirely within region. "
108	warnings.append( warning )
109	skipped_lines += 1
110	if not invalid_line:
111	first_invalid_line = i + 1
112	invalid_line = line
113	continue
114	region_name = region.value
115	rel_start = start - region.start
116	rel_end = end - region.start
117	if not rel_start < rel_end:
118	warning = "Region %s is empty, relative start:%d, relative end:%d. " % ( region_name, rel_start, rel_end )
119	warnings.append( warning )
120	skipped_lines += 1
121	if not invalid_line:
122	first_invalid_line = i + 1
123	invalid_line = line
124	continue
125	s = h5.getNode( h5.root, "scores_" + region_name )
126	c = h5.getNode( h5.root, "counts_" + region_name )
127	score = s[rel_end-1]
128	count = c[rel_end-1]
129	if rel_start > 0:
130	score -= s[rel_start-1]
131	count -= c[rel_start-1]
132	if per_col:
133	score /= count
134	fields.append( str( score ) )
135	out_file.write( "%s\n" % "\t".join( fields ) )
136	# Close the file handle
137	h5.close()
138	out_file.close()
139
140	if warnings:
141	warn_msg = "PhastOdds scores are only available for ENCODE regions. %d warnings, 1st is: " % len( warnings )
142	warn_msg += warnings[0]
143	print warn_msg
144	if skipped_lines:
145	print 'Skipped %d invalid lines, 1st is #%d, "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
146
147	if __name__ == "__main__": main()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/extract/phastOdds/get_scores_galaxy.py

異なるフォーマットでダウンロード: