1 | """ |
---|
2 | Utility functions used systemwide. |
---|
3 | |
---|
4 | """ |
---|
5 | import logging |
---|
6 | import threading, random, string, re, binascii, pickle, time, datetime, math, re, os, sys, tempfile, stat, grp |
---|
7 | |
---|
8 | # Older py compatibility |
---|
9 | try: |
---|
10 | set() |
---|
11 | except: |
---|
12 | from sets import Set as set |
---|
13 | |
---|
14 | try: |
---|
15 | from hashlib import md5 |
---|
16 | except ImportError: |
---|
17 | from md5 import new as md5 |
---|
18 | |
---|
19 | import pkg_resources |
---|
20 | |
---|
21 | pkg_resources.require( 'docutils' ) |
---|
22 | import docutils.core |
---|
23 | from galaxy.util.docutils_ext.htmlfrag import Writer as HTMLFragWriter |
---|
24 | |
---|
25 | pkg_resources.require( 'elementtree' ) |
---|
26 | from elementtree import ElementTree, ElementInclude |
---|
27 | |
---|
28 | pkg_resources.require( "wchartype" ) |
---|
29 | import wchartype |
---|
30 | |
---|
log = logging.getLogger(__name__)
# Module-wide lock used by the @synchronized decorator below.
_lock = threading.RLock()

# Leading magic bytes used to recognize compressed files:
# gzip streams start with \x1f\x8b, bzip2 streams with "BZh".
gzip_magic = '\037\213'
bz2_magic = 'BZh'
---|
36 | |
---|
def is_multi_byte( chars ):
    """
    Return True if any character in `chars` is a wide/multi-byte character
    (Asian scripts, full-width forms, Hangul, etc. per the wchartype
    classifiers); return False for plain text or undecodable (binary) input.
    """
    classifiers = ( wchartype.is_asian,
                    wchartype.is_full_width,
                    wchartype.is_kanji,
                    wchartype.is_hiragana,
                    wchartype.is_katakana,
                    wchartype.is_half_katakana,
                    wchartype.is_hangul,
                    wchartype.is_full_digit,
                    wchartype.is_full_letter )
    for char in chars:
        try:
            char = unicode( char )
        except UnicodeDecodeError:
            # Probably binary data rather than text.
            return False
        for classify in classifiers:
            if classify( char ):
                return True
    return False
---|
55 | |
---|
def synchronized(func):
    """Decorator serializing every call to 'func' through the module-level _lock."""
    def caller(*args, **kwargs):
        # Block until the lock is available, and always release it afterwards.
        _lock.acquire(True)
        try:
            return func(*args, **kwargs)
        finally:
            _lock.release()
    return caller
---|
65 | |
---|
def file_iter(fname, sep=None):
    """
    This generator iterates over a file and yields its lines
    splitted via the C{sep} parameter. Skips empty lines and lines starting with
    the C{#} character.

    Note: a line holding only a newline is still truthy, so with the default
    whitespace split it yields an empty list rather than being skipped.

    >>> lines = [ line for line in file_iter(__file__) ]
    >>> len(lines) != 0
    True
    """
    # open() replaces the deprecated file() constructor (removed in Python 3).
    for line in open(fname):
        if line and line[0] != '#':
            yield line.split(sep)
---|
79 | |
---|
def file_reader(fp, chunk_size=65536):
    """Yield successive chunks (default 64k) from the open file object `fp`,
    then close it once the data is exhausted."""
    chunk = fp.read(chunk_size)
    while chunk:
        yield chunk
        chunk = fp.read(chunk_size)
    fp.close()
---|
88 | |
---|
def unique_id(KEY_SIZE=128):
    """
    Generates an unique id: the hex md5 digest of KEY_SIZE random bits.

    >>> ids = [ unique_id() for i in range(1000) ]
    >>> len(set(ids))
    1000
    """
    # Avoid shadowing the builtin id(); encode so md5 accepts the input on
    # both Python 2 (bytes str) and Python 3 (digits are always ASCII).
    random_bits = str( random.getrandbits( KEY_SIZE ) )
    return md5( random_bits.encode( 'ascii' ) ).hexdigest()
---|
99 | |
---|
def parse_xml(fname):
    """Parse the XML file `fname` and return the tree, after processing
    any XInclude directives in place."""
    tree = ElementTree.parse(fname)
    ElementInclude.include(tree.getroot())
    return tree
---|
106 | |
---|
def xml_to_string(elem):
    """Serialize the XML element `elem` to a string."""
    return ElementTree.tostring(elem)
---|
111 | |
---|
# characters that are valid
# NOTE(review): string.letters is Python 2 only and locale-dependent;
# confirm the process locale before relying on the exact set.
valid_chars = set(string.letters + string.digits + " -=_.()/+*^,:?!")

# characters that are allowed but need to be escaped
# (restore_text() maps these __xx__ tokens back to the originals)
mapped_chars = { '>' :'__gt__',
                 '<' :'__lt__',
                 "'" :'__sq__',
                 '"' :'__dq__',
                 '[' :'__ob__',
                 ']' :'__cb__',
                 '{' :'__oc__',
                 '}' :'__cc__',
                 '@' : '__at__',
                 '\n' : '__cn__',
                 '\r' : '__cr__',
                 '\t' : '__tc__'
                 }
---|
129 | |
---|
def restore_text(text):
    """Map each escape token produced by sanitize_text back to its original character."""
    for original, escaped in mapped_chars.items():
        text = text.replace(escaped, original)
    return text
---|
135 | |
---|
def sanitize_text(text):
    """Restrict `text` to the allowed character set: characters in
    valid_chars pass through, escapable ones become their __xx__ token,
    and everything else is replaced with 'X' (makes debugging easier)."""
    sanitized = []
    for ch in text:
        if ch in valid_chars:
            sanitized.append(ch)
        else:
            # escaped form when known, 'X' otherwise
            sanitized.append(mapped_chars.get(ch, 'X'))
    return ''.join(sanitized)
---|
147 | |
---|
148 | def sanitize_param(value): |
---|
149 | """Clean incoming parameters (strings or lists)""" |
---|
150 | if isinstance( value, basestring ): |
---|
151 | return sanitize_text(value) |
---|
152 | elif isinstance( value, list ): |
---|
153 | return map(sanitize_text, value) |
---|
154 | else: |
---|
155 | print value |
---|
156 | raise Exception, 'Unknown parameter type (%s)' % ( type( value ) ) |
---|
157 | |
---|
class Params:
    """
    Stores and 'sanitizes' parameters. Alphanumeric characters and the
    non-alphanumeric ones that are deemed safe are let to pass through (see L{valid_chars}).
    Some non-safe characters are escaped to safe forms for example C{>} becomes C{__gt__}
    (see L{mapped_chars}). All other characters are replaced with C{X}.

    Operates on string or list values only (HTTP parameters).

    >>> values = { 'status':'on', 'symbols':[ 'alpha', '<>', '$rm&#!' ] }
    >>> par = Params(values)
    >>> par.status
    'on'
    >>> par.value == None # missing attributes return None
    True
    >>> par.get('price', 0)
    0
    >>> par.symbols # replaces unknown symbols with X
    ['alpha', '__lt____gt__', 'XrmXX!']
    >>> par.flatten() # flattening to a list
    [('status', 'on'), ('symbols', 'alpha'), ('symbols', '__lt____gt__'), ('symbols', 'XrmXX!')]
    """

    # is NEVER_SANITIZE required now that sanitizing for tool parameters can be controlled on a per parameter basis and occurs via InputValueWrappers?
    NEVER_SANITIZE = ['file_data', 'url_paste', 'URL', 'filesystem_paths']

    def __init__( self, params, sanitize=True ):
        """Store `params` as instance attributes, sanitizing each value
        unless sanitize=False or the key matches NEVER_SANITIZE."""
        # Parameters live directly in __dict__ so they read back as attributes.
        if sanitize:
            for key, value in params.items():
                if key not in self.NEVER_SANITIZE and True not in [ key.endswith( "|%s" % nonsanitize_parameter ) for nonsanitize_parameter in self.NEVER_SANITIZE ]: #sanitize check both ungrouped and grouped parameters by name. Anything relying on NEVER_SANITIZE should be changed to not require this and NEVER_SANITIZE should be removed.
                    self.__dict__[ key ] = sanitize_param( value )
                else:
                    self.__dict__[ key ] = value
        else:
            self.__dict__.update(params)

    def flatten(self):
        """
        Creates a tuple list from a dict with a tuple/value pair for every value that is a list
        """
        flat = []
        for key, value in self.__dict__.items():
            if type(value) == type([]):
                for v in value:
                    flat.append( (key, v) )
            else:
                flat.append( (key, value) )
        return flat

    def __getattr__(self, name):
        """This is here to ensure that we get None for non existing parameters"""
        return None

    def get(self, key, default):
        """Like dict.get, except the default is required."""
        return self.__dict__.get(key, default)

    def __str__(self):
        """Render the stored parameters like a plain dict."""
        return '%s' % self.__dict__

    def __len__(self):
        """Number of stored parameters."""
        return len(self.__dict__)

    def __iter__(self):
        """Iterate over parameter names."""
        return iter(self.__dict__)

    def update(self, values):
        """Merge `values` into the stored parameters WITHOUT sanitizing them."""
        self.__dict__.update(values)
---|
225 | |
---|
def rst_to_html( s ):
    """Convert a blob of reStructuredText to an HTML fragment."""
    docutils_log = logging.getLogger( "docutils" )
    class FakeStream( object ):
        # Divert docutils warnings into our logger instead of a real stream.
        def write( self, text ):
            if len( text ) > 0 and not text.isspace():
                docutils_log.warn( text )
    return docutils.core.publish_string( s, writer=HTMLFragWriter(), settings_overrides=dict( warning_stream=FakeStream() ) )
---|
234 | |
---|
def xml_text(root, name=None):
    """Return the text of element `root` — or, when `name` is given, of its
    attribute or child element `name` (attribute wins) — with newlines
    removed and surrounding whitespace stripped; '' when nothing is found."""
    elem = root
    if name is not None:
        # An attribute called `name` takes precedence over a child element.
        attr_val = root.get(name)
        if attr_val:
            return attr_val
        elem = root.find(name)
    if elem is None or not elem.text:
        return ''
    # Collapse multi-line text by deleting the line breaks themselves.
    return ''.join(elem.text.splitlines()).strip()
---|
251 | |
---|
def string_as_bool( string ):
    """True when `string` (stringified, case-insensitive) is 'true', 'yes' or 'on'."""
    return str( string ).lower() in ( 'true', 'yes', 'on' )
---|
257 | |
---|
def listify( item ):
    """
    Wrap `item` in a list: None/empty gives [], a list is returned as-is,
    a string containing commas is split on them, anything else gives [item].
    """
    if not item:
        return []
    if isinstance( item, list ):
        return item
    if isinstance( item, basestring ) and ',' in item:
        return item.split( ',' )
    return [ item ]
---|
271 | |
---|
def commaify(amount):
    """Insert thousands separators into the numeric string `amount`."""
    # One comma is added per call; recurse until the string stops changing.
    new = re.sub(r"^(-?\d+)(\d{3})", r'\g<1>,\g<2>', amount)
    if new == amount:
        return new
    return commaify(new)
---|
279 | |
---|
def object_to_string( obj ):
    """Pickle `obj` (protocol 2) and return the result hex-encoded."""
    pickled = pickle.dumps( obj, 2 )
    return binascii.hexlify( pickled )
---|
282 | |
---|
def string_to_object( s ):
    """Inverse of object_to_string: hex-decode `s` and unpickle it.
    NOTE(review): never feed untrusted input here — pickle.loads executes code."""
    raw = binascii.unhexlify( s )
    return pickle.loads( raw )
---|
285 | |
---|
def get_ucsc_by_build(build):
    """Return (name, url) pairs for the UCSC sites that support `build`."""
    return [ ( site['name'], site['url'] )
             for site in ucsc_build_sites
             if build in site['builds'] ]
---|
def get_gbrowse_sites_by_build(build):
    """Return (name, url) pairs for the GBrowse sites that support `build`."""
    return [ ( site['name'], site['url'] )
             for site in gbrowse_build_sites
             if build in site['builds'] ]
---|
def get_genetrack_sites():
    """Return (name, url) pairs for all configured GeneTrack sites."""
    return [ ( site['name'], site['url'] ) for site in genetrack_sites ]
---|
303 | |
---|
def read_dbnames(filename):
    """ Read build names from file

    Returns a DBNames list of (db_key, display_name) tuples: the "?" entry
    first (when present), then UCSC builds grouped per species and sorted by
    descending revision number, then manual (integer-keyed) builds.  Any
    read/parse failure is logged to stdout and yields the fallback list.
    """
    class DBNames( list ):
        # Fallback entry used when the builds file is missing or empty.
        default_value = "?"
        default_name = "unspecified (?)"
    db_names = DBNames()
    try:
        ucsc_builds = {}
        man_builds = [] #assume these are integers
        name_to_db_base = {}
        for line in open(filename):
            try:
                if line[0:1] == "#": continue
                fields = line.replace("\r","").replace("\n","").split("\t")
                #Special case of unspecified build is at top of list
                if fields[0] == "?":
                    db_names.insert(0,(fields[0],fields[1]))
                    continue
                try: #manual build (i.e. microbes)
                    int(fields[0])
                    man_builds.append((fields[1], fields[0]))
                except: #UCSC build
                    # db_base is the key with the trailing revision digits removed,
                    # e.g. "hg18" -> "hg"; one bucket per species.
                    db_base = fields[0].rstrip('0123456789')
                    if db_base not in ucsc_builds:
                        ucsc_builds[db_base] = []
                        name_to_db_base[fields[1]] = db_base
                    #we want to sort within a species numerically by revision number
                    build_rev = re.compile(r'\d+$')
                    try: build_rev = int(build_rev.findall(fields[0])[0])
                    except: build_rev = 0
                    ucsc_builds[db_base].append((build_rev, fields[0],fields[1]))
            except: continue
        # Species are emitted alphabetically by the first name seen for each base.
        sort_names = name_to_db_base.keys()
        sort_names.sort()
        for name in sort_names:
            db_base = name_to_db_base[name]
            # Highest revision first within a species.
            ucsc_builds[db_base].sort()
            ucsc_builds[db_base].reverse()
            ucsc_builds[db_base] = [(build, name) for build_rev, build, name in ucsc_builds[db_base]]
            db_names = DBNames( db_names + ucsc_builds[db_base] )
        if len( db_names ) > 1 and len( man_builds ) > 0: db_names.append( ( db_names.default_value, '----- Additional Species Are Below -----' ) )
        man_builds.sort()
        man_builds = [(build, name) for name, build in man_builds]
        db_names = DBNames( db_names + man_builds )
    except Exception, e:
        print "ERROR: Unable to read builds file:", e
    if len(db_names)<1:
        db_names = DBNames( [( db_names.default_value, db_names.default_name )] )
    return db_names
---|
353 | |
---|
def read_build_sites( filename, check_builds=True ):
    """ read db names to ucsc mappings from file, this file should probably be merged with the one above

    Each non-comment line is tab-separated: name, url and (when check_builds
    is True) a comma-separated list of supported builds.  Malformed lines are
    skipped silently; an unreadable file is reported and yields [].
    """
    build_sites = []
    try:
        for line in open(filename):
            try:
                if line[0:1] == "#": continue
                fields = line.replace("\r","").replace("\n","").split("\t")
                site_name = fields[0]
                site = fields[1]
                if check_builds:
                    site_builds = fields[2].split(",")
                    site_dict = {'name':site_name, 'url':site, 'builds':site_builds}
                else:
                    site_dict = {'name':site_name, 'url':site}
                build_sites.append( site_dict )
            except: continue
    except:
        print "ERROR: Unable to read builds for site file %s" %filename
    return build_sites
---|
374 | |
---|
def relpath( path, start = None ):
    """Return a relative version of a path"""
    #modified from python 2.6.1 source code

    #version 2.6+ has it built in, we'll use the 'official' copy
    if sys.version_info[:2] >= ( 2, 6 ):
        if start is not None:
            return os.path.relpath( path, start )
        return os.path.relpath( path )

    #we need to initialize some local parameters
    curdir = os.curdir
    pardir = os.pardir
    sep = os.sep
    commonprefix = os.path.commonprefix
    join = os.path.join
    if start is None:
        start = curdir

    #below is the unedited (but formated) relpath() from posixpath.py of 2.6.1
    #this will likely not function properly on non-posix systems, i.e. windows
    if not path:
        raise ValueError( "no path specified" )

    start_list = os.path.abspath( start ).split( sep )
    path_list = os.path.abspath( path ).split( sep )

    # Work out how much of the filepath is shared by start and path.
    # (commonprefix on lists compares element-wise, so this is safe for
    # whole path components.)
    i = len( commonprefix( [ start_list, path_list ] ) )

    # Climb out of start's unshared components, then descend into path's.
    rel_list = [ pardir ] * ( len( start_list )- i ) + path_list[ i: ]
    if not rel_list:
        return curdir
    return join( *rel_list )
---|
409 | |
---|
def stringify_dictionary_keys( in_dict ):
    """Return a new dict whose top-level keys are coerced to str (does not
    recurse); unicode keys are not valid for **kwargs expansion in py2."""
    return dict( ( str( key ), value ) for key, value in in_dict.iteritems() )
---|
418 | |
---|
def recursively_stringify_dictionary_keys( d ):
    """Recursively utf-8 encode dictionary keys, descending into nested
    dicts and lists; non-container values are returned unchanged."""
    if isinstance(d, dict):
        return dict( ( key.encode('utf-8'), recursively_stringify_dictionary_keys(value) )
                     for key, value in d.iteritems() )
    if isinstance(d, list):
        return [ recursively_stringify_dictionary_keys(elem) for elem in d ]
    return d
---|
426 | |
---|
427 | def mkstemp_ln( src, prefix='mkstemp_ln_' ): |
---|
428 | """ |
---|
429 | From tempfile._mkstemp_inner, generate a hard link in the same dir with a |
---|
430 | random name. Created so we can persist the underlying file of a |
---|
431 | NamedTemporaryFile upon its closure. |
---|
432 | """ |
---|
433 | dir = os.path.dirname(src) |
---|
434 | names = tempfile._get_candidate_names() |
---|
435 | for seq in xrange(tempfile.TMP_MAX): |
---|
436 | name = names.next() |
---|
437 | file = os.path.join(dir, prefix + name) |
---|
438 | try: |
---|
439 | linked_path = os.link( src, file ) |
---|
440 | return (os.path.abspath(file)) |
---|
441 | except OSError, e: |
---|
442 | if e.errno == errno.EEXIST: |
---|
443 | continue # try again |
---|
444 | raise |
---|
445 | raise IOError, (errno.EEXIST, "No usable temporary file name found") |
---|
446 | |
---|
def umask_fix_perms( path, umask, unmasked_perms, gid=None ):
    """
    umask-friendly permissions fixing

    Sets the mode of `path` to ``unmasked_perms & ~umask`` and, when `gid`
    is given, tries to change its group.  All failures are logged rather
    than raised.
    """
    perms = unmasked_perms & ~umask
    try:
        st = os.stat( path )
    except OSError, e:
        # Can't even stat the path; nothing more we can do.
        log.exception( 'Unable to set permissions or group on %s' % path )
        return
    # fix modes
    if stat.S_IMODE( st.st_mode ) != perms:
        try:
            os.chmod( path, perms )
        except Exception, e:
            log.warning( 'Unable to honor umask (%s) for %s, tried to set: %s but mode remains %s, error was: %s' % ( oct( umask ), \
                                                                                                                      path,
                                                                                                                      oct( perms ),
                                                                                                                      oct( stat.S_IMODE( st.st_mode ) ),
                                                                                                                      e ) )
    # fix group
    if gid is not None and st.st_gid != gid:
        try:
            # -1 leaves the owner unchanged; only the group is modified.
            os.chown( path, -1, gid )
        except Exception, e:
            try:
                # Resolve gids to names for a friendlier log message.
                desired_group = grp.getgrgid( gid )
                current_group = grp.getgrgid( st.st_gid )
            except:
                desired_group = gid
                current_group = st.st_gid
            log.warning( 'Unable to honor primary group (%s) for %s, group remains %s, error was: %s' % ( desired_group, \
                                                                                                          path,
                                                                                                          current_group,
                                                                                                          e ) )
---|
482 | |
---|
# NOTE(review): __path__ only exists inside a package __init__ module —
# presumably this file is lib/galaxy/util/__init__.py; confirm before moving.
galaxy_root_path = os.path.join(__path__[0], "..","..","..")
# The dbnames list is used in edit attributes and the upload tool
dbnames = read_dbnames( os.path.join( galaxy_root_path, "tool-data", "shared", "ucsc", "builds.txt" ) )
ucsc_build_sites = read_build_sites( os.path.join( galaxy_root_path, "tool-data", "shared", "ucsc", "ucsc_build_sites.txt" ) )
gbrowse_build_sites = read_build_sites( os.path.join( galaxy_root_path, "tool-data", "shared", "gbrowse", "gbrowse_build_sites.txt" ) )
genetrack_sites = read_build_sites( os.path.join( galaxy_root_path, "tool-data", "shared", "genetrack", "genetrack_sites.txt" ), check_builds=False )

if __name__ == '__main__':
    # Run the module's doctests (unique_id, file_iter, Params, ...).
    import doctest, sys
    doctest.testmod(sys.modules[__name__], verbose=False)
---|