import logging, os, sys, time, tempfile
from galaxy import util
from galaxy.util.odict import odict
from galaxy.util.bunch import Bunch
from cgi import escape
import metadata
import zipfile
from metadata import MetadataElement # import directly to maintain ease of use in Datatype class definitions

log = logging.getLogger(__name__)

# Valid first column and strand column values for bed, other formats
col1_startswith = ['chr', 'chl', 'groupun', 'reftig_', 'scaffold', 'super_', 'vcho']
valid_strand = ['+', '-', '.']
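# For example, a line such as 'chr22\t1000\t5000\tname\t0\t+' passes both checks:
# its first column starts with 'chr' and its strand column is '+'.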

class DataMeta( type ):
    """
    Metaclass for Data class. Sets up metadata spec.
    """
    def __init__( cls, name, bases, dict_ ):
        cls.metadata_spec = metadata.MetadataSpecCollection()
        for base in bases: # loop through bases (class/types) of cls
            if hasattr( base, "metadata_spec" ): # base of class Data (object) has no metadata
                cls.metadata_spec.update( base.metadata_spec ) # add contents of metadata spec of base class to cls
        metadata.Statement.process( cls )
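        # Statement.process replays the MetadataElement( ... ) calls that were
        # recorded while the class body executed, adding each declared element
        # to cls.metadata_spec (the DataTest doctest below shows the effect).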

class Data( object ):
    """
    Base class for all datatypes. Implements basic interfaces as well
    as class methods for metadata.

    >>> class DataTest( Data ):
    ...     MetadataElement( name="test" )
    ...
    >>> DataTest.metadata_spec.test.name
    'test'
    >>> DataTest.metadata_spec.test.desc
    'test'
    >>> type( DataTest.metadata_spec.test.param )
    <class 'galaxy.datatypes.metadata.MetadataParameter'>

    """
    __metaclass__ = DataMeta
    # Add metadata elements
    MetadataElement( name="dbkey", desc="Database/Build", default="?", param=metadata.DBKeyParameter, multiple=False, no_value="?" )
    # Stores the set of display applications, and viewing methods, supported by this datatype
    supported_display_apps = {}
    # If False, the peek is regenerated whenever a dataset of this type is copied
    copy_safe_peek = True
    # The dataset contains binary data --> do not space_to_tab or convert newlines, etc.
    # Allow binary file uploads of this type when True.
    is_binary = True
    # Allow user to change between this datatype and others. If False, this datatype
    # cannot be changed from or into.
    allow_datatype_change = True
    # Composite datatypes
    composite_type = None
    composite_files = odict()
    primary_file_name = 'index'
    # A per datatype setting (inherited): max file size (in bytes) for setting optional metadata
    _max_optional_metadata_filesize = None

    def __init__(self, **kwd):
        """Initialize the datatype"""
        object.__init__(self, **kwd)
        self.supported_display_apps = self.supported_display_apps.copy()
        self.composite_files = self.composite_files.copy()
        self.display_applications = odict()
    def write_from_stream(self, dataset, stream):
        """Writes data from a stream, in 1 MB chunks to bound memory use"""
        fd = open(dataset.file_name, 'wb')
        while 1:
            chunk = stream.read(1048576)
            if not chunk:
                break
            fd.write(chunk)
        fd.close()
    def set_raw_data(self, dataset, data):
        """Saves the data on the disk"""
        fd = open(dataset.file_name, 'wb')
        fd.write(data)
        fd.close()
    def get_raw_data( self, dataset ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        try:
            return file(dataset.file_name, 'rb').read(-1)
        except ( IOError, OSError ):
            log.exception('%s reading a file that does not exist %s' % (self.__class__.__name__, dataset.file_name))
            return ''
    def groom_dataset_content( self, file_name ):
        """This function is called on an output dataset file after the content is initially generated."""
        pass
    def init_meta( self, dataset, copy_from=None ):
        # Metadata should be left mostly uninitialized. Dataset will
        # handle returning default values when metadata is not set.
        # copy_from allows metadata to be passed in that will be
        # copied. (Although this seems ambiguous, see
        # Dataset.set_metadata. It always copies the rhs in order to
        # flag the object as modified for SQLAlchemy.)
        if copy_from:
            dataset.metadata = copy_from.metadata
    def set_meta( self, dataset, overwrite = True, **kwd ):
        """Unimplemented method, allows guessing of metadata from contents of file"""
        return True
    def missing_meta( self, dataset, check = [], skip = [] ):
        """
        Checks for empty metadata values. Returns True if non-optional metadata is missing.
        Specifying a list of 'check' values will only check those names provided; when used, optionality is ignored.
        Specifying a list of 'skip' items excludes those names from the check, so a missing value there is ignored.
        """
        if check:
            to_check = [ ( to_check, dataset.metadata.get( to_check ) ) for to_check in check ]
        else:
            to_check = dataset.metadata.items()
        for key, value in to_check:
            if key in skip or ( not check and dataset.metadata.spec[key].get( "optional" ) ):
                continue # skip optional and non-requested values here
            if not value:
                return True
        return False
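    # Hypothetical usage sketch: flag a dataset whose 'dbkey' was never set,
    # ignoring optionality because an explicit 'check' list is given:
    #   if data.missing_meta( dataset, check=[ 'dbkey' ] ):
    #       ... # prompt for a genome build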
    def set_max_optional_metadata_filesize( self, max_value ):
        try:
            max_value = int( max_value )
        except ( TypeError, ValueError ):
            return
        self.__class__._max_optional_metadata_filesize = max_value
    def get_max_optional_metadata_filesize( self ):
        rval = self.__class__._max_optional_metadata_filesize
        if rval is None:
            return -1
        return rval
    max_optional_metadata_filesize = property( get_max_optional_metadata_filesize, set_max_optional_metadata_filesize )
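    # Usage sketch: the property setter stores the value on the class, so the
    # cap below applies to every instance of this datatype (10 MB, in bytes):
    #   data.max_optional_metadata_filesize = 10 * 1024 * 1024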
    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = ''
            dataset.blurb = 'data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
    def display_peek( self, dataset ):
        """Create HTML table, used for displaying peek"""
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            if not dataset.peek:
                dataset.set_peek()
            data = dataset.peek
            lines = data.splitlines()
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                if type( line ) is unicode:
                    out.append( '<tr><td>%s</td></tr>' % escape( line ) )
                else:
                    out.append( '<tr><td>%s</td></tr>' % escape( unicode( line, 'utf-8' ) ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception, exc:
            out = "Can't create peek %s" % str( exc )
        return out
    def display_name(self, dataset):
        """Returns formatted html of dataset name"""
        try:
            if type( dataset.name ) is unicode:
                return escape( dataset.name )
            else:
                return escape( unicode( dataset.name, 'utf-8' ) )
        except:
            return "name unavailable"
    def display_info(self, dataset):
        """Returns formatted html of dataset info"""
        try:
            # Change new line chars to html
            info = escape( dataset.info )
            if info.find( '\r\n' ) >= 0:
                info = info.replace( '\r\n', '<br/>' )
            if info.find( '\r' ) >= 0:
                info = info.replace( '\r', '<br/>' )
            if info.find( '\n' ) >= 0:
                info = info.replace( '\n', '<br/>' )

            # Convert to unicode to display non-ascii characters.
            if type( info ) is not unicode:
                info = unicode( info, 'utf-8' )

            return info
        except:
            return "info unavailable"
    def validate(self, dataset):
        """Unimplemented validate, return no exceptions"""
        return list()
    def repair_methods(self, dataset):
        """Unimplemented method, returns dict with method/option for repairing errors"""
        return None
    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'
    def add_display_app( self, app_id, label, file_function, links_function ):
        """
        Adds a display app to the datatype.
        app_id is a unique id
        label is the primary display label, e.g., display at 'UCSC'
        file_function is a string containing the name of the function that returns a properly formatted display
        links_function is a string containing the name of the function that returns a list of (link_name, link)
        """
        self.supported_display_apps = self.supported_display_apps.copy()
        self.supported_display_apps[app_id] = {'label': label, 'file_function': file_function, 'links_function': links_function}
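    # Registration sketch (assumes the named methods exist on the datatype, as
    # they do for the interval-style classes that display at UCSC):
    #   self.add_display_app( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' )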
    def remove_display_app( self, app_id ):
        """Removes a display app from the datatype"""
        self.supported_display_apps = self.supported_display_apps.copy()
        try:
            del self.supported_display_apps[app_id]
        except:
            log.exception('Tried to remove display app %s from datatype %s, but this display app is not declared.' % ( app_id, self.__class__.__name__ ) )
    def clear_display_apps( self ):
        self.supported_display_apps = {}
    def add_display_application( self, display_application ):
        """New style display applications"""
        assert display_application.id not in self.display_applications, 'Attempted to add a display application twice'
        self.display_applications[ display_application.id ] = display_application
    def get_display_application( self, key, default = None ):
        return self.display_applications.get( key, default )
    def get_display_applications_by_dataset( self, dataset, trans ):
        rval = odict()
        for key, value in self.display_applications.iteritems():
            value = value.filter_by_dataset( dataset, trans )
            if value.links:
                rval[key] = value
        return rval
    def get_display_types(self):
        """Returns display types available"""
        return self.supported_display_apps.keys()
    def get_display_label(self, type):
        """Returns primary label for display app"""
        try:
            return self.supported_display_apps[type]['label']
        except:
            return 'unknown'
    def as_display_type(self, dataset, type, **kwd):
        """Returns modified file contents for a particular display type"""
        try:
            if type in self.get_display_types():
                return getattr( self, self.supported_display_apps[type]['file_function'] )( dataset, **kwd )
        except:
            log.exception('Function %s is referred to in datatype %s for displaying as type %s, but is not accessible' % (self.supported_display_apps[type]['file_function'], self.__class__.__name__, type) )
        return "This display type (%s) is not implemented for this datatype (%s)." % ( type, dataset.ext )
    def get_display_links( self, dataset, type, app, base_url, target_frame='_blank', **kwd ):
        """
        Returns a list of tuples of (name, link) for a particular display type. No check on
        'access' permissions is done here - if you can view the dataset, you can also save it
        or send it to a destination outside of Galaxy, so Galaxy security restrictions do not
        apply anyway.
        """
        try:
            if type in self.get_display_types():
                return target_frame, getattr( self, self.supported_display_apps[type]['links_function'] )( dataset, type, app, base_url, **kwd )
        except:
            log.exception( 'Function %s is referred to in datatype %s for generating links for type %s, but is not accessible' \
                           % ( self.supported_display_apps[type]['links_function'], self.__class__.__name__, type ) )
        return []
    def get_converter_types(self, original_dataset, datatypes_registry):
        """Returns available converters by type for this dataset"""
        return datatypes_registry.get_converters_by_datatype(original_dataset.ext)
    def find_conversion_destination( self, dataset, accepted_formats, datatypes_registry, **kwd ):
        """Returns ( target_ext, existing converted dataset )"""
        return datatypes_registry.find_conversion_destination_for_dataset_by_extensions( dataset, accepted_formats, **kwd )
    def convert_dataset(self, trans, original_dataset, target_type, return_output = False, visible = True, deps=None):
        """This function adds a job to the queue to convert a dataset to another type. Returns a message about success/failure."""
        converter = trans.app.datatypes_registry.get_converter_by_target_type( original_dataset.ext, target_type )

        if converter is None:
            raise Exception( "A converter does not exist for %s to %s." % ( original_dataset.ext, target_type ) )
        # Generate parameter dictionary
        params = {}
        # Determine input parameter name and add to params
        input_name = 'input1'
        for key, value in converter.inputs.items():
            if deps and value.name in deps:
                params[value.name] = deps[value.name]
            elif value.type == 'data':
                input_name = key

        params[input_name] = original_dataset
        # Run converter; the job is dispatched through the queue
        converted_dataset = converter.execute( trans, incoming = params, set_output_hid = visible )[1]
        if len(params) > 0:
            trans.log_event( "Converter params: %s" % (str(params)), tool_id=converter.id )
        if not visible:
            for name, value in converted_dataset.iteritems():
                value.visible = False
        if return_output:
            return converted_dataset
        return "The file conversion of %s on data %s has been added to the Queue." % (converter.name, original_dataset.hid)
    # We need to clear associated files before we set metadata, so that, e.g.,
    # implicitly converted datasets are deleted and no longer available 'while'
    # metadata is being set, not just after.
    # We'll also clear after setting metadata, for backwards compatibility.
    def after_setting_metadata( self, dataset ):
        """This function is called on the dataset after metadata is set."""
        dataset.clear_associated_files( metadata_safe = True )
    def before_setting_metadata( self, dataset ):
        """This function is called on the dataset before metadata is set."""
        dataset.clear_associated_files( metadata_safe = True )
    def __new_composite_file( self, name, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, is_binary = False, space_to_tab = False, **kwds ):
        kwds[ 'name' ] = name
        kwds[ 'optional' ] = optional
        kwds[ 'mimetype' ] = mimetype
        kwds[ 'description' ] = description
        kwds[ 'substitute_name_with_metadata' ] = substitute_name_with_metadata
        kwds[ 'is_binary' ] = is_binary
        kwds[ 'space_to_tab' ] = space_to_tab
        return Bunch( **kwds )
    def add_composite_file( self, name, **kwds ):
        self.composite_files[ name ] = self.__new_composite_file( name, **kwds )
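    # Hypothetical declaration of a composite member whose on-disk name is
    # derived from metadata; the '%s' in the key is later filled in by
    # get_composite_files() via key % metadata value:
    #   self.add_composite_file( '%s.ped', description='Pedigree file',
    #                            substitute_name_with_metadata='base_name' )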
    def __substitute_composite_key( self, key, composite_file, dataset = None ):
        if composite_file.substitute_name_with_metadata:
            if dataset:
                meta_value = str( dataset.metadata.get( composite_file.substitute_name_with_metadata ) )
            else:
                meta_value = self.metadata_spec[ composite_file.substitute_name_with_metadata ].default
            return key % meta_value
        return key
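    # Note: writable_files below is a property, so callers can never actually
    # supply the 'dataset' argument; it is always None when accessed as
    # 'datatype.writable_files', and name substitution falls back to the
    # metadata spec defaults via get_composite_files( dataset=None ).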
    @property
    def writable_files( self, dataset = None ):
        files = odict()
        if self.composite_type != 'auto_primary_file':
            files[ self.primary_file_name ] = self.__new_composite_file( self.primary_file_name )
        for key, value in self.get_composite_files( dataset = dataset ).iteritems():
            files[ key ] = value
        return files
    def get_composite_files( self, dataset = None ):
        def substitute_composite_key( key, composite_file ):
            if composite_file.substitute_name_with_metadata:
                if dataset:
                    meta_value = str( dataset.metadata.get( composite_file.substitute_name_with_metadata ) )
                else:
                    meta_value = self.metadata_spec[ composite_file.substitute_name_with_metadata ].default
                return key % meta_value
            return key
        files = odict()
        for key, value in self.composite_files.iteritems():
            files[ substitute_composite_key( key, value ) ] = value
        return files
    def generate_auto_primary_file( self, dataset = None ):
        raise Exception( "generate_auto_primary_file is not implemented for this datatype." )
    @property
    def has_resolution(self):
        return False

class Text( Data ):
    file_ext = 'txt'

    # Add metadata elements
    MetadataElement( name="data_lines", default=0, desc="Number of data lines", readonly=True, optional=True, visible=False, no_value=0 )

    def write_from_stream(self, dataset, stream):
        """Writes data from a stream"""
        # write it twice for now
        fd, temp_name = tempfile.mkstemp()
        while 1:
            chunk = stream.read(1048576)
            if not chunk:
                break
            os.write(fd, chunk)
        os.close(fd)
        # rewrite the file with unix newlines
        fp = open(dataset.file_name, 'wt')
        for line in file(temp_name, "U"):
            line = line.strip() + '\n'
            fp.write(line)
        fp.close()
        os.remove( temp_name )
    def set_raw_data(self, dataset, data):
        """Saves the data on the disk"""
        fd, temp_name = tempfile.mkstemp()
        os.write(fd, data)
        os.close(fd)
        # rewrite the file with unix newlines
        fp = open(dataset.file_name, 'wt')
        for line in file(temp_name, "U"):
            line = line.strip() + '\n'
            fp.write(line)
        fp.close()
        os.remove( temp_name )
    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/plain'
    def set_meta( self, dataset, **kwd ):
        """
        Set the number of lines of data in dataset,
        skipping all blank lines and comments.
        """
        data_lines = 0
        for line in file( dataset.file_name ):
            line = line.strip()
            if line and not line.startswith( '#' ):
                data_lines += 1
        dataset.metadata.data_lines = data_lines
    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
        if not dataset.dataset.purged:
            # The file must exist on disk for the get_file_peek() method
            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            if line_count is None:
                # See if line_count is stored in the metadata
                if dataset.metadata.data_lines:
                    dataset.blurb = "%s lines" % util.commaify( str( dataset.metadata.data_lines ) )
                else:
                    # Number of lines is not known ( this should not happen ), and auto-detect is
                    # needed to set metadata
                    dataset.blurb = "? lines"
            else:
                dataset.blurb = "%s lines" % util.commaify( str( line_count ) )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

class Newick( Text ):
    pass

# ------------- Utility methods --------------

def get_test_fname( fname ):
    """Returns test data filename"""
    path, name = os.path.split(__file__)
    full_path = os.path.join( path, 'test', fname )
    return full_path
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100.0 bytes'
    >>> nice_size(10000)
    '9.8 Kb'
    >>> nice_size(1000000)
    '976.6 Kb'
    >>> nice_size(100000000)
    '95.4 Mb'
    """
    words = [ 'bytes', 'Kb', 'Mb', 'Gb' ]
    try:
        size = float( size )
    except ( TypeError, ValueError ):
        return '??? bytes'
    for ind, word in enumerate(words):
        step = 1024 ** (ind + 1)
        if step > size:
            size = size / float(1024 ** ind)
            out = "%.1f %s" % (size, word)
            return out
    return '??? bytes'
def get_file_peek( file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5 ):
    """
    Returns the first LINE_COUNT lines wrapped to WIDTH

    ## >>> fname = get_test_fname('4.bed')
    ## >>> get_file_peek(fname)
    ## 'chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 +\n'
    """
    lines = []
    count = 0
    file_type = None
    data_checked = False
    temp = open( file_name, "U" )
    while count < LINE_COUNT:
        line = temp.readline( WIDTH )
        if not line:
            # end of file
            break
        if not is_multi_byte and not data_checked:
            # See if we have a compressed or binary file
            if line[0:2] == util.gzip_magic:
                file_type = 'gzipped'
                break
            else:
                for char in line:
                    if ord( char ) > 128:
                        file_type = 'binary'
                        break
                data_checked = True
        if file_type in [ 'gzipped', 'binary' ]:
            break
        lines.append( line )
        count += 1
    temp.close()
    if file_type in [ 'gzipped', 'binary' ]:
        text = "%s file" % file_type
    else:
        try:
            text = unicode( '\n'.join( lines ), 'utf-8' )
        except UnicodeDecodeError:
            text = "binary/unknown file"
    return text