root/galaxy-central/lib/galaxy/datatypes/data.py @ 2

Revision 2, 21.6 KB (committer: hatakeyama, 14 years ago)

import galaxy-central

import logging, os, sys, time, tempfile
from galaxy import util
from galaxy.util.odict import odict
from galaxy.util.bunch import Bunch
from cgi import escape
import metadata
import zipfile
from metadata import MetadataElement #import directly to maintain ease of use in Datatype class definitions

log = logging.getLogger(__name__)

# Valid first column and strand column values for bed, other formats
col1_startswith = ['chr', 'chl', 'groupun', 'reftig_', 'scaffold', 'super_', 'vcho']
valid_strand = ['+', '-', '.']
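
# Illustrative sketch (not part of this module's API): format sniffers
# elsewhere in Galaxy test candidate lines against these values, roughly like:
#
#     fields = line.split()
#     looks_like_bed = ( fields[0].lower().startswith( tuple( col1_startswith ) )
#                        and ( len( fields ) < 6 or fields[5] in valid_strand ) )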

class DataMeta( type ):
    """
    Metaclass for Data class.  Sets up metadata spec.
    """
    def __init__( cls, name, bases, dict_ ):
        cls.metadata_spec = metadata.MetadataSpecCollection()
        for base in bases: #loop through bases (class/types) of cls
            if hasattr( base, "metadata_spec" ): #base of class Data (object) has no metadata
                cls.metadata_spec.update( base.metadata_spec ) #add contents of metadata spec of base class to cls
        metadata.Statement.process( cls )

class Data( object ):
    """
    Base class for all datatypes.  Implements basic interfaces as well
    as class methods for metadata.

    >>> class DataTest( Data ):
    ...     MetadataElement( name="test" )
    ...
    >>> DataTest.metadata_spec.test.name
    'test'
    >>> DataTest.metadata_spec.test.desc
    'test'
    >>> type( DataTest.metadata_spec.test.param )
    <class 'galaxy.datatypes.metadata.MetadataParameter'>

    """
    __metaclass__ = DataMeta
    # Add metadata elements
    MetadataElement( name="dbkey", desc="Database/Build", default="?", param=metadata.DBKeyParameter, multiple=False, no_value="?" )
    # Stores the set of display applications, and viewing methods, supported by this datatype
    supported_display_apps = {}
    # If False, the peek is regenerated whenever a dataset of this type is copied
    copy_safe_peek = True
    # The dataset contains binary data --> do not space_to_tab or convert newlines, etc.
    # Allow binary file uploads of this type when True.
    is_binary = True
    # Allow user to change between this datatype and others. If False, this datatype
    # cannot be changed from or into.
    allow_datatype_change = True
    # Composite datatypes
    composite_type = None
    composite_files = odict()
    primary_file_name = 'index'
    # A per datatype setting (inherited): max file size (in bytes) for setting optional metadata
    _max_optional_metadata_filesize = None

    def __init__(self, **kwd):
        """Initialize the datatype"""
        object.__init__(self, **kwd)
        self.supported_display_apps = self.supported_display_apps.copy()
        self.composite_files = self.composite_files.copy()
        self.display_applications = odict()
    def write_from_stream(self, dataset, stream):
        """Writes data from a stream"""
        fd = open(dataset.file_name, 'wb')
        while 1:
            chunk = stream.read(1048576)
            if not chunk:
                break
            fd.write(chunk)
        fd.close()
    def set_raw_data(self, dataset, data):
        """Saves the data on the disc"""
        fd = open(dataset.file_name, 'wb')
        fd.write(data)
        fd.close()
    def get_raw_data( self, dataset ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        try:
            return file(dataset.file_name, 'rb').read(-1)
        except (IOError, OSError), e:
            log.exception('%s reading a file that does not exist %s' % (self.__class__.__name__, dataset.file_name))
            return ''
    def groom_dataset_content( self, file_name ):
        """This function is called on an output dataset file after the content is initially generated."""
        pass
    def init_meta( self, dataset, copy_from=None ):
        # Metadata should be left mostly uninitialized.  Dataset will
        # handle returning default values when metadata is not set.
        # copy_from allows metadata to be passed in that will be
        # copied (although this seems ambiguous, see
        # Dataset.set_metadata.  It always copies the rhs in order to
        # flag the object as modified for SQLAlchemy).
        if copy_from:
            dataset.metadata = copy_from.metadata
    def set_meta( self, dataset, overwrite = True, **kwd ):
        """Unimplemented method, allows guessing of metadata from contents of file"""
        return True
    def missing_meta( self, dataset, check = [], skip = [] ):
        """
        Checks for empty metadata values.  Returns True if a non-optional
        metadata value is missing.
        Specifying a list of 'check' names will check only those names; when
        used, optionality is ignored.
        Names in the 'skip' list are never treated as missing, even when
        their metadata values are unset.
        """
        if check:
            to_check = [ ( key, dataset.metadata.get( key ) ) for key in check ]
        else:
            to_check = dataset.metadata.items()
        for key, value in to_check:
            if key in skip or ( not check and dataset.metadata.spec[key].get( "optional" ) ):
                continue #we skip check for optional and nonrequested values here
            if not value:
                return True
        return False
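    # Usage sketch (hypothetical call site; dataset is any object with
    # Galaxy-style .metadata):
    #
    #     if self.missing_meta( dataset, skip=[ 'dbkey' ] ):
    #         ... # treat the dataset's metadata as incomplete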
    def set_max_optional_metadata_filesize( self, max_value ):
        try:
            max_value = int( max_value )
        except ( TypeError, ValueError ):
            return
        self.__class__._max_optional_metadata_filesize = max_value
    def get_max_optional_metadata_filesize( self ):
        rval = self.__class__._max_optional_metadata_filesize
        if rval is None:
            return -1
        return rval
    max_optional_metadata_filesize = property( get_max_optional_metadata_filesize, set_max_optional_metadata_filesize )
    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = ''
            dataset.blurb = 'data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
    def display_peek(self, dataset ):
        """Create HTML table, used for displaying peek"""
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            if not dataset.peek:
                dataset.set_peek()
            data = dataset.peek
            lines = data.splitlines()
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                if type( line ) is unicode:
                    out.append( '<tr><td>%s</td></tr>' % escape( line ) )
                else:
                    out.append( '<tr><td>%s</td></tr>' % escape( unicode( line, 'utf-8' ) ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception, exc:
            out = "Can't create peek %s" % str( exc )
        return out
    def display_name(self, dataset):
        """Returns formatted html of dataset name"""
        try:
            if type( dataset.name ) is unicode:
                return escape( dataset.name )
            else:
                return escape( unicode( dataset.name, 'utf-8' ) )
        except:
            return "name unavailable"
    def display_info(self, dataset):
        """Returns formatted html of dataset info"""
        try:
            # Change new line chars to html
            info = escape( dataset.info )
            if info.find( '\r\n' ) >= 0:
                info = info.replace( '\r\n', '<br/>' )
            if info.find( '\r' ) >= 0:
                info = info.replace( '\r', '<br/>' )
            if info.find( '\n' ) >= 0:
                info = info.replace( '\n', '<br/>' )

            # Convert to unicode to display non-ascii characters.
            if type( info ) is not unicode:
                info = unicode( info, 'utf-8' )

            return info
        except:
            return "info unavailable"
    def validate(self, dataset):
        """Unimplemented validate, return no exceptions"""
        return list()
    def repair_methods(self, dataset):
        """Unimplemented method, returns dict with method/option for repairing errors"""
        return None
    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'
    def add_display_app ( self, app_id, label, file_function, links_function ):
        """
        Adds a display app to the datatype.
        app_id is a unique id
        label is the primary display label, e.g., display at 'UCSC'
        file_function is a string containing the name of the function that returns a properly formatted display
        links_function is a string containing the name of the function that returns a list of (link_name,link)
        """
        self.supported_display_apps = self.supported_display_apps.copy()
        self.supported_display_apps[app_id] = {'label':label, 'file_function':file_function, 'links_function':links_function}
    def remove_display_app (self, app_id):
        """Removes a display app from the datatype"""
        self.supported_display_apps = self.supported_display_apps.copy()
        try:
            del self.supported_display_apps[app_id]
        except:
            log.exception('Tried to remove display app %s from datatype %s, but this display app is not declared.' % ( app_id, self.__class__.__name__ ) )
    def clear_display_apps( self ):
        self.supported_display_apps = {}
    def add_display_application( self, display_application ):
        """New style display applications"""
        assert display_application.id not in self.display_applications, 'Attempted to add a display application twice'
        self.display_applications[ display_application.id ] = display_application
    def get_display_application( self, key, default = None ):
        return self.display_applications.get( key, default )
    def get_display_applications_by_dataset( self, dataset, trans ):
        rval = odict()
        for key, value in self.display_applications.iteritems():
            value = value.filter_by_dataset( dataset, trans )
            if value.links:
                rval[key] = value
        return rval
    def get_display_types(self):
        """Returns display types available"""
        return self.supported_display_apps.keys()
    def get_display_label(self, type):
        """Returns primary label for display app"""
        try:
            return self.supported_display_apps[type]['label']
        except:
            return 'unknown'
    def as_display_type(self, dataset, type, **kwd):
        """Returns modified file contents for a particular display type"""
        try:
            if type in self.get_display_types():
                return getattr( self, self.supported_display_apps[type]['file_function'] )( dataset, **kwd )
        except:
            log.exception('Function %s is referred to in datatype %s for displaying as type %s, but is not accessible' % (self.supported_display_apps[type]['file_function'], self.__class__.__name__, type) )
        return "This display type (%s) is not implemented for this datatype (%s)." % ( type, dataset.ext )
    def get_display_links( self, dataset, type, app, base_url, target_frame='_blank', **kwd ):
        """
        Returns a list of tuples of (name, link) for a particular display type.  No check on
        'access' permissions is done here - if you can view the dataset, you can also save it
        or send it to a destination outside of Galaxy, so Galaxy security restrictions do not
        apply anyway.
        """
        try:
            if type in self.get_display_types():
                return target_frame, getattr( self, self.supported_display_apps[type]['links_function'] )( dataset, type, app, base_url, **kwd )
        except:
            log.exception( 'Function %s is referred to in datatype %s for generating links for type %s, but is not accessible' \
                           % ( self.supported_display_apps[type]['links_function'], self.__class__.__name__, type ) )
        return []
    def get_converter_types(self, original_dataset, datatypes_registry):
        """Returns available converters by type for this dataset"""
        return datatypes_registry.get_converters_by_datatype(original_dataset.ext)
    def find_conversion_destination( self, dataset, accepted_formats, datatypes_registry, **kwd ):
        """Returns ( target_ext, existing converted dataset )"""
        return datatypes_registry.find_conversion_destination_for_dataset_by_extensions( dataset, accepted_formats, **kwd )
    def convert_dataset(self, trans, original_dataset, target_type, return_output = False, visible = True, deps=None):
        """This function adds a job to the queue to convert a dataset to another type. Returns a message about success/failure."""
        converter = trans.app.datatypes_registry.get_converter_by_target_type( original_dataset.ext, target_type )

        if converter is None:
            raise Exception( "A converter does not exist for %s to %s." % ( original_dataset.ext, target_type ) )
        # Generate parameter dictionary
        params = {}
        # Determine input parameter name and add to params
        input_name = 'input1'
        for key, value in converter.inputs.items():
            if deps and value.name in deps:
                params[value.name] = deps[value.name]
            elif value.type == 'data':
                input_name = key

        params[input_name] = original_dataset
        # Run converter; job is dispatched through the queue
        converted_dataset = converter.execute( trans, incoming = params, set_output_hid = visible )[1]
        if len(params) > 0:
            trans.log_event( "Converter params: %s" % (str(params)), tool_id=converter.id )
        if not visible:
            for name, value in converted_dataset.iteritems():
                value.visible = False
        if return_output:
            return converted_dataset
        return "The file conversion of %s on data %s has been added to the Queue." % (converter.name, original_dataset.hid)
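    # Usage sketch (hypothetical call site; trans is a Galaxy transaction and
    # hda a history dataset association):
    #
    #     message = datatype.convert_dataset( trans, hda, 'tabular' )
    #
    # Passing return_output=True returns the converted dataset(s) instead of
    # the status message.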
    # We need to clear associated files before we set metadata, so that, as soon
    # as metadata starts to be set, implicitly converted datasets (for example)
    # are deleted and no longer available 'while' metadata is being set, not
    # just after.  We'll also clear after setting metadata, for backwards
    # compatibility.
    def after_setting_metadata( self, dataset ):
        """This function is called on the dataset after metadata is set."""
        dataset.clear_associated_files( metadata_safe = True )
    def before_setting_metadata( self, dataset ):
        """This function is called on the dataset before metadata is set."""
        dataset.clear_associated_files( metadata_safe = True )
    def __new_composite_file( self, name, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, is_binary = False, space_to_tab = False, **kwds ):
        kwds[ 'name' ] = name
        kwds[ 'optional' ] = optional
        kwds[ 'mimetype' ] = mimetype
        kwds[ 'description' ] = description
        kwds[ 'substitute_name_with_metadata' ] = substitute_name_with_metadata
        kwds[ 'is_binary' ] = is_binary
        kwds[ 'space_to_tab' ] = space_to_tab
        return Bunch( **kwds )
    def add_composite_file( self, name, **kwds ):
        #self.composite_files = self.composite_files.copy()
        self.composite_files[ name ] = self.__new_composite_file( name, **kwds )
    def __substitute_composite_key( self, key, composite_file, dataset = None ):
        if composite_file.substitute_name_with_metadata:
            if dataset:
                meta_value = str( dataset.metadata.get( composite_file.substitute_name_with_metadata ) )
            else:
                meta_value = self.metadata_spec[composite_file.substitute_name_with_metadata].default
            return key % meta_value
        return key
    @property
    def writable_files( self, dataset = None ):
        # Note: accessed as a property, so dataset is effectively always None here
        files = odict()
        if self.composite_type != 'auto_primary_file':
            files[ self.primary_file_name ] = self.__new_composite_file( self.primary_file_name )
        for key, value in self.get_composite_files( dataset = dataset ).iteritems():
            files[ key ] = value
        return files
    def get_composite_files( self, dataset = None ):
        def substitute_composite_key( key, composite_file ):
            if composite_file.substitute_name_with_metadata:
                if dataset:
                    meta_value = str( dataset.metadata.get( composite_file.substitute_name_with_metadata ) )
                else:
                    meta_value = self.metadata_spec[ composite_file.substitute_name_with_metadata ].default
                return key % meta_value
            return key
        files = odict()
        for key, value in self.composite_files.iteritems():
            files[ substitute_composite_key( key, value ) ] = value
        return files
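    # Illustrative example (hypothetical composite file): a %-placeholder in a
    # composite file key is filled from dataset metadata, so
    #
    #     self.add_composite_file( '%s.dat', substitute_name_with_metadata='base_name' )
    #
    # resolves to 'sample1.dat' for a dataset whose base_name metadata is 'sample1'.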
    def generate_auto_primary_file( self, dataset = None ):
        raise Exception( "generate_auto_primary_file is not implemented for this datatype." )
    @property
    def has_resolution(self):
        return False

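# Illustrative sketch of a subclass (hypothetical datatype, not registered
# anywhere): DataMeta gathers MetadataElement declarations from the class body
# and its bases, so a subclass only declares what it adds.
#
#     class ScoredData( Data ):
#         file_ext = 'scored'
#         MetadataElement( name="scoreCol", default=5, desc="Score column", optional=True, no_value=0 )
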
class Text( Data ):
    file_ext = 'txt'

    # Add metadata elements
    MetadataElement( name="data_lines", default=0, desc="Number of data lines", readonly=True, optional=True, visible=False, no_value=0 )

    def write_from_stream(self, dataset, stream):
        """Writes data from a stream"""
        # Write the data to a temp file first, then rewrite it with unix newlines
        fd, temp_name = tempfile.mkstemp()
        while 1:
            chunk = stream.read(1048576)
            if not chunk:
                break
            os.write(fd, chunk)
        os.close(fd)
        # Rewrite the file with unix newlines
        fp = open(dataset.file_name, 'wt')
        for line in file(temp_name, "U"):
            line = line.strip() + '\n'
            fp.write(line)
        fp.close()
        os.remove( temp_name )
    def set_raw_data(self, dataset, data):
        """Saves the data on the disc"""
        fd, temp_name = tempfile.mkstemp()
        os.write(fd, data)
        os.close(fd)
        # Rewrite the file with unix newlines
        fp = open(dataset.file_name, 'wt')
        for line in file(temp_name, "U"):
            line = line.strip() + '\n'
            fp.write(line)
        fp.close()
        os.remove( temp_name )
    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/plain'
    def set_meta( self, dataset, **kwd ):
        """
        Set the number of lines of data in dataset,
        skipping all blank lines and comments.
        """
        data_lines = 0
        for line in file( dataset.file_name ):
            line = line.strip()
            if line and not line.startswith( '#' ):
                data_lines += 1
        dataset.metadata.data_lines = data_lines
    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
        if not dataset.dataset.purged:
            # The file must exist on disk for the get_file_peek() method
            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            if line_count is None:
                # See if line_count is stored in the metadata
                if dataset.metadata.data_lines:
                    dataset.blurb = "%s lines" % util.commaify( str( dataset.metadata.data_lines ) )
                else:
                    # Number of lines is not known ( this should not happen ), and auto-detect is
                    # needed to set metadata
                    dataset.blurb = "? lines"
            else:
                dataset.blurb = "%s lines" % util.commaify( str( line_count ) )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

class Newick( Text ):
    pass

# ------------- Utility methods --------------

def get_test_fname( fname ):
    """Returns test data filename"""
    path, name = os.path.split(__file__)
    full_path = os.path.join( path, 'test', fname )
    return full_path
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100.0 bytes'
    >>> nice_size(10000)
    '9.8 Kb'
    >>> nice_size(1000000)
    '976.6 Kb'
    >>> nice_size(100000000)
    '95.4 Mb'
    """
    words = [ 'bytes', 'Kb', 'Mb', 'Gb' ]
    try:
        size = float( size )
    except ( TypeError, ValueError ):
        return '??? bytes'
    for ind, word in enumerate(words):
        step  = 1024 ** (ind + 1)
        if step > size:
            size = size / float(1024 ** ind)
            out  = "%.1f %s" % (size, word)
            return out
    return '??? bytes'
def get_file_peek( file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5 ):
    """
    Returns the first LINE_COUNT lines wrapped to WIDTH

    ## >>> fname = get_test_fname('4.bed')
    ## >>> get_file_peek(fname)
    ## 'chr22    30128507    31828507    uc003bnx.1_cds_2_0_chr22_29227_f    0    +\n'
    """
    lines = []
    count = 0
    file_type = None
    data_checked = False
    temp = open( file_name, "U" )
    while count <= LINE_COUNT:
        line = temp.readline( WIDTH )
        if line and not is_multi_byte and not data_checked:
            # See if we have a compressed or binary file
            if line[0:2] == util.gzip_magic:
                file_type = 'gzipped'
                break
            else:
                for char in line:
                    if ord( char ) > 128:
                        file_type = 'binary'
                        break
            data_checked = True
        if file_type in [ 'gzipped', 'binary' ]:
            break
        lines.append( line )
        count += 1
    temp.close()
    if file_type in [ 'gzipped', 'binary' ]:
        text = "%s file" % file_type
    else:
        try:
            text = unicode( '\n'.join( lines ), 'utf-8' )
        except UnicodeDecodeError:
            text = "binary/unknown file"
    return text
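
# Illustrative usage sketch of the utility methods above (the test file name
# is assumed to exist; guarded so the module stays safe to import):
if __name__ == '__main__':
    example = get_test_fname( '4.bed' )
    if os.path.exists( example ):
        print get_file_peek( example )
        print nice_size( os.path.getsize( example ) )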