import sys, logging, os, time, datetime, errno

# Configure a module-level logger that echoes DEBUG and above to stdout so
# migration progress is visible when the script is run from the command line.
log = logging.getLogger( __name__ )
log.setLevel(logging.DEBUG)
handler = logging.StreamHandler( sys.stdout )
# NOTE(review): 'format' shadows the builtin of the same name; it is only
# used on the following line, so the shadowing is harmless here.
format = "%(name)s %(levelname)s %(asctime)s %(message)s"
formatter = logging.Formatter( format )
handler.setFormatter( formatter )
log.addHandler( handler )

from migrate import migrate_engine
from sqlalchemy import and_

from sqlalchemy import *
# Callable (not a value) used as the default/onupdate for timestamp columns below.
now = datetime.datetime.utcnow
from sqlalchemy.orm import *

from galaxy.model.orm.ext.assignmapper import assign_mapper

from galaxy.model.custom_types import *

from galaxy.util.bunch import Bunch


# Bind table metadata to the migration engine and build a scoped session.
# autocommit=True / autoflush=False: objects persist only on the explicit
# context.flush() calls scattered through the classes below.
metadata = MetaData( migrate_engine )
context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) )
---|
| 27 | |
---|
| 28 | |
---|
| 29 | ## classes |
---|
def get_permitted_actions( **kwds ):
    """Stub used by this migration: no permitted actions are tracked, so an
    empty Bunch is always returned regardless of the keyword filter."""
    return Bunch()
---|
| 32 | |
---|
def directory_hash_id( id ):
    """Map a numeric id onto a list of 3-digit directory names.

    Files are spread over the filesystem 1000-per-directory: the last three
    decimal digits of the id select the file, the remaining digits (zero
    padded to a multiple of three) name the nested directories.
    """
    id_str = str( id )
    # Shortcut -- ids 0-999 all live directly under ../000/
    if len( id_str ) < 4:
        return [ "000" ]
    # Left-pad with zeros until the length is a multiple of three, then drop
    # the trailing three digits (they address the file, not the directory).
    prefix = ( "0" * ( 3 - len( id_str ) % 3 ) ) + id_str
    prefix = prefix[:-3]
    # Split what remains into 3-character directory names.
    return [ prefix[pos:pos + 3] for pos in range( 0, len( prefix ), 3 ) ]
---|
| 45 | |
---|
| 46 | |
---|
| 47 | class Dataset( object ): |
---|
| 48 | states = Bunch( NEW = 'new', |
---|
| 49 | UPLOAD = 'upload', |
---|
| 50 | QUEUED = 'queued', |
---|
| 51 | RUNNING = 'running', |
---|
| 52 | OK = 'ok', |
---|
| 53 | EMPTY = 'empty', |
---|
| 54 | ERROR = 'error', |
---|
| 55 | DISCARDED = 'discarded' ) |
---|
| 56 | permitted_actions = get_permitted_actions( filter='DATASET' ) |
---|
| 57 | file_path = "/tmp/" |
---|
| 58 | engine = None |
---|
| 59 | def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ): |
---|
| 60 | self.id = id |
---|
| 61 | self.state = state |
---|
| 62 | self.deleted = False |
---|
| 63 | self.purged = False |
---|
| 64 | self.purgable = purgable |
---|
| 65 | self.external_filename = external_filename |
---|
| 66 | self._extra_files_path = extra_files_path |
---|
| 67 | self.file_size = file_size |
---|
| 68 | def get_file_name( self ): |
---|
| 69 | if not self.external_filename: |
---|
| 70 | assert self.id is not None, "ID must be set before filename used (commit the object)" |
---|
| 71 | # First try filename directly under file_path |
---|
| 72 | filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id ) |
---|
| 73 | # Only use that filename if it already exists (backward compatibility), |
---|
| 74 | # otherwise construct hashed path |
---|
| 75 | if not os.path.exists( filename ): |
---|
| 76 | dir = os.path.join( self.file_path, *directory_hash_id( self.id ) ) |
---|
| 77 | # Create directory if it does not exist |
---|
| 78 | try: |
---|
| 79 | os.makedirs( dir ) |
---|
| 80 | except OSError, e: |
---|
| 81 | # File Exists is okay, otherwise reraise |
---|
| 82 | if e.errno != errno.EEXIST: |
---|
| 83 | raise |
---|
| 84 | # Return filename inside hashed directory |
---|
| 85 | return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) ) |
---|
| 86 | else: |
---|
| 87 | filename = self.external_filename |
---|
| 88 | # Make filename absolute |
---|
| 89 | return os.path.abspath( filename ) |
---|
| 90 | def set_file_name ( self, filename ): |
---|
| 91 | if not filename: |
---|
| 92 | self.external_filename = None |
---|
| 93 | else: |
---|
| 94 | self.external_filename = filename |
---|
| 95 | file_name = property( get_file_name, set_file_name ) |
---|
| 96 | @property |
---|
| 97 | def extra_files_path( self ): |
---|
| 98 | if self._extra_files_path: |
---|
| 99 | path = self._extra_files_path |
---|
| 100 | else: |
---|
| 101 | path = os.path.join( self.file_path, "dataset_%d_files" % self.id ) |
---|
| 102 | #only use path directly under self.file_path if it exists |
---|
| 103 | if not os.path.exists( path ): |
---|
| 104 | path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id ) |
---|
| 105 | # Make path absolute |
---|
| 106 | return os.path.abspath( path ) |
---|
| 107 | def get_size( self ): |
---|
| 108 | """Returns the size of the data on disk""" |
---|
| 109 | if self.file_size: |
---|
| 110 | return self.file_size |
---|
| 111 | else: |
---|
| 112 | try: |
---|
| 113 | return os.path.getsize( self.file_name ) |
---|
| 114 | except OSError: |
---|
| 115 | return 0 |
---|
| 116 | def set_size( self ): |
---|
| 117 | """Returns the size of the data on disk""" |
---|
| 118 | try: |
---|
| 119 | if not self.file_size: |
---|
| 120 | self.file_size = os.path.getsize( self.file_name ) |
---|
| 121 | except OSError: |
---|
| 122 | self.file_size = 0 |
---|
| 123 | def has_data( self ): |
---|
| 124 | """Detects whether there is any data""" |
---|
| 125 | return self.get_size() > 0 |
---|
| 126 | def mark_deleted( self, include_children=True ): |
---|
| 127 | self.deleted = True |
---|
| 128 | # FIXME: sqlalchemy will replace this |
---|
| 129 | def _delete(self): |
---|
| 130 | """Remove the file that corresponds to this data""" |
---|
| 131 | try: |
---|
| 132 | os.remove(self.data.file_name) |
---|
| 133 | except OSError, e: |
---|
| 134 | log.critical('%s delete error %s' % (self.__class__.__name__, e)) |
---|
| 135 | |
---|
class DatasetInstance( object ):
    """A base class for all 'dataset instances', HDAs, LDAs, etc.

    Wraps a shared Dataset (the file on disk) and adds per-instance state:
    name, annotation, datatype extension, metadata, parent/child links and
    validation errors.  Subclasses add history- or library-specific behavior.
    """
    states = Dataset.states
    permitted_actions = Dataset.permitted_actions
    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
                  dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
                  parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
        # NOTE(review): the 'history' and 'hid' arguments are accepted but not
        # stored here; HistoryDatasetAssociation handles them itself.
        self.name = name or "Unnamed dataset"
        self.id = id
        self.info = info
        self.blurb = blurb
        self.peek = peek
        self.extension = extension
        self.designation = designation
        self.metadata = metadata or dict()
        if dbkey: #dbkey is stored in metadata, only set if non-zero, or else we could clobber one supplied by input 'metadata'
            self.dbkey = dbkey
        self.deleted = deleted
        self.visible = visible
        # Relationships
        if not dataset and create_dataset:
            # Create and flush the backing Dataset immediately so it gets an id.
            dataset = Dataset( state=Dataset.states.NEW )
            context.add( dataset )
            context.flush()
        self.dataset = dataset
        self.parent_id = parent_id
        self.validation_errors = validation_errors
    @property
    def ext( self ):
        """Shorthand for the datatype extension."""
        return self.extension
    def get_dataset_state( self ):
        return self.dataset.state
    def set_dataset_state ( self, state ):
        self.dataset.state = state
        context.add( self.dataset )
        context.flush() #flush here, because hda.flush() won't flush the Dataset object
    state = property( get_dataset_state, set_dataset_state )
    def get_file_name( self ):
        return self.dataset.get_file_name()
    def set_file_name (self, filename):
        return self.dataset.set_file_name( filename )
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        return self.dataset.extra_files_path
    @property
    def datatype( self ):
        """Datatype object looked up from this instance's extension."""
        return datatypes_registry.get_datatype_by_extension( self.extension )
    def get_metadata( self ):
        # Lazily (re)build the MetadataCollection wrapper around _metadata.
        if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self: #using weakref to store parent (to prevent circ ref), does a context.clear() cause parent to be invalidated, while still copying over this non-database attribute?
            self._metadata_collection = MetadataCollection( self )
        return self._metadata_collection
    def set_metadata( self, bunch ):
        # Needs to accept a MetadataCollection, a bunch, or a dict
        self._metadata = self.metadata.make_dict_copy( bunch )
    metadata = property( get_metadata, set_metadata )
    # This provide backwards compatibility with using the old dbkey
    # field in the database. That field now maps to "old_dbkey" (see mapping.py).
    def get_dbkey( self ):
        dbkey = self.metadata.dbkey
        if not isinstance(dbkey, list): dbkey = [dbkey]
        if dbkey in [[None], []]: return "?"
        return dbkey[0]
    def set_dbkey( self, value ):
        if "dbkey" in self.datatype.metadata_spec:
            if not isinstance(value, list):
                self.metadata.dbkey = [value]
            else:
                self.metadata.dbkey = value
    dbkey = property( get_dbkey, set_dbkey )
    def change_datatype( self, new_ext ):
        """Switch this instance to a new datatype, dropping converted files."""
        self.clear_associated_files()
        datatypes_registry.change_datatype( self, new_ext )
    def get_size( self ):
        """Returns the size of the data on disk"""
        return self.dataset.get_size()
    def set_size( self ):
        """Records the size of the data on disk in the underlying Dataset."""
        return self.dataset.set_size()
    def has_data( self ):
        """Detects whether there is any data"""
        return self.dataset.has_data()
    def get_raw_data( self ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        return self.datatype.get_raw_data( self )
    def write_from_stream( self, stream ):
        """Writes data from a stream"""
        self.datatype.write_from_stream(self, stream)
    def set_raw_data( self, data ):
        """Saves the data on the disc"""
        self.datatype.set_raw_data(self, data)
    def get_mime( self ):
        """Returns the mime type of the data"""
        return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
    def set_peek( self, is_multi_byte=False ):
        return self.datatype.set_peek( self, is_multi_byte=is_multi_byte )
    def init_meta( self, copy_from=None ):
        return self.datatype.init_meta( self, copy_from=copy_from )
    def set_meta( self, **kwd ):
        # Converted files whose metadata_safe flag is unset are invalidated.
        self.clear_associated_files( metadata_safe = True )
        return self.datatype.set_meta( self, **kwd )
    def missing_meta( self, **kwd ):
        return self.datatype.missing_meta( self, **kwd )
    def as_display_type( self, type, **kwd ):
        return self.datatype.as_display_type( self, type, **kwd )
    def display_peek( self ):
        return self.datatype.display_peek( self )
    def display_name( self ):
        return self.datatype.display_name( self )
    def display_info( self ):
        return self.datatype.display_info( self )
    def get_converted_files_by_type( self, file_type ):
        """Return non-deleted implicitly-converted datasets of the given type."""
        valid = []
        for assoc in self.implicitly_converted_datasets:
            if not assoc.deleted and assoc.type == file_type:
                valid.append( assoc.dataset )
        return valid
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        # BUGFIX: was `raise 'Unimplemented'` -- string exceptions are
        # invalid from Python 2.6 on (TypeError at raise time); raise a real
        # exception instead.  Subclasses override this method.
        raise Exception( 'Unimplemented' )
    def get_child_by_designation(self, designation):
        """Return the child instance with the given designation, or None."""
        for child in self.children:
            if child.designation == designation:
                return child
        return None
    def get_converter_types(self):
        return self.datatype.get_converter_types( self, datatypes_registry)
    def find_conversion_destination( self, accepted_formats, **kwd ):
        """Returns ( target_ext, exisiting converted dataset )"""
        return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
    def add_validation_error( self, validation_error ):
        self.validation_errors.append( validation_error )
    def extend_validation_errors( self, validation_errors ):
        self.validation_errors.extend(validation_errors)
    def mark_deleted( self, include_children=True ):
        self.deleted = True
        if include_children:
            for child in self.children:
                child.mark_deleted()
    def mark_undeleted( self, include_children=True ):
        self.deleted = False
        if include_children:
            for child in self.children:
                child.mark_undeleted()
    def undeletable( self ):
        """A purged instance can never be undeleted."""
        if self.purged:
            return False
        return True
    @property
    def source_library_dataset( self ):
        """Walk the copied_from chain looking for the originating library
        dataset; returns ( ldda, library_dataset ) or ( None, None )."""
        def get_source( dataset ):
            if isinstance( dataset, LibraryDatasetDatasetAssociation ):
                if dataset.library_dataset:
                    return ( dataset, dataset.library_dataset )
            if dataset.copied_from_library_dataset_dataset_association:
                source = get_source( dataset.copied_from_library_dataset_dataset_association )
                # NOTE(review): get_source always returns a (truthy) 2-tuple,
                # so these `if source:` guards always pass -- harmless, since
                # the fallthrough returns ( None, None ) as well.
                if source:
                    return source
            if dataset.copied_from_history_dataset_association:
                source = get_source( dataset.copied_from_history_dataset_association )
                if source:
                    return source
            return ( None, None )
        return get_source( self )
---|
| 299 | |
---|
| 300 | |
---|
class HistoryDatasetAssociation( DatasetInstance ):
    """A DatasetInstance that belongs to a user's history (a numbered item)."""
    def __init__( self,
                  hid = None,
                  history = None,
                  copied_from_history_dataset_association = None,
                  copied_from_library_dataset_dataset_association = None,
                  **kwd ):
        # hid is the item's ordinal position within its history.
        DatasetInstance.__init__( self, **kwd )
        self.hid = hid
        # Relationships
        self.history = history
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
    def copy( self, copy_children = False, parent_id = None, target_history = None ):
        """Return a new HDA sharing this HDA's underlying Dataset.

        The copy is flushed before metadata is assigned because MetadataFiles
        require dataset.id; peek is regenerated afterwards when the datatype
        says peeks are not safe to copy.
        """
        hda = HistoryDatasetAssociation( hid=self.hid,
                                         name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset = self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_history_dataset_association=self,
                                         history = target_history )
        context.add( hda )
        context.flush()
        hda.set_size()
        # Need to set after flushed, as MetadataFiles require dataset.id
        hda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children = copy_children, parent_id = hda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            hda.set_peek()
        context.flush()
        return hda
    def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
        """Copy this HDA (and its children) into a library folder, returning
        the new LibraryDatasetDatasetAssociation made current for its
        LibraryDataset."""
        if replace_dataset:
            # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
            library_dataset = replace_dataset
        else:
            # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
            # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
            library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
            context.add( library_dataset )
            context.flush()
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 library_dataset=library_dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_history_dataset_association=self,
                                                 user=self.history.user )
        context.add( ldda )
        context.flush()
        # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
        # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
        ldda.metadata = self.metadata
        if not replace_dataset:
            target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
            context.add( target_folder )
            context.flush()
        # Make the new LDDA the current version of the LibraryDataset.
        library_dataset.library_dataset_dataset_association_id = ldda.id
        context.add( library_dataset )
        context.flush()
        for child in self.children:
            child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        """Clear implicitly-converted datasets derived from this HDA."""
        # metadata_safe = True means to only clear when assoc.metadata_safe == False
        for assoc in self.implicitly_converted_datasets:
            if not metadata_safe or not assoc.metadata_safe:
                assoc.clear( purge = purge )
---|
| 388 | |
---|
| 389 | |
---|
| 390 | |
---|
class LibraryDatasetDatasetAssociation( DatasetInstance ):
    """A DatasetInstance stored in a library; versions of a LibraryDataset."""
    def __init__( self,
                  copied_from_history_dataset_association=None,
                  copied_from_library_dataset_dataset_association=None,
                  library_dataset=None,
                  user=None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
        self.library_dataset = library_dataset
        self.user = user
    def to_history_dataset_association( self, target_history, parent_id=None ):
        """Copy this LDDA (and children) into target_history as a new HDA."""
        hid = target_history._next_hid()
        hda = HistoryDatasetAssociation( name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset=self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_library_dataset_dataset_association=self,
                                         history=target_history,
                                         hid=hid )
        context.flush()
        hda.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id
        for child in self.children:
            child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
        if not self.datatype.copy_safe_peek:
            hda.set_peek() #in some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
        context.add( hda )
        context.flush()
        return hda
    def copy( self, copy_children = False, parent_id = None, target_folder = None ):
        """Return a new LDDA sharing this LDDA's underlying Dataset."""
        # NOTE(review): 'folder' is not an explicit parameter of __init__ here
        # or in DatasetInstance -- confirm a receiver for this kwarg exists
        # before calling copy() with a non-None target_folder.
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_library_dataset_dataset_association=self,
                                                 folder=target_folder )
        context.add( ldda )
        context.flush()
        # Need to set after flushed, as MetadataFiles require dataset.id
        ldda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children = copy_children, parent_id = ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        # Library datasets track no implicitly-converted files; nothing to do.
        return
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        """Accumulate info templates for this LDDA (and, via its
        LibraryDataset, its ancestors) into template_list and return it.

        If restrict is True, only templates directly associated with this
        LibraryDatasetDatasetAssociation are collected.
        """
        # BUGFIX: the default was a shared mutable list ( template_list=[] )
        # which was mutated via extend(), leaking entries between calls.
        if template_list is None:
            template_list = []
        if self.library_dataset_dataset_info_template_associations:
            template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
        self.library_dataset.get_library_item_info_templates( template_list, restrict )
        return template_list
---|
| 460 | |
---|
| 461 | |
---|
| 462 | |
---|
class LibraryDataset( object ):
    """Acts as a proxy to the currently selected (current-version) LDDA:
    name/info reads fall through to that LDDA when one is set."""
    def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
        self.folder = folder
        self.order_id = order_id
        # These assignments go through the name/info property setters below.
        self.name = name
        self.info = info
        self.library_dataset_dataset_association = library_dataset_dataset_association
    def set_library_dataset_dataset_association( self, ldda ):
        """Make ldda the current version of this dataset and persist both."""
        self.library_dataset_dataset_association = ldda
        ldda.library_dataset = self
        context.add_all( ( self, ldda ) )
        context.flush()
    def get_info( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.info
        elif self._info:
            return self._info
        else:
            return 'no info'
    def set_info( self, info ):
        self._info = info
    info = property( get_info, set_info )
    def get_name( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.name
        elif self._name:
            return self._name
        else:
            return 'Unnamed dataset'
    def set_name( self, name ):
        self._name = name
    name = property( get_name, set_name )
    def display_name( self ):
        # BUGFIX: the delegated result was computed but never returned,
        # so callers always received None.
        return self.library_dataset_dataset_association.display_name()
    def get_purged( self ):
        return self.library_dataset_dataset_association.dataset.purged
    def set_purged( self, purged ):
        # Purging through this proxy is unsupported; only validate the request.
        if purged:
            raise Exception( "Not implemented" )
        if not purged and self.purged:
            raise Exception( "Cannot unpurge once purged" )
    purged = property( get_purged, set_purged )
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        """Accumulate info templates for this LibraryDataset (and, unless
        restrict is truthy, its containing folders) into template_list and
        return it."""
        # BUGFIX: the default was a shared mutable list ( template_list=[] )
        # which was mutated via extend(), leaking entries between calls.
        if template_list is None:
            template_list = []
        if self.library_dataset_info_template_associations:
            template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
        if restrict not in [ 'True', True ]:
            self.folder.get_library_item_info_templates( template_list, restrict )
        return template_list
---|
| 513 | |
---|
| 514 | ##tables |
---|
| 515 | |
---|
| 516 | |
---|
# Table backing Dataset: one row per physical data file on disk.
Dataset.table = Table( "dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
    Column( "state", TrimmedString( 64 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "purged", Boolean, index=True, default=False ),
    Column( "purgable", Boolean, default=True ),
    Column( "external_filename" , TEXT ),
    Column( "_extra_files_path", TEXT ),
    # Numeric( 15, 0 ): 15 decimal digits, so file sizes beyond 32-bit Integer range fit.
    Column( 'file_size', Numeric( 15, 0 ) ) )
---|
| 528 | |
---|
| 529 | |
---|
| 530 | |
---|
# Table backing HistoryDatasetAssociation.  "metadata" is stored under
# attribute key "_metadata" because the class exposes 'metadata' as a
# property; parent_id and copied_from_history_dataset_association_id are
# self-referencing foreign keys.
HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "hid", Integer ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek" , TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ) )
---|
| 549 | |
---|
| 550 | |
---|
# Table backing LibraryDatasetDatasetAssociation.  The copied_from_* foreign
# keys use use_alter (deferred ALTER TABLE) because they form circular
# references with history_dataset_association / this table itself.
LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek" , TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ),
    Column( "message", TrimmedString( 255 ) ) )
---|
| 570 | |
---|
# Table backing LibraryDataset (the versioned container for LDDAs).  The FK
# to library_dataset_dataset_association uses use_alter because the two
# tables reference each other.
LibraryDataset.table = Table( "library_dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ),#current version of dataset, if null, there is not a current version selected
    Column( "order_id", Integer ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "name", TrimmedString( 255 ), key="_name" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
    Column( "info", TrimmedString( 255 ), key="_info" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
    Column( "deleted", Boolean, index=True, default=False ) )
---|
| 580 | |
---|
| 581 | |
---|
| 582 | |
---|
| 583 | ##mappers |
---|
| 584 | |
---|
| 585 | |
---|
# Map Dataset with collections of its history/library associations; the
# "active_*" variants join with an extra deleted == False filter.
assign_mapper( context, Dataset, Dataset.table,
    properties=dict(
        history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
        active_history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
        library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
        active_library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
    ) )
---|
| 601 | |
---|
| 602 | |
---|
# Map HistoryDatasetAssociation (HDA).  Wires up the eager-loaded underlying
# Dataset, two self-referential relations (copied_from/copied_to and
# parent/children), and the LDDAs that record this HDA as their copy source.
assign_mapper( context, HistoryDatasetAssociation, HistoryDatasetAssociation.table,
    properties=dict(
        # Underlying Dataset row; lazy=False eager-loads it with the HDA.
        dataset=relation(
            Dataset,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
        # .history defined in History mapper
        # Self-join on copied_from_history_dataset_association_id: HDAs that
        # were copied from this one; the backref exposes the single source HDA.
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        # LDDAs recorded (via copied_from_library_dataset_dataset_association_id
        # on the HDA side) as copy targets; backref gives the LDDA its source HDA.
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        # Self-join on parent_id: child HDAs, with a scalar "parent" backref.
        children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        # Children restricted to rows whose visible flag is True.
        visible_children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
        ) )
---|
| 625 | |
---|
# Map LibraryDatasetDatasetAssociation (LDDA).  Mirrors the HDA mapper:
# underlying Dataset, owning LibraryDataset, self-referential copied_from/
# copied_to and parent/children relations, plus HDAs copied from this LDDA.
assign_mapper( context, LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
    properties=dict(
        # Underlying Dataset row (default lazy loading, unlike the HDA mapper).
        dataset=relation( Dataset ),
        # The LibraryDataset this LDDA is a version of.
        library_dataset = relation( LibraryDataset,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
        # Self-join on copied_from_library_dataset_dataset_association_id:
        # LDDAs copied from this one; backref exposes the source LDDA.
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        # HDAs copied from this LDDA (join is on the HDA-side copied_from
        # column); backref gives each HDA its single source LDDA.
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        # Self-join on parent_id: child LDDAs, with a "parent" backref.
        children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        # Children restricted to rows whose visible flag is True.
        visible_children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
        ) )
---|
| 647 | |
---|
# Map LibraryDataset.  library_dataset_dataset_association is the LDDA
# selected as the current version (see the library_dataset table's
# library_dataset_dataset_association_id column); expired_datasets collects
# every other LDDA belonging to this library dataset.
assign_mapper( context, LibraryDataset, LibraryDataset.table,
    properties=dict(
        # The LDDA currently selected as this library dataset's version.
        library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
        # All LDDAs for this library dataset EXCEPT the current one.
        # viewonly=True makes this a read-only query relation (never flushed);
        # foreign_keys is given explicitly because the join is not a plain FK.
        expired_datasets = relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id,LibraryDataset.table.c.library_dataset_dataset_association_id ], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
        ) )
---|
| 653 | |
---|
| 654 | |
---|
def __guess_dataset_by_filename( filename ):
    """Return the Dataset whose id is encoded in ``filename``, or None.

    Dataset files are named ``.../dataset_<id>.dat``; this parses the id out
    of the trailing path component and looks the Dataset up via
    ``Dataset.get``.  Returns None when the name does not match the pattern
    or when parsing/lookup fails (best-effort guess, never raises).
    """
    try:
        # os.path.split always returns a 2-tuple, so the basename is safe to
        # take directly (the previous "if fields:" guard was redundant).
        basename = os.path.split( filename )[-1]
        if basename.startswith( 'dataset_' ) and basename.endswith( '.dat' ): #dataset_%d.dat
            return Dataset.get( int( basename[ len( 'dataset_' ): -len( '.dat' ) ] ) )
    except Exception:
        # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
        # still propagate; any parsing/lookup error means we cannot guess.
        pass
    return None
---|
| 665 | |
---|
def upgrade():
    """Data migration: repair two inconsistencies around shared history items.

    Pass 1 undeletes any Dataset flagged deleted (but not purged) that still
    has a live (not deleted) history or library association, restoring its
    recorded file size when missing.

    Pass 2 handles datasets created by sharing a history before HDAs existed:
    their external_filename points at another Dataset's ``dataset_<id>.dat``
    file.  Their associations are re-pointed at the Dataset that actually owns
    the file, and the duplicate Dataset is flagged deleted/purged (its
    external_filename is rewritten as an audit note; no file is removed).
    """
    log.debug( "Fixing a discrepancy concerning deleted shared history items." )
    affected_items = 0
    start_time = time.time()
    # Pass 1: a deleted-but-unpurged Dataset with at least one live
    # association should not be deleted; restore it.
    for dataset in context.query( Dataset ).filter( and_( Dataset.deleted == True, Dataset.purged == False ) ):
        for dataset_instance in dataset.history_associations + dataset.library_associations:
            if not dataset_instance.deleted:
                dataset.deleted = False
                if dataset.file_size in [ None, 0 ]:
                    dataset.set_size() #Restore filesize
                affected_items += 1
                break  # one live association is enough; move to next dataset
    context.flush()
    log.debug( "%i items affected, and restored." % ( affected_items ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )

    #fix share before hda
    log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
    dataset_by_filename = {}  # cache: file_name -> guessed Dataset (or None)
    changed_associations = 0
    start_time = time.time()
    # Pass 2: an external_filename like '%dataset_%.dat' marks a dataset whose
    # file really belongs to another Dataset.
    for dataset in context.query( Dataset ).filter( Dataset.external_filename.like( '%dataset_%.dat' ) ):
        if dataset.file_name in dataset_by_filename:
            guessed_dataset = dataset_by_filename[ dataset.file_name ]
        else:
            guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
            # Sanity check: the guessed Dataset must resolve to the same file.
            if guessed_dataset and dataset.file_name != guessed_dataset.file_name:#not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
                guessed_dataset = None
            dataset_by_filename[ dataset.file_name ] = guessed_dataset

        if guessed_dataset is not None and guessed_dataset.id != dataset.id: #could we have a self referential dataset?
            # Re-point every association at the Dataset that owns the file.
            for dataset_instance in dataset.history_associations + dataset.library_associations:
                dataset_instance.dataset = guessed_dataset
                changed_associations += 1
            #mark original Dataset as deleted and purged, it is no longer in use, but do not delete file_name contents
            dataset.deleted = True
            dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
            dataset.purged = True #we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
    context.flush()
    # NOTE(review): message reads "restored" but this pass re-maps
    # associations — looks like a copy-paste of the pass-1 message; left
    # unchanged here since this is a documentation-only pass.
    log.debug( "%i items affected, and restored." % ( changed_associations ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
---|
| 707 | |
---|
def downgrade():
    """No-op: this data migration cannot be reversed."""
    log.debug( "Downgrade is not possible." )
---|
| 710 | |
---|