import sys, logging, os, time, datetime, errno

log = logging.getLogger( __name__ )
log.setLevel( logging.DEBUG )
handler = logging.StreamHandler( sys.stdout )
log_format = "%(name)s %(levelname)s %(asctime)s %(message)s"
formatter = logging.Formatter( log_format )
handler.setFormatter( formatter )
log.addHandler( handler )

from migrate import migrate_engine
from sqlalchemy import and_

from sqlalchemy import *
now = datetime.datetime.utcnow
from sqlalchemy.orm import *

from galaxy.model.orm.ext.assignmapper import assign_mapper

from galaxy.model.custom_types import *

from galaxy.util.bunch import Bunch


metadata = MetaData( migrate_engine )
context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) )
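# Note: with autocommit=True each context.flush() below commits in its own
# short transaction, so the migration persists changes incrementally through
# explicit flush() calls rather than one final commit.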


## classes
def get_permitted_actions( **kwds ):
    # Stub for this migration script -- real permitted actions are not needed here
    return Bunch()

def directory_hash_id( id ):
    s = str( id )
    l = len( s )
    # Shortcut -- ids 0-999 go under ../000/
    if l < 4:
        return [ "000" ]
    # Pad with zeros until a multiple of three
    padded = ( ( 3 - len( s ) % 3 ) * "0" ) + s
    # Drop the last three digits -- 1000 files per directory
    padded = padded[:-3]
    # Break into chunks of three
    return [ padded[i*3:(i+1)*3] for i in range( len( padded ) // 3 ) ]
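# Worked examples of the hashing scheme (illustrative only, not executed by
# the migration). Note that an id whose digit count is already a multiple of
# three still gains a full "000" prefix before the last three digits drop:
#   directory_hash_id( 42 )     -> [ "000" ]
#   directory_hash_id( 12345 )  -> [ "012" ]
#   directory_hash_id( 123456 ) -> [ "000", "123" ]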


class Dataset( object ):
    states = Bunch( NEW = 'new',
                    UPLOAD = 'upload',
                    QUEUED = 'queued',
                    RUNNING = 'running',
                    OK = 'ok',
                    EMPTY = 'empty',
                    ERROR = 'error',
                    DISCARDED = 'discarded' )
    permitted_actions = get_permitted_actions( filter='DATASET' )
    file_path = "/tmp/"
    engine = None
    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
        self.id = id
        self.state = state
        self.deleted = False
        self.purged = False
        self.purgable = purgable
        self.external_filename = external_filename
        self._extra_files_path = extra_files_path
        self.file_size = file_size
    def get_file_name( self ):
        if not self.external_filename:
            assert self.id is not None, "ID must be set before filename used (commit the object)"
            # First try filename directly under file_path
            filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
            # Only use that filename if it already exists (backward compatibility),
            # otherwise construct hashed path
            if not os.path.exists( filename ):
                dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
                # Create directory if it does not exist
                try:
                    os.makedirs( dir )
                except OSError as e:
                    # File Exists is okay, otherwise reraise
                    if e.errno != errno.EEXIST:
                        raise
                # Return filename inside hashed directory
                return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
        else:
            filename = self.external_filename
        # Make filename absolute
        return os.path.abspath( filename )
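    # Illustrative paths (assuming the default file_path of "/tmp/"):
    #   dataset 123    -> /tmp/dataset_123.dat             (legacy flat layout, used only if it already exists)
    #   dataset 123456 -> /tmp/000/123/dataset_123456.dat  (hashed layout otherwise)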
    def set_file_name( self, filename ):
        if not filename:
            self.external_filename = None
        else:
            self.external_filename = filename
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        if self._extra_files_path:
            path = self._extra_files_path
        else:
            path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
            # Only use path directly under self.file_path if it exists
            if not os.path.exists( path ):
                path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
        # Make path absolute
        return os.path.abspath( path )
    def get_size( self ):
        """Returns the size of the data on disk"""
        if self.file_size:
            return self.file_size
        else:
            try:
                return os.path.getsize( self.file_name )
            except OSError:
                return 0
    def set_size( self ):
        """Sets file_size from the size of the data on disk, if not already set"""
        try:
            if not self.file_size:
                self.file_size = os.path.getsize( self.file_name )
        except OSError:
            self.file_size = 0
    def has_data( self ):
        """Detects whether there is any data"""
        return self.get_size() > 0
    def mark_deleted( self, include_children=True ):
        self.deleted = True
    # FIXME: sqlalchemy will replace this
    def _delete( self ):
        """Remove the file that corresponds to this data"""
        try:
            os.remove( self.file_name )
        except OSError as e:
            log.critical( '%s delete error %s' % ( self.__class__.__name__, e ) )


class DatasetInstance( object ):
    """A base class for all 'dataset instances': HDAs, LDDAs, etc."""
    states = Dataset.states
    permitted_actions = Dataset.permitted_actions
    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
                  dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
                  parent_id=None, validation_errors=None, visible=True, create_dataset=False ):
        self.name = name or "Unnamed dataset"
        self.id = id
        self.info = info
        self.blurb = blurb
        self.peek = peek
        self.extension = extension
        self.designation = designation
        self.metadata = metadata or dict()
        if dbkey:
            # dbkey is stored in metadata; only set it if non-empty, or else we could
            # clobber one supplied by the input 'metadata'
            self.dbkey = dbkey
        self.deleted = deleted
        self.visible = visible
        # Relationships
        if not dataset and create_dataset:
            dataset = Dataset( state=Dataset.states.NEW )
            context.add( dataset )
            context.flush()
        self.dataset = dataset
        self.parent_id = parent_id
        self.validation_errors = validation_errors
    @property
    def ext( self ):
        return self.extension
    def get_dataset_state( self ):
        return self.dataset.state
    def set_dataset_state( self, state ):
        self.dataset.state = state
        context.add( self.dataset )
        context.flush() # flush here, because hda.flush() won't flush the Dataset object
    state = property( get_dataset_state, set_dataset_state )
    def get_file_name( self ):
        return self.dataset.get_file_name()
    def set_file_name( self, filename ):
        return self.dataset.set_file_name( filename )
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        return self.dataset.extra_files_path
    @property
    def datatype( self ):
        # Note: datatypes_registry is not defined in this migration script, so
        # datatype-dependent methods must not be called while upgrade() runs
        return datatypes_registry.get_datatype_by_extension( self.extension )
    def get_metadata( self ):
        # A weakref is used to store the parent (to prevent a circular reference);
        # does a context.clear() cause the parent to be invalidated, while still
        # copying over this non-database attribute?
        if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self:
            self._metadata_collection = MetadataCollection( self )
        return self._metadata_collection
    def set_metadata( self, bunch ):
        # Needs to accept a MetadataCollection, a bunch, or a dict
        self._metadata = self.metadata.make_dict_copy( bunch )
    metadata = property( get_metadata, set_metadata )
    # This provides backwards compatibility with using the old dbkey
    # field in the database. That field now maps to "old_dbkey" (see mapping.py).
    def get_dbkey( self ):
        dbkey = self.metadata.dbkey
        if not isinstance( dbkey, list ):
            dbkey = [ dbkey ]
        if dbkey in [ [None], [] ]:
            return "?"
        return dbkey[0]
    def set_dbkey( self, value ):
        if "dbkey" in self.datatype.metadata_spec:
            if not isinstance( value, list ):
                self.metadata.dbkey = [ value ]
            else:
                self.metadata.dbkey = value
    dbkey = property( get_dbkey, set_dbkey )
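    # Illustrative behavior: with metadata.dbkey == [ "hg18" ] the property
    # returns "hg18"; with no dbkey stored it returns "?".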
    def change_datatype( self, new_ext ):
        self.clear_associated_files()
        datatypes_registry.change_datatype( self, new_ext )
    def get_size( self ):
        """Returns the size of the data on disk"""
        return self.dataset.get_size()
    def set_size( self ):
        """Sets the size of the data on disk"""
        return self.dataset.set_size()
    def has_data( self ):
        """Detects whether there is any data"""
        return self.dataset.has_data()
    def get_raw_data( self ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        return self.datatype.get_raw_data( self )
    def write_from_stream( self, stream ):
        """Writes data from a stream"""
        self.datatype.write_from_stream( self, stream )
    def set_raw_data( self, data ):
        """Saves the data on the disk"""
        self.datatype.set_raw_data( self, data )
    def get_mime( self ):
        """Returns the mime type of the data"""
        return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
    def set_peek( self, is_multi_byte=False ):
        return self.datatype.set_peek( self, is_multi_byte=is_multi_byte )
    def init_meta( self, copy_from=None ):
        return self.datatype.init_meta( self, copy_from=copy_from )
    def set_meta( self, **kwd ):
        self.clear_associated_files( metadata_safe=True )
        return self.datatype.set_meta( self, **kwd )
    def missing_meta( self, **kwd ):
        return self.datatype.missing_meta( self, **kwd )
    def as_display_type( self, type, **kwd ):
        return self.datatype.as_display_type( self, type, **kwd )
    def display_peek( self ):
        return self.datatype.display_peek( self )
    def display_name( self ):
        return self.datatype.display_name( self )
    def display_info( self ):
        return self.datatype.display_info( self )
    def get_converted_files_by_type( self, file_type ):
        valid = []
        for assoc in self.implicitly_converted_datasets:
            if not assoc.deleted and assoc.type == file_type:
                valid.append( assoc.dataset )
        return valid
    def clear_associated_files( self, metadata_safe=False, purge=False ):
        raise Exception( 'Unimplemented' )
    def get_child_by_designation( self, designation ):
        for child in self.children:
            if child.designation == designation:
                return child
        return None
    def get_converter_types( self ):
        return self.datatype.get_converter_types( self, datatypes_registry )
    def find_conversion_destination( self, accepted_formats, **kwd ):
        """Returns ( target_ext, existing converted dataset )"""
        return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
    def add_validation_error( self, validation_error ):
        self.validation_errors.append( validation_error )
    def extend_validation_errors( self, validation_errors ):
        self.validation_errors.extend( validation_errors )
    def mark_deleted( self, include_children=True ):
        self.deleted = True
        if include_children:
            for child in self.children:
                child.mark_deleted()
    def mark_undeleted( self, include_children=True ):
        self.deleted = False
        if include_children:
            for child in self.children:
                child.mark_undeleted()
    def undeletable( self ):
        if self.purged:
            return False
        return True
    @property
    def source_library_dataset( self ):
        def get_source( dataset ):
            if isinstance( dataset, LibraryDatasetDatasetAssociation ):
                if dataset.library_dataset:
                    return ( dataset, dataset.library_dataset )
            if dataset.copied_from_library_dataset_dataset_association:
                source = get_source( dataset.copied_from_library_dataset_dataset_association )
                if source:
                    return source
            if dataset.copied_from_history_dataset_association:
                source = get_source( dataset.copied_from_history_dataset_association )
                if source:
                    return source
            return ( None, None )
        return get_source( self )


class HistoryDatasetAssociation( DatasetInstance ):
    def __init__( self,
                  hid=None,
                  history=None,
                  copied_from_history_dataset_association=None,
                  copied_from_library_dataset_dataset_association=None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.hid = hid
        # Relationships
        self.history = history
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
    def copy( self, copy_children=False, parent_id=None, target_history=None ):
        hda = HistoryDatasetAssociation( hid=self.hid,
                                         name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset=self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_history_dataset_association=self,
                                         history=target_history )
        context.add( hda )
        context.flush()
        hda.set_size()
        # Need to set after flushed, as MetadataFiles require dataset.id
        hda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children=copy_children, parent_id=hda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            hda.set_peek()
        context.flush()
        return hda
    def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
        if replace_dataset:
            # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
            library_dataset = replace_dataset
        else:
            # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
            # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
            library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
            context.add( library_dataset )
            context.flush()
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 library_dataset=library_dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_history_dataset_association=self,
                                                 user=self.history.user )
        context.add( ldda )
        context.flush()
        # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
        # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
        ldda.metadata = self.metadata
        if not replace_dataset:
            target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
            context.add( target_folder )
            context.flush()
        library_dataset.library_dataset_dataset_association_id = ldda.id
        context.add( library_dataset )
        context.flush()
        for child in self.children:
            child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe=False, purge=False ):
        # metadata_safe = True means to only clear when assoc.metadata_safe == False
        for assoc in self.implicitly_converted_datasets:
            if not metadata_safe or not assoc.metadata_safe:
                assoc.clear( purge=purge )


class LibraryDatasetDatasetAssociation( DatasetInstance ):
    def __init__( self,
                  copied_from_history_dataset_association=None,
                  copied_from_library_dataset_dataset_association=None,
                  library_dataset=None,
                  user=None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
        self.library_dataset = library_dataset
        self.user = user
    def to_history_dataset_association( self, target_history, parent_id=None ):
        hid = target_history._next_hid()
        hda = HistoryDatasetAssociation( name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset=self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_library_dataset_dataset_association=self,
                                         history=target_history,
                                         hid=hid )
        context.flush()
        # Need to set after flushed, as MetadataFiles require dataset.id
        hda.metadata = self.metadata
        for child in self.children:
            child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            hda.set_peek()
        context.add( hda )
        context.flush()
        return hda
    def copy( self, copy_children=False, parent_id=None, target_folder=None ):
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_library_dataset_dataset_association=self,
                                                 folder=target_folder )
        context.add( ldda )
        context.flush()
        # Need to set after flushed, as MetadataFiles require dataset.id
        ldda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children=copy_children, parent_id=ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe=False, purge=False ):
        return
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        # If restrict is True, we'll return only those templates directly associated with this LibraryDatasetDatasetAssociation
        if template_list is None:
            template_list = []
        if self.library_dataset_dataset_info_template_associations:
            template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
        self.library_dataset.get_library_item_info_templates( template_list, restrict )
        return template_list


class LibraryDataset( object ):
    # This class acts as a proxy to the currently selected LDDA
    def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
        self.folder = folder
        self.order_id = order_id
        self.name = name
        self.info = info
        self.library_dataset_dataset_association = library_dataset_dataset_association
    def set_library_dataset_dataset_association( self, ldda ):
        self.library_dataset_dataset_association = ldda
        ldda.library_dataset = self
        context.add_all( ( self, ldda ) )
        context.flush()
    def get_info( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.info
        elif self._info:
            return self._info
        else:
            return 'no info'
    def set_info( self, info ):
        self._info = info
    info = property( get_info, set_info )
    def get_name( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.name
        elif self._name:
            return self._name
        else:
            return 'Unnamed dataset'
    def set_name( self, name ):
        self._name = name
    name = property( get_name, set_name )
    def display_name( self ):
        return self.library_dataset_dataset_association.display_name()
    def get_purged( self ):
        return self.library_dataset_dataset_association.dataset.purged
    def set_purged( self, purged ):
        if purged:
            raise Exception( "Not implemented" )
        if not purged and self.purged:
            raise Exception( "Cannot unpurge once purged" )
    purged = property( get_purged, set_purged )
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        # If restrict is True, we'll return only those templates directly associated with this LibraryDataset
        if template_list is None:
            template_list = []
        if self.library_dataset_info_template_associations:
            template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
        if restrict not in [ 'True', True ]:
            self.folder.get_library_item_info_templates( template_list, restrict )
        return template_list


## tables

Dataset.table = Table( "dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
    Column( "state", TrimmedString( 64 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "purged", Boolean, index=True, default=False ),
    Column( "purgable", Boolean, default=True ),
    Column( "external_filename", TEXT ),
    Column( "_extra_files_path", TEXT ),
    Column( "file_size", Numeric( 15, 0 ) ) )


HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "hid", Integer ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek", TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ) )


LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek", TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ),
    Column( "message", TrimmedString( 255 ) ) )


LibraryDataset.table = Table( "library_dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    # current version of the dataset; if null, there is no current version selected
    Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ),
    Column( "order_id", Integer ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    # when not None/null, _name and _info supersede the LDDA's values for display in the library (but not when imported into a user's history?)
    Column( "name", TrimmedString( 255 ), key="_name" ),
    Column( "info", TrimmedString( 255 ), key="_info" ),
    Column( "deleted", Boolean, index=True, default=False ) )


## mappers

assign_mapper( context, Dataset, Dataset.table,
    properties=dict(
        history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
        active_history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
        library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
        active_library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
    ) )
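# Note: assign_mapper also attaches session-aware query helpers to the mapped
# classes (e.g. the Dataset.get( id ) classmethod used in
# __guess_dataset_by_filename below).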


assign_mapper( context, HistoryDatasetAssociation, HistoryDatasetAssociation.table,
    properties=dict(
        dataset=relation(
            Dataset,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
        # .history defined in History mapper
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        visible_children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
    ) )

assign_mapper( context, LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
    properties=dict(
        dataset=relation( Dataset ),
        library_dataset=relation( LibraryDataset,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        visible_children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
    ) )

assign_mapper( context, LibraryDataset, LibraryDataset.table,
    properties=dict(
        library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
        expired_datasets=relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id, LibraryDataset.table.c.library_dataset_dataset_association_id], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
    ) )


def __guess_dataset_by_filename( filename ):
    """Return a guessed dataset by filename"""
    try:
        fields = os.path.split( filename )
        if fields:
            if fields[-1].startswith( 'dataset_' ) and fields[-1].endswith( '.dat' ): # dataset_%d.dat
                return Dataset.get( int( fields[-1][ len( 'dataset_' ): -len( '.dat' ) ] ) )
    except Exception:
        pass # some parsing error, we can't guess Dataset
    return None
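# Illustrative behavior: "/tmp/000/123/dataset_123456.dat" yields
# Dataset.get( 123456 ); anything that does not parse yields None.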
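# upgrade() makes two passes over the dataset table: the first un-deletes
# datasets that still have live (non-deleted) HDA/LDDA instances; the second
# re-points instances whose Dataset was created by "share before HDA" (an
# external_filename that actually names another dataset's file) at the real
# Dataset, then marks the stand-in as deleted and purged.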
def upgrade():
    log.debug( "Fixing a discrepancy concerning deleted shared history items." )
    affected_items = 0
    start_time = time.time()
    for dataset in context.query( Dataset ).filter( and_( Dataset.deleted == True, Dataset.purged == False ) ):
        for dataset_instance in dataset.history_associations + dataset.library_associations:
            if not dataset_instance.deleted:
                dataset.deleted = False
                if dataset.file_size in [ None, 0 ]:
                    dataset.set_size() # Restore filesize
                affected_items += 1
                break
    context.flush()
    log.debug( "%i items affected, and restored." % ( affected_items ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )

    # Fix datasets shared before HDAs existed
    log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
    dataset_by_filename = {}
    changed_associations = 0
    start_time = time.time()
    for dataset in context.query( Dataset ).filter( Dataset.external_filename.like( '%dataset_%.dat' ) ):
        if dataset.file_name in dataset_by_filename:
            guessed_dataset = dataset_by_filename[ dataset.file_name ]
        else:
            guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
            if guessed_dataset and dataset.file_name != guessed_dataset.file_name: # not os.path.samefile( dataset.file_name, guessed_dataset.file_name )
                guessed_dataset = None
            dataset_by_filename[ dataset.file_name ] = guessed_dataset

        if guessed_dataset is not None and guessed_dataset.id != dataset.id: # could we have a self-referential dataset?
            for dataset_instance in dataset.history_associations + dataset.library_associations:
                dataset_instance.dataset = guessed_dataset
                changed_associations += 1
            # Mark the original Dataset as deleted and purged; it is no longer in use, but do not delete its file_name contents
            dataset.deleted = True
            dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
            dataset.purged = True # we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
    context.flush()
    log.debug( "%i dataset associations changed." % ( changed_associations ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )

def downgrade():
    log.debug( "Downgrade is not possible." )