import sys, logging, os, time, datetime, errno

log = logging.getLogger( __name__ )
log.setLevel( logging.DEBUG )
handler = logging.StreamHandler( sys.stdout )
log_format = "%(name)s %(levelname)s %(asctime)s %(message)s"
formatter = logging.Formatter( log_format )
handler.setFormatter( formatter )
log.addHandler( handler )

from migrate import migrate_engine
from sqlalchemy import and_

from sqlalchemy import *
now = datetime.datetime.utcnow
from sqlalchemy.orm import *

from galaxy.model.orm.ext.assignmapper import assign_mapper

from galaxy.model.custom_types import *

from galaxy.util.bunch import Bunch


metadata = MetaData( migrate_engine )
context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) )
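# Note: with autocommit=True each context.flush() below commits in its own
# short transaction, so the migration persists changes incrementally through
# explicit flush() calls rather than one final commit.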


## classes
def get_permitted_actions( **kwds ):
    # Stub for this migration script -- real permitted actions are not needed here
    return Bunch()

def directory_hash_id( id ):
    s = str( id )
    l = len( s )
    # Shortcut -- ids 0-999 go under ../000/
    if l < 4:
        return [ "000" ]
    # Pad with zeros until a multiple of three
    padded = ( ( 3 - len( s ) % 3 ) * "0" ) + s
    # Drop the last three digits -- 1000 files per directory
    padded = padded[:-3]
    # Break into chunks of three
    return [ padded[i*3:(i+1)*3] for i in range( len( padded ) // 3 ) ]
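# Worked examples of the hashing scheme (illustrative only, not executed by
# the migration). Note that an id whose digit count is already a multiple of
# three still gains a full "000" prefix before the last three digits drop:
#   directory_hash_id( 42 )     -> [ "000" ]
#   directory_hash_id( 12345 )  -> [ "012" ]
#   directory_hash_id( 123456 ) -> [ "000", "123" ]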


class Dataset( object ):
    states = Bunch( NEW = 'new',
                    UPLOAD = 'upload',
                    QUEUED = 'queued',
                    RUNNING = 'running',
                    OK = 'ok',
                    EMPTY = 'empty',
                    ERROR = 'error',
                    DISCARDED = 'discarded' )
    permitted_actions = get_permitted_actions( filter='DATASET' )
    file_path = "/tmp/"
    engine = None
    def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ):
        self.id = id
        self.state = state
        self.deleted = False
        self.purged = False
        self.purgable = purgable
        self.external_filename = external_filename
        self._extra_files_path = extra_files_path
        self.file_size = file_size
    def get_file_name( self ):
        if not self.external_filename:
            assert self.id is not None, "ID must be set before filename used (commit the object)"
            # First try filename directly under file_path
            filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id )
            # Only use that filename if it already exists (backward compatibility),
            # otherwise construct hashed path
            if not os.path.exists( filename ):
                dir = os.path.join( self.file_path, *directory_hash_id( self.id ) )
                # Create directory if it does not exist
                try:
                    os.makedirs( dir )
                except OSError as e:
                    # File Exists is okay, otherwise reraise
                    if e.errno != errno.EEXIST:
                        raise
                # Return filename inside hashed directory
                return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) )
        else:
            filename = self.external_filename
        # Make filename absolute
        return os.path.abspath( filename )
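    # Illustrative paths (assuming the default file_path of "/tmp/"):
    #   dataset 123    -> /tmp/dataset_123.dat             (legacy flat layout, used only if it already exists)
    #   dataset 123456 -> /tmp/000/123/dataset_123456.dat  (hashed layout otherwise)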
    def set_file_name( self, filename ):
        if not filename:
            self.external_filename = None
        else:
            self.external_filename = filename
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        if self._extra_files_path:
            path = self._extra_files_path
        else:
            path = os.path.join( self.file_path, "dataset_%d_files" % self.id )
            # Only use path directly under self.file_path if it exists
            if not os.path.exists( path ):
                path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id )
        # Make path absolute
        return os.path.abspath( path )
    def get_size( self ):
        """Returns the size of the data on disk"""
        if self.file_size:
            return self.file_size
        else:
            try:
                return os.path.getsize( self.file_name )
            except OSError:
                return 0
    def set_size( self ):
        """Sets file_size from the size of the data on disk, if not already set"""
        try:
            if not self.file_size:
                self.file_size = os.path.getsize( self.file_name )
        except OSError:
            self.file_size = 0
    def has_data( self ):
        """Detects whether there is any data"""
        return self.get_size() > 0
    def mark_deleted( self, include_children=True ):
        self.deleted = True
    # FIXME: sqlalchemy will replace this
    def _delete( self ):
        """Remove the file that corresponds to this data"""
        try:
            os.remove( self.file_name )
        except OSError as e:
            log.critical( '%s delete error %s' % ( self.__class__.__name__, e ) )


class DatasetInstance( object ):
    """A base class for all 'dataset instances': HDAs, LDDAs, etc."""
    states = Dataset.states
    permitted_actions = Dataset.permitted_actions
    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
                  dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
                  parent_id=None, validation_errors=None, visible=True, create_dataset=False ):
        self.name = name or "Unnamed dataset"
        self.id = id
        self.info = info
        self.blurb = blurb
        self.peek = peek
        self.extension = extension
        self.designation = designation
        self.metadata = metadata or dict()
        if dbkey:
            # dbkey is stored in metadata; only set it if non-empty, or else we could
            # clobber one supplied by the input 'metadata'
            self.dbkey = dbkey
        self.deleted = deleted
        self.visible = visible
        # Relationships
        if not dataset and create_dataset:
            dataset = Dataset( state=Dataset.states.NEW )
            context.add( dataset )
            context.flush()
        self.dataset = dataset
        self.parent_id = parent_id
        self.validation_errors = validation_errors
    @property
    def ext( self ):
        return self.extension
    def get_dataset_state( self ):
        return self.dataset.state
    def set_dataset_state( self, state ):
        self.dataset.state = state
        context.add( self.dataset )
        context.flush() # flush here, because hda.flush() won't flush the Dataset object
    state = property( get_dataset_state, set_dataset_state )
    def get_file_name( self ):
        return self.dataset.get_file_name()
    def set_file_name( self, filename ):
        return self.dataset.set_file_name( filename )
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        return self.dataset.extra_files_path
    @property
    def datatype( self ):
        # Note: datatypes_registry is not defined in this migration script, so
        # datatype-dependent methods must not be called while upgrade() runs
        return datatypes_registry.get_datatype_by_extension( self.extension )
    def get_metadata( self ):
        # A weakref is used to store the parent (to prevent a circular reference);
        # does a context.clear() cause the parent to be invalidated, while still
        # copying over this non-database attribute?
        if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self:
            self._metadata_collection = MetadataCollection( self )
        return self._metadata_collection
    def set_metadata( self, bunch ):
        # Needs to accept a MetadataCollection, a bunch, or a dict
        self._metadata = self.metadata.make_dict_copy( bunch )
    metadata = property( get_metadata, set_metadata )
    # This provides backwards compatibility with using the old dbkey
    # field in the database. That field now maps to "old_dbkey" (see mapping.py).
    def get_dbkey( self ):
        dbkey = self.metadata.dbkey
        if not isinstance( dbkey, list ):
            dbkey = [ dbkey ]
        if dbkey in [ [None], [] ]:
            return "?"
        return dbkey[0]
    def set_dbkey( self, value ):
        if "dbkey" in self.datatype.metadata_spec:
            if not isinstance( value, list ):
                self.metadata.dbkey = [ value ]
            else:
                self.metadata.dbkey = value
    dbkey = property( get_dbkey, set_dbkey )
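    # Illustrative behavior: with metadata.dbkey == [ "hg18" ] the property
    # returns "hg18"; with no dbkey stored it returns "?".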
    def change_datatype( self, new_ext ):
        self.clear_associated_files()
        datatypes_registry.change_datatype( self, new_ext )
    def get_size( self ):
        """Returns the size of the data on disk"""
        return self.dataset.get_size()
    def set_size( self ):
        """Sets the size of the data on disk"""
        return self.dataset.set_size()
    def has_data( self ):
        """Detects whether there is any data"""
        return self.dataset.has_data()
    def get_raw_data( self ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        return self.datatype.get_raw_data( self )
    def write_from_stream( self, stream ):
        """Writes data from a stream"""
        self.datatype.write_from_stream( self, stream )
    def set_raw_data( self, data ):
        """Saves the data on the disk"""
        self.datatype.set_raw_data( self, data )
    def get_mime( self ):
        """Returns the mime type of the data"""
        return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
    def set_peek( self, is_multi_byte=False ):
        return self.datatype.set_peek( self, is_multi_byte=is_multi_byte )
    def init_meta( self, copy_from=None ):
        return self.datatype.init_meta( self, copy_from=copy_from )
    def set_meta( self, **kwd ):
        self.clear_associated_files( metadata_safe=True )
        return self.datatype.set_meta( self, **kwd )
    def missing_meta( self, **kwd ):
        return self.datatype.missing_meta( self, **kwd )
    def as_display_type( self, type, **kwd ):
        return self.datatype.as_display_type( self, type, **kwd )
    def display_peek( self ):
        return self.datatype.display_peek( self )
    def display_name( self ):
        return self.datatype.display_name( self )
    def display_info( self ):
        return self.datatype.display_info( self )
    def get_converted_files_by_type( self, file_type ):
        valid = []
        for assoc in self.implicitly_converted_datasets:
            if not assoc.deleted and assoc.type == file_type:
                valid.append( assoc.dataset )
        return valid
    def clear_associated_files( self, metadata_safe=False, purge=False ):
        raise Exception( 'Unimplemented' )
    def get_child_by_designation( self, designation ):
        for child in self.children:
            if child.designation == designation:
                return child
        return None
    def get_converter_types( self ):
        return self.datatype.get_converter_types( self, datatypes_registry )
    def find_conversion_destination( self, accepted_formats, **kwd ):
        """Returns ( target_ext, existing converted dataset )"""
        return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
    def add_validation_error( self, validation_error ):
        self.validation_errors.append( validation_error )
    def extend_validation_errors( self, validation_errors ):
        self.validation_errors.extend( validation_errors )
    def mark_deleted( self, include_children=True ):
        self.deleted = True
        if include_children:
            for child in self.children:
                child.mark_deleted()
    def mark_undeleted( self, include_children=True ):
        self.deleted = False
        if include_children:
            for child in self.children:
                child.mark_undeleted()
    def undeletable( self ):
        if self.purged:
            return False
        return True
    @property
    def source_library_dataset( self ):
        def get_source( dataset ):
            if isinstance( dataset, LibraryDatasetDatasetAssociation ):
                if dataset.library_dataset:
                    return ( dataset, dataset.library_dataset )
            if dataset.copied_from_library_dataset_dataset_association:
                source = get_source( dataset.copied_from_library_dataset_dataset_association )
                if source:
                    return source
            if dataset.copied_from_history_dataset_association:
                source = get_source( dataset.copied_from_history_dataset_association )
                if source:
                    return source
            return ( None, None )
        return get_source( self )


class HistoryDatasetAssociation( DatasetInstance ):
    def __init__( self,
                  hid=None,
                  history=None,
                  copied_from_history_dataset_association=None,
                  copied_from_library_dataset_dataset_association=None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.hid = hid
        # Relationships
        self.history = history
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
    def copy( self, copy_children=False, parent_id=None, target_history=None ):
        hda = HistoryDatasetAssociation( hid=self.hid,
                                         name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset=self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_history_dataset_association=self,
                                         history=target_history )
        context.add( hda )
        context.flush()
        hda.set_size()
        # Need to set after flushed, as MetadataFiles require dataset.id
        hda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children=copy_children, parent_id=hda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            hda.set_peek()
        context.flush()
        return hda
    def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
        if replace_dataset:
            # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
            library_dataset = replace_dataset
        else:
            # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
            # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
            library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
            context.add( library_dataset )
            context.flush()
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 library_dataset=library_dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_history_dataset_association=self,
                                                 user=self.history.user )
        context.add( ldda )
        context.flush()
        # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
        # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
        ldda.metadata = self.metadata
        if not replace_dataset:
            target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
            context.add( target_folder )
            context.flush()
        library_dataset.library_dataset_dataset_association_id = ldda.id
        context.add( library_dataset )
        context.flush()
        for child in self.children:
            child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe=False, purge=False ):
        # metadata_safe = True means to only clear when assoc.metadata_safe == False
        for assoc in self.implicitly_converted_datasets:
            if not metadata_safe or not assoc.metadata_safe:
                assoc.clear( purge=purge )


class LibraryDatasetDatasetAssociation( DatasetInstance ):
    def __init__( self,
                  copied_from_history_dataset_association=None,
                  copied_from_library_dataset_dataset_association=None,
                  library_dataset=None,
                  user=None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
        self.library_dataset = library_dataset
        self.user = user
    def to_history_dataset_association( self, target_history, parent_id=None ):
        hid = target_history._next_hid()
        hda = HistoryDatasetAssociation( name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset=self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_library_dataset_dataset_association=self,
                                         history=target_history,
                                         hid=hid )
        context.flush()
        # Need to set after flushed, as MetadataFiles require dataset.id
        hda.metadata = self.metadata
        for child in self.children:
            child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            hda.set_peek()
        context.add( hda )
        context.flush()
        return hda
    def copy( self, copy_children=False, parent_id=None, target_folder=None ):
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_library_dataset_dataset_association=self,
                                                 folder=target_folder )
        context.add( ldda )
        context.flush()
        # Need to set after flushed, as MetadataFiles require dataset.id
        ldda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children=copy_children, parent_id=ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe=False, purge=False ):
        return
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        # If restrict is True, we'll return only those templates directly associated with this LibraryDatasetDatasetAssociation
        if template_list is None:
            template_list = []
        if self.library_dataset_dataset_info_template_associations:
            template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
        self.library_dataset.get_library_item_info_templates( template_list, restrict )
        return template_list


class LibraryDataset( object ):
    # This class acts as a proxy to the currently selected LDDA
    def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
        self.folder = folder
        self.order_id = order_id
        self.name = name
        self.info = info
        self.library_dataset_dataset_association = library_dataset_dataset_association
    def set_library_dataset_dataset_association( self, ldda ):
        self.library_dataset_dataset_association = ldda
        ldda.library_dataset = self
        context.add_all( ( self, ldda ) )
        context.flush()
    def get_info( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.info
        elif self._info:
            return self._info
        else:
            return 'no info'
    def set_info( self, info ):
        self._info = info
    info = property( get_info, set_info )
    def get_name( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.name
        elif self._name:
            return self._name
        else:
            return 'Unnamed dataset'
    def set_name( self, name ):
        self._name = name
    name = property( get_name, set_name )
    def display_name( self ):
        return self.library_dataset_dataset_association.display_name()
    def get_purged( self ):
        return self.library_dataset_dataset_association.dataset.purged
    def set_purged( self, purged ):
        if purged:
            raise Exception( "Not implemented" )
        if not purged and self.purged:
            raise Exception( "Cannot unpurge once purged" )
    purged = property( get_purged, set_purged )
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        # If restrict is True, we'll return only those templates directly associated with this LibraryDataset
        if template_list is None:
            template_list = []
        if self.library_dataset_info_template_associations:
            template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
        if restrict not in [ 'True', True ]:
            self.folder.get_library_item_info_templates( template_list, restrict )
        return template_list


## tables

Dataset.table = Table( "dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
    Column( "state", TrimmedString( 64 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "purged", Boolean, index=True, default=False ),
    Column( "purgable", Boolean, default=True ),
    Column( "external_filename", TEXT ),
    Column( "_extra_files_path", TEXT ),
    Column( "file_size", Numeric( 15, 0 ) ) )


HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "hid", Integer ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek", TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ) )


LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek", TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ),
    Column( "message", TrimmedString( 255 ) ) )


LibraryDataset.table = Table( "library_dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    # current version of the dataset; if null, there is no current version selected
    Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ),
    Column( "order_id", Integer ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    # when not None/null, _name and _info supersede the LDDA's values for display in the library (but not when imported into a user's history?)
    Column( "name", TrimmedString( 255 ), key="_name" ),
    Column( "info", TrimmedString( 255 ), key="_info" ),
    Column( "deleted", Boolean, index=True, default=False ) )


## mappers

assign_mapper( context, Dataset, Dataset.table,
    properties=dict(
        history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
        active_history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
        library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
        active_library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
    ) )
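# Note: assign_mapper also attaches session-aware query helpers to the mapped
# classes (e.g. the Dataset.get( id ) classmethod used in
# __guess_dataset_by_filename below).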


assign_mapper( context, HistoryDatasetAssociation, HistoryDatasetAssociation.table,
    properties=dict(
        dataset=relation(
            Dataset,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
        # .history defined in History mapper
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        visible_children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
    ) )

assign_mapper( context, LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
    properties=dict(
        dataset=relation( Dataset ),
        library_dataset=relation( LibraryDataset,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        visible_children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
    ) )

assign_mapper( context, LibraryDataset, LibraryDataset.table,
    properties=dict(
        library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
        expired_datasets=relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id, LibraryDataset.table.c.library_dataset_dataset_association_id], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
    ) )


def __guess_dataset_by_filename( filename ):
    """Return a guessed dataset by filename"""
    try:
        fields = os.path.split( filename )
        if fields:
            if fields[-1].startswith( 'dataset_' ) and fields[-1].endswith( '.dat' ): # dataset_%d.dat
                return Dataset.get( int( fields[-1][ len( 'dataset_' ): -len( '.dat' ) ] ) )
    except Exception:
        pass # some parsing error, we can't guess Dataset
    return None
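# Illustrative behavior: "/tmp/000/123/dataset_123456.dat" yields
# Dataset.get( 123456 ); anything that does not parse yields None.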
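# upgrade() makes two passes over the dataset table: the first un-deletes
# datasets that still have live (non-deleted) HDA/LDDA instances; the second
# re-points instances whose Dataset was created by "share before HDA" (an
# external_filename that actually names another dataset's file) at the real
# Dataset, then marks the stand-in as deleted and purged.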
def upgrade():
    log.debug( "Fixing a discrepancy concerning deleted shared history items." )
    affected_items = 0
    start_time = time.time()
    for dataset in context.query( Dataset ).filter( and_( Dataset.deleted == True, Dataset.purged == False ) ):
        for dataset_instance in dataset.history_associations + dataset.library_associations:
            if not dataset_instance.deleted:
                dataset.deleted = False
                if dataset.file_size in [ None, 0 ]:
                    dataset.set_size() # Restore filesize
                affected_items += 1
                break
    context.flush()
    log.debug( "%i items affected, and restored." % ( affected_items ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )

    # Fix datasets shared before HDAs existed
    log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
    dataset_by_filename = {}
    changed_associations = 0
    start_time = time.time()
    for dataset in context.query( Dataset ).filter( Dataset.external_filename.like( '%dataset_%.dat' ) ):
        if dataset.file_name in dataset_by_filename:
            guessed_dataset = dataset_by_filename[ dataset.file_name ]
        else:
            guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
            if guessed_dataset and dataset.file_name != guessed_dataset.file_name: # not os.path.samefile( dataset.file_name, guessed_dataset.file_name )
                guessed_dataset = None
            dataset_by_filename[ dataset.file_name ] = guessed_dataset

        if guessed_dataset is not None and guessed_dataset.id != dataset.id: # could we have a self-referential dataset?
            for dataset_instance in dataset.history_associations + dataset.library_associations:
                dataset_instance.dataset = guessed_dataset
                changed_associations += 1
            # Mark the original Dataset as deleted and purged; it is no longer in use, but do not delete its file_name contents
            dataset.deleted = True
            dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
            dataset.purged = True # we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
    context.flush()
    log.debug( "%i dataset associations changed." % ( changed_associations ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )

def downgrade():
    log.debug( "Downgrade is not possible." )