import sys, logging, os, time, datetime, errno

# Configure a module-level logger that echoes DEBUG and above to stdout so
# migration progress is visible when the script is run from the command line.
log = logging.getLogger( __name__ )
log.setLevel(logging.DEBUG)
handler = logging.StreamHandler( sys.stdout )
# NOTE(review): 'format' shadows the builtin of the same name; it is only
# used on the following line, so the shadowing is harmless here.
format = "%(name)s %(levelname)s %(asctime)s %(message)s"
formatter = logging.Formatter( format )
handler.setFormatter( formatter )
log.addHandler( handler )

from migrate import migrate_engine
from sqlalchemy import and_

from sqlalchemy import *
# Callable (not a value) used as the default/onupdate for timestamp columns below.
now = datetime.datetime.utcnow
from sqlalchemy.orm import *

from galaxy.model.orm.ext.assignmapper import assign_mapper

from galaxy.model.custom_types import *

from galaxy.util.bunch import Bunch


# Bind table metadata to the migration engine and build a scoped session.
# autocommit=True / autoflush=False: objects persist only on the explicit
# context.flush() calls scattered through the classes below.
metadata = MetaData( migrate_engine )
context = scoped_session( sessionmaker( autoflush=False, autocommit=True ) )
---|
| 27 | |
---|
| 28 | |
---|
| 29 | ## classes |
---|
def get_permitted_actions( **kwds ):
    """Stub used by this migration: no permitted actions are tracked, so an
    empty Bunch is always returned regardless of the keyword filter."""
    return Bunch()
---|
| 32 | |
---|
def directory_hash_id( id ):
    """Map a numeric id onto a list of 3-digit directory names.

    Files are spread over the filesystem 1000-per-directory: the last three
    decimal digits of the id select the file, the remaining digits (zero
    padded to a multiple of three) name the nested directories.
    """
    id_str = str( id )
    # Shortcut -- ids 0-999 all live directly under ../000/
    if len( id_str ) < 4:
        return [ "000" ]
    # Left-pad with zeros until the length is a multiple of three, then drop
    # the trailing three digits (they address the file, not the directory).
    prefix = ( "0" * ( 3 - len( id_str ) % 3 ) ) + id_str
    prefix = prefix[:-3]
    # Split what remains into 3-character directory names.
    return [ prefix[pos:pos + 3] for pos in range( 0, len( prefix ), 3 ) ]
---|
| 45 | |
---|
| 46 | |
---|
| 47 | class Dataset( object ): |
---|
| 48 | states = Bunch( NEW = 'new', |
---|
| 49 | UPLOAD = 'upload', |
---|
| 50 | QUEUED = 'queued', |
---|
| 51 | RUNNING = 'running', |
---|
| 52 | OK = 'ok', |
---|
| 53 | EMPTY = 'empty', |
---|
| 54 | ERROR = 'error', |
---|
| 55 | DISCARDED = 'discarded' ) |
---|
| 56 | permitted_actions = get_permitted_actions( filter='DATASET' ) |
---|
| 57 | file_path = "/tmp/" |
---|
| 58 | engine = None |
---|
| 59 | def __init__( self, id=None, state=None, external_filename=None, extra_files_path=None, file_size=None, purgable=True ): |
---|
| 60 | self.id = id |
---|
| 61 | self.state = state |
---|
| 62 | self.deleted = False |
---|
| 63 | self.purged = False |
---|
| 64 | self.purgable = purgable |
---|
| 65 | self.external_filename = external_filename |
---|
| 66 | self._extra_files_path = extra_files_path |
---|
| 67 | self.file_size = file_size |
---|
| 68 | def get_file_name( self ): |
---|
| 69 | if not self.external_filename: |
---|
| 70 | assert self.id is not None, "ID must be set before filename used (commit the object)" |
---|
| 71 | # First try filename directly under file_path |
---|
| 72 | filename = os.path.join( self.file_path, "dataset_%d.dat" % self.id ) |
---|
| 73 | # Only use that filename if it already exists (backward compatibility), |
---|
| 74 | # otherwise construct hashed path |
---|
| 75 | if not os.path.exists( filename ): |
---|
| 76 | dir = os.path.join( self.file_path, *directory_hash_id( self.id ) ) |
---|
| 77 | # Create directory if it does not exist |
---|
| 78 | try: |
---|
| 79 | os.makedirs( dir ) |
---|
| 80 | except OSError, e: |
---|
| 81 | # File Exists is okay, otherwise reraise |
---|
| 82 | if e.errno != errno.EEXIST: |
---|
| 83 | raise |
---|
| 84 | # Return filename inside hashed directory |
---|
| 85 | return os.path.abspath( os.path.join( dir, "dataset_%d.dat" % self.id ) ) |
---|
| 86 | else: |
---|
| 87 | filename = self.external_filename |
---|
| 88 | # Make filename absolute |
---|
| 89 | return os.path.abspath( filename ) |
---|
| 90 | def set_file_name ( self, filename ): |
---|
| 91 | if not filename: |
---|
| 92 | self.external_filename = None |
---|
| 93 | else: |
---|
| 94 | self.external_filename = filename |
---|
| 95 | file_name = property( get_file_name, set_file_name ) |
---|
| 96 | @property |
---|
| 97 | def extra_files_path( self ): |
---|
| 98 | if self._extra_files_path: |
---|
| 99 | path = self._extra_files_path |
---|
| 100 | else: |
---|
| 101 | path = os.path.join( self.file_path, "dataset_%d_files" % self.id ) |
---|
| 102 | #only use path directly under self.file_path if it exists |
---|
| 103 | if not os.path.exists( path ): |
---|
| 104 | path = os.path.join( os.path.join( self.file_path, *directory_hash_id( self.id ) ), "dataset_%d_files" % self.id ) |
---|
| 105 | # Make path absolute |
---|
| 106 | return os.path.abspath( path ) |
---|
| 107 | def get_size( self ): |
---|
| 108 | """Returns the size of the data on disk""" |
---|
| 109 | if self.file_size: |
---|
| 110 | return self.file_size |
---|
| 111 | else: |
---|
| 112 | try: |
---|
| 113 | return os.path.getsize( self.file_name ) |
---|
| 114 | except OSError: |
---|
| 115 | return 0 |
---|
| 116 | def set_size( self ): |
---|
| 117 | """Returns the size of the data on disk""" |
---|
| 118 | try: |
---|
| 119 | if not self.file_size: |
---|
| 120 | self.file_size = os.path.getsize( self.file_name ) |
---|
| 121 | except OSError: |
---|
| 122 | self.file_size = 0 |
---|
| 123 | def has_data( self ): |
---|
| 124 | """Detects whether there is any data""" |
---|
| 125 | return self.get_size() > 0 |
---|
| 126 | def mark_deleted( self, include_children=True ): |
---|
| 127 | self.deleted = True |
---|
| 128 | # FIXME: sqlalchemy will replace this |
---|
| 129 | def _delete(self): |
---|
| 130 | """Remove the file that corresponds to this data""" |
---|
| 131 | try: |
---|
| 132 | os.remove(self.data.file_name) |
---|
| 133 | except OSError, e: |
---|
| 134 | log.critical('%s delete error %s' % (self.__class__.__name__, e)) |
---|
| 135 | |
---|
class DatasetInstance( object ):
    """A base class for all 'dataset instances', HDAs, LDAs, etc.

    Wraps a shared Dataset (the file on disk) and adds per-instance state:
    name, annotation, datatype extension, metadata, parent/child links and
    validation errors.  Subclasses add history- or library-specific behavior.
    """
    states = Dataset.states
    permitted_actions = Dataset.permitted_actions
    def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
                  dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
                  parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
        # NOTE(review): the 'history' and 'hid' arguments are accepted but not
        # stored here; HistoryDatasetAssociation handles them itself.
        self.name = name or "Unnamed dataset"
        self.id = id
        self.info = info
        self.blurb = blurb
        self.peek = peek
        self.extension = extension
        self.designation = designation
        self.metadata = metadata or dict()
        if dbkey: #dbkey is stored in metadata, only set if non-zero, or else we could clobber one supplied by input 'metadata'
            self.dbkey = dbkey
        self.deleted = deleted
        self.visible = visible
        # Relationships
        if not dataset and create_dataset:
            # Create and flush the backing Dataset immediately so it gets an id.
            dataset = Dataset( state=Dataset.states.NEW )
            context.add( dataset )
            context.flush()
        self.dataset = dataset
        self.parent_id = parent_id
        self.validation_errors = validation_errors
    @property
    def ext( self ):
        """Shorthand for the datatype extension."""
        return self.extension
    def get_dataset_state( self ):
        return self.dataset.state
    def set_dataset_state ( self, state ):
        self.dataset.state = state
        context.add( self.dataset )
        context.flush() #flush here, because hda.flush() won't flush the Dataset object
    state = property( get_dataset_state, set_dataset_state )
    def get_file_name( self ):
        return self.dataset.get_file_name()
    def set_file_name (self, filename):
        return self.dataset.set_file_name( filename )
    file_name = property( get_file_name, set_file_name )
    @property
    def extra_files_path( self ):
        return self.dataset.extra_files_path
    @property
    def datatype( self ):
        """Datatype object looked up from this instance's extension."""
        return datatypes_registry.get_datatype_by_extension( self.extension )
    def get_metadata( self ):
        # Lazily (re)build the MetadataCollection wrapper around _metadata.
        if not hasattr( self, '_metadata_collection' ) or self._metadata_collection.parent != self: #using weakref to store parent (to prevent circ ref), does a context.clear() cause parent to be invalidated, while still copying over this non-database attribute?
            self._metadata_collection = MetadataCollection( self )
        return self._metadata_collection
    def set_metadata( self, bunch ):
        # Needs to accept a MetadataCollection, a bunch, or a dict
        self._metadata = self.metadata.make_dict_copy( bunch )
    metadata = property( get_metadata, set_metadata )
    # This provide backwards compatibility with using the old dbkey
    # field in the database. That field now maps to "old_dbkey" (see mapping.py).
    def get_dbkey( self ):
        dbkey = self.metadata.dbkey
        if not isinstance(dbkey, list): dbkey = [dbkey]
        if dbkey in [[None], []]: return "?"
        return dbkey[0]
    def set_dbkey( self, value ):
        if "dbkey" in self.datatype.metadata_spec:
            if not isinstance(value, list):
                self.metadata.dbkey = [value]
            else:
                self.metadata.dbkey = value
    dbkey = property( get_dbkey, set_dbkey )
    def change_datatype( self, new_ext ):
        """Switch this instance to a new datatype, dropping converted files."""
        self.clear_associated_files()
        datatypes_registry.change_datatype( self, new_ext )
    def get_size( self ):
        """Returns the size of the data on disk"""
        return self.dataset.get_size()
    def set_size( self ):
        """Records the size of the data on disk in the underlying Dataset."""
        return self.dataset.set_size()
    def has_data( self ):
        """Detects whether there is any data"""
        return self.dataset.has_data()
    def get_raw_data( self ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        return self.datatype.get_raw_data( self )
    def write_from_stream( self, stream ):
        """Writes data from a stream"""
        self.datatype.write_from_stream(self, stream)
    def set_raw_data( self, data ):
        """Saves the data on the disc"""
        self.datatype.set_raw_data(self, data)
    def get_mime( self ):
        """Returns the mime type of the data"""
        return datatypes_registry.get_mimetype_by_extension( self.extension.lower() )
    def set_peek( self, is_multi_byte=False ):
        return self.datatype.set_peek( self, is_multi_byte=is_multi_byte )
    def init_meta( self, copy_from=None ):
        return self.datatype.init_meta( self, copy_from=copy_from )
    def set_meta( self, **kwd ):
        # Converted files whose metadata_safe flag is unset are invalidated.
        self.clear_associated_files( metadata_safe = True )
        return self.datatype.set_meta( self, **kwd )
    def missing_meta( self, **kwd ):
        return self.datatype.missing_meta( self, **kwd )
    def as_display_type( self, type, **kwd ):
        return self.datatype.as_display_type( self, type, **kwd )
    def display_peek( self ):
        return self.datatype.display_peek( self )
    def display_name( self ):
        return self.datatype.display_name( self )
    def display_info( self ):
        return self.datatype.display_info( self )
    def get_converted_files_by_type( self, file_type ):
        """Return non-deleted implicitly-converted datasets of the given type."""
        valid = []
        for assoc in self.implicitly_converted_datasets:
            if not assoc.deleted and assoc.type == file_type:
                valid.append( assoc.dataset )
        return valid
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        # BUGFIX: was `raise 'Unimplemented'` -- string exceptions are
        # invalid from Python 2.6 on (TypeError at raise time); raise a real
        # exception instead.  Subclasses override this method.
        raise Exception( 'Unimplemented' )
    def get_child_by_designation(self, designation):
        """Return the child instance with the given designation, or None."""
        for child in self.children:
            if child.designation == designation:
                return child
        return None
    def get_converter_types(self):
        return self.datatype.get_converter_types( self, datatypes_registry)
    def find_conversion_destination( self, accepted_formats, **kwd ):
        """Returns ( target_ext, exisiting converted dataset )"""
        return self.datatype.find_conversion_destination( self, accepted_formats, datatypes_registry, **kwd )
    def add_validation_error( self, validation_error ):
        self.validation_errors.append( validation_error )
    def extend_validation_errors( self, validation_errors ):
        self.validation_errors.extend(validation_errors)
    def mark_deleted( self, include_children=True ):
        self.deleted = True
        if include_children:
            for child in self.children:
                child.mark_deleted()
    def mark_undeleted( self, include_children=True ):
        self.deleted = False
        if include_children:
            for child in self.children:
                child.mark_undeleted()
    def undeletable( self ):
        """A purged instance can never be undeleted."""
        if self.purged:
            return False
        return True
    @property
    def source_library_dataset( self ):
        """Walk the copied_from chain looking for the originating library
        dataset; returns ( ldda, library_dataset ) or ( None, None )."""
        def get_source( dataset ):
            if isinstance( dataset, LibraryDatasetDatasetAssociation ):
                if dataset.library_dataset:
                    return ( dataset, dataset.library_dataset )
            if dataset.copied_from_library_dataset_dataset_association:
                source = get_source( dataset.copied_from_library_dataset_dataset_association )
                # NOTE(review): get_source always returns a (truthy) 2-tuple,
                # so these `if source:` guards always pass -- harmless, since
                # the fallthrough returns ( None, None ) as well.
                if source:
                    return source
            if dataset.copied_from_history_dataset_association:
                source = get_source( dataset.copied_from_history_dataset_association )
                if source:
                    return source
            return ( None, None )
        return get_source( self )
---|
| 299 | |
---|
| 300 | |
---|
class HistoryDatasetAssociation( DatasetInstance ):
    """A DatasetInstance that belongs to a user's history (a numbered item)."""
    def __init__( self,
                  hid = None,
                  history = None,
                  copied_from_history_dataset_association = None,
                  copied_from_library_dataset_dataset_association = None,
                  **kwd ):
        # hid is the item's ordinal position within its history.
        DatasetInstance.__init__( self, **kwd )
        self.hid = hid
        # Relationships
        self.history = history
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
    def copy( self, copy_children = False, parent_id = None, target_history = None ):
        """Return a new HDA sharing this HDA's underlying Dataset.

        The copy is flushed before metadata is assigned because MetadataFiles
        require dataset.id; peek is regenerated afterwards when the datatype
        says peeks are not safe to copy.
        """
        hda = HistoryDatasetAssociation( hid=self.hid,
                                         name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset = self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_history_dataset_association=self,
                                         history = target_history )
        context.add( hda )
        context.flush()
        hda.set_size()
        # Need to set after flushed, as MetadataFiles require dataset.id
        hda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children = copy_children, parent_id = hda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            hda.set_peek()
        context.flush()
        return hda
    def to_library_dataset_dataset_association( self, target_folder, replace_dataset=None, parent_id=None ):
        """Copy this HDA (and its children) into a library folder, returning
        the new LibraryDatasetDatasetAssociation made current for its
        LibraryDataset."""
        if replace_dataset:
            # The replace_dataset param ( when not None ) refers to a LibraryDataset that is being replaced with a new version.
            library_dataset = replace_dataset
        else:
            # If replace_dataset is None, the Library level permissions will be taken from the folder and applied to the new
            # LibraryDataset, and the current user's DefaultUserPermissions will be applied to the associated Dataset.
            library_dataset = LibraryDataset( folder=target_folder, name=self.name, info=self.info )
            context.add( library_dataset )
            context.flush()
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 library_dataset=library_dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_history_dataset_association=self,
                                                 user=self.history.user )
        context.add( ldda )
        context.flush()
        # Permissions must be the same on the LibraryDatasetDatasetAssociation and the associated LibraryDataset
        # Must set metadata after ldda flushed, as MetadataFiles require ldda.id
        ldda.metadata = self.metadata
        if not replace_dataset:
            target_folder.add_library_dataset( library_dataset, genome_build=ldda.dbkey )
            context.add( target_folder )
            context.flush()
        # Make the new LDDA the current version of the LibraryDataset.
        library_dataset.library_dataset_dataset_association_id = ldda.id
        context.add( library_dataset )
        context.flush()
        for child in self.children:
            child_copy = child.to_library_dataset_dataset_association( target_folder=target_folder, replace_dataset=replace_dataset, parent_id=ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        """Clear implicitly-converted datasets derived from this HDA."""
        # metadata_safe = True means to only clear when assoc.metadata_safe == False
        for assoc in self.implicitly_converted_datasets:
            if not metadata_safe or not assoc.metadata_safe:
                assoc.clear( purge = purge )
---|
| 388 | |
---|
| 389 | |
---|
| 390 | |
---|
class LibraryDatasetDatasetAssociation( DatasetInstance ):
    """A DatasetInstance stored in a library; versions of a LibraryDataset."""
    def __init__( self,
                  copied_from_history_dataset_association=None,
                  copied_from_library_dataset_dataset_association=None,
                  library_dataset=None,
                  user=None,
                  **kwd ):
        DatasetInstance.__init__( self, **kwd )
        self.copied_from_history_dataset_association = copied_from_history_dataset_association
        self.copied_from_library_dataset_dataset_association = copied_from_library_dataset_dataset_association
        self.library_dataset = library_dataset
        self.user = user
    def to_history_dataset_association( self, target_history, parent_id=None ):
        """Copy this LDDA (and children) into target_history as a new HDA."""
        hid = target_history._next_hid()
        hda = HistoryDatasetAssociation( name=self.name,
                                         info=self.info,
                                         blurb=self.blurb,
                                         peek=self.peek,
                                         extension=self.extension,
                                         dbkey=self.dbkey,
                                         dataset=self.dataset,
                                         visible=self.visible,
                                         deleted=self.deleted,
                                         parent_id=parent_id,
                                         copied_from_library_dataset_dataset_association=self,
                                         history=target_history,
                                         hid=hid )
        context.flush()
        hda.metadata = self.metadata #need to set after flushed, as MetadataFiles require dataset.id
        for child in self.children:
            child_copy = child.to_history_dataset_association( target_history=target_history, parent_id=hda.id )
        if not self.datatype.copy_safe_peek:
            hda.set_peek() #in some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
        context.add( hda )
        context.flush()
        return hda
    def copy( self, copy_children = False, parent_id = None, target_folder = None ):
        """Return a new LDDA sharing this LDDA's underlying Dataset."""
        # NOTE(review): 'folder' is not an explicit parameter of __init__ here
        # or in DatasetInstance -- confirm a receiver for this kwarg exists
        # before calling copy() with a non-None target_folder.
        ldda = LibraryDatasetDatasetAssociation( name=self.name,
                                                 info=self.info,
                                                 blurb=self.blurb,
                                                 peek=self.peek,
                                                 extension=self.extension,
                                                 dbkey=self.dbkey,
                                                 dataset=self.dataset,
                                                 visible=self.visible,
                                                 deleted=self.deleted,
                                                 parent_id=parent_id,
                                                 copied_from_library_dataset_dataset_association=self,
                                                 folder=target_folder )
        context.add( ldda )
        context.flush()
        # Need to set after flushed, as MetadataFiles require dataset.id
        ldda.metadata = self.metadata
        if copy_children:
            for child in self.children:
                child_copy = child.copy( copy_children = copy_children, parent_id = ldda.id )
        if not self.datatype.copy_safe_peek:
            # In some instances peek relies on dataset_id, i.e. gmaj.zip for viewing MAFs
            ldda.set_peek()
        context.flush()
        return ldda
    def clear_associated_files( self, metadata_safe = False, purge = False ):
        # Library datasets track no implicitly-converted files; nothing to do.
        return
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        """Accumulate info templates for this LDDA (and, via its
        LibraryDataset, its ancestors) into template_list and return it.

        If restrict is True, only templates directly associated with this
        LibraryDatasetDatasetAssociation are collected.
        """
        # BUGFIX: the default was a shared mutable list ( template_list=[] )
        # which was mutated via extend(), leaking entries between calls.
        if template_list is None:
            template_list = []
        if self.library_dataset_dataset_info_template_associations:
            template_list.extend( [ lddita.library_item_info_template for lddita in self.library_dataset_dataset_info_template_associations if lddita.library_item_info_template not in template_list ] )
        self.library_dataset.get_library_item_info_templates( template_list, restrict )
        return template_list
---|
| 460 | |
---|
| 461 | |
---|
| 462 | |
---|
class LibraryDataset( object ):
    """Acts as a proxy to the currently selected (current-version) LDDA:
    name/info reads fall through to that LDDA when one is set."""
    def __init__( self, folder=None, order_id=None, name=None, info=None, library_dataset_dataset_association=None, **kwd ):
        self.folder = folder
        self.order_id = order_id
        # These assignments go through the name/info property setters below.
        self.name = name
        self.info = info
        self.library_dataset_dataset_association = library_dataset_dataset_association
    def set_library_dataset_dataset_association( self, ldda ):
        """Make ldda the current version of this dataset and persist both."""
        self.library_dataset_dataset_association = ldda
        ldda.library_dataset = self
        context.add_all( ( self, ldda ) )
        context.flush()
    def get_info( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.info
        elif self._info:
            return self._info
        else:
            return 'no info'
    def set_info( self, info ):
        self._info = info
    info = property( get_info, set_info )
    def get_name( self ):
        if self.library_dataset_dataset_association:
            return self.library_dataset_dataset_association.name
        elif self._name:
            return self._name
        else:
            return 'Unnamed dataset'
    def set_name( self, name ):
        self._name = name
    name = property( get_name, set_name )
    def display_name( self ):
        # BUGFIX: the delegated result was computed but never returned,
        # so callers always received None.
        return self.library_dataset_dataset_association.display_name()
    def get_purged( self ):
        return self.library_dataset_dataset_association.dataset.purged
    def set_purged( self, purged ):
        # Purging through this proxy is unsupported; only validate the request.
        if purged:
            raise Exception( "Not implemented" )
        if not purged and self.purged:
            raise Exception( "Cannot unpurge once purged" )
    purged = property( get_purged, set_purged )
    def get_library_item_info_templates( self, template_list=None, restrict=False ):
        """Accumulate info templates for this LibraryDataset (and, unless
        restrict is truthy, its containing folders) into template_list and
        return it."""
        # BUGFIX: the default was a shared mutable list ( template_list=[] )
        # which was mutated via extend(), leaking entries between calls.
        if template_list is None:
            template_list = []
        if self.library_dataset_info_template_associations:
            template_list.extend( [ ldita.library_item_info_template for ldita in self.library_dataset_info_template_associations if ldita.library_item_info_template not in template_list ] )
        if restrict not in [ 'True', True ]:
            self.folder.get_library_item_info_templates( template_list, restrict )
        return template_list
---|
| 513 | |
---|
| 514 | ##tables |
---|
| 515 | |
---|
| 516 | |
---|
# Table backing Dataset: one row per physical data file on disk.
Dataset.table = Table( "dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, index=True, default=now, onupdate=now ),
    Column( "state", TrimmedString( 64 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "purged", Boolean, index=True, default=False ),
    Column( "purgable", Boolean, default=True ),
    Column( "external_filename" , TEXT ),
    Column( "_extra_files_path", TEXT ),
    # Numeric( 15, 0 ): 15 decimal digits, so file sizes beyond 32-bit Integer range fit.
    Column( 'file_size', Numeric( 15, 0 ) ) )
---|
| 528 | |
---|
| 529 | |
---|
| 530 | |
---|
# Table backing HistoryDatasetAssociation.  "metadata" is stored under
# attribute key "_metadata" because the class exposes 'metadata' as a
# property; parent_id and copied_from_history_dataset_association_id are
# self-referencing foreign keys.
HistoryDatasetAssociation.table = Table( "history_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "hid", Integer ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek" , TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "history_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ) )
---|
| 549 | |
---|
| 550 | |
---|
# Table backing LibraryDatasetDatasetAssociation.  The copied_from_* foreign
# keys use use_alter (deferred ALTER TABLE) because they form circular
# references with history_dataset_association / this table itself.
LibraryDatasetDatasetAssociation.table = Table( "library_dataset_dataset_association", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_id", Integer, ForeignKey( "library_dataset.id" ), index=True ),
    Column( "dataset_id", Integer, ForeignKey( "dataset.id" ), index=True ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "copied_from_history_dataset_association_id", Integer, ForeignKey( "history_dataset_association.id", use_alter=True, name='history_dataset_association_dataset_id_fkey' ), nullable=True ),
    Column( "copied_from_library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name='library_dataset_dataset_association_id_fkey' ), nullable=True ),
    Column( "name", TrimmedString( 255 ) ),
    Column( "info", TrimmedString( 255 ) ),
    Column( "blurb", TrimmedString( 255 ) ),
    Column( "peek" , TEXT ),
    Column( "extension", TrimmedString( 64 ) ),
    Column( "metadata", MetadataType(), key="_metadata" ),
    Column( "parent_id", Integer, ForeignKey( "library_dataset_dataset_association.id" ), nullable=True ),
    Column( "designation", TrimmedString( 255 ) ),
    Column( "deleted", Boolean, index=True, default=False ),
    Column( "visible", Boolean ),
    Column( "message", TrimmedString( 255 ) ) )
---|
| 570 | |
---|
# Table backing LibraryDataset (the versioned container for LDDAs).  The FK
# to library_dataset_dataset_association uses use_alter because the two
# tables reference each other.
LibraryDataset.table = Table( "library_dataset", metadata,
    Column( "id", Integer, primary_key=True ),
    Column( "library_dataset_dataset_association_id", Integer, ForeignKey( "library_dataset_dataset_association.id", use_alter=True, name="library_dataset_dataset_association_id_fk" ), nullable=True, index=True ),#current version of dataset, if null, there is not a current version selected
    Column( "order_id", Integer ),
    Column( "create_time", DateTime, default=now ),
    Column( "update_time", DateTime, default=now, onupdate=now ),
    Column( "name", TrimmedString( 255 ), key="_name" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
    Column( "info", TrimmedString( 255 ), key="_info" ), #when not None/null this will supercede display in library (but not when imported into user's history?)
    Column( "deleted", Boolean, index=True, default=False ) )
---|
| 580 | |
---|
| 581 | |
---|
| 582 | |
---|
| 583 | ##mappers |
---|
| 584 | |
---|
| 585 | |
---|
# Map Dataset with collections of its history/library associations; the
# "active_*" variants join with an extra deleted == False filter.
assign_mapper( context, Dataset, Dataset.table,
    properties=dict(
        history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) ),
        active_history_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ) & ( HistoryDatasetAssociation.table.c.deleted == False ) ) ),
        library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) ),
        active_library_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( Dataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.dataset_id ) & ( LibraryDatasetDatasetAssociation.table.c.deleted == False ) ) )
    ) )
---|
| 601 | |
---|
| 602 | |
---|
# Map HistoryDatasetAssociation (HDA).  Wires up the eager-loaded underlying
# Dataset, two self-referential relations (copied_from/copied_to and
# parent/children), and the LDDAs that record this HDA as their copy source.
assign_mapper( context, HistoryDatasetAssociation, HistoryDatasetAssociation.table,
    properties=dict(
        # Underlying Dataset row; lazy=False eager-loads it with the HDA.
        dataset=relation(
            Dataset,
            primaryjoin=( Dataset.table.c.id == HistoryDatasetAssociation.table.c.dataset_id ), lazy=False ),
        # .history defined in History mapper
        # Self-join on copied_from_history_dataset_association_id: HDAs that
        # were copied from this one; the backref exposes the single source HDA.
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_history_dataset_association_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        # LDDAs recorded (via copied_from_library_dataset_dataset_association_id
        # on the HDA side) as copy targets; backref gives the LDDA its source HDA.
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_history_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        # Self-join on parent_id: child HDAs, with a scalar "parent" backref.
        children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ), remote_side=[HistoryDatasetAssociation.table.c.id], uselist=False ) ),
        # Children restricted to rows whose visible flag is True.
        visible_children=relation(
            HistoryDatasetAssociation,
            primaryjoin=( ( HistoryDatasetAssociation.table.c.parent_id == HistoryDatasetAssociation.table.c.id ) & ( HistoryDatasetAssociation.table.c.visible == True ) ) )
        ) )
---|
| 625 | |
---|
# Map LibraryDatasetDatasetAssociation (LDDA).  Mirrors the HDA mapper:
# underlying Dataset, owning LibraryDataset, self-referential copied_from/
# copied_to and parent/children relations, plus HDAs copied from this LDDA.
assign_mapper( context, LibraryDatasetDatasetAssociation, LibraryDatasetDatasetAssociation.table,
    properties=dict(
        # Underlying Dataset row (default lazy loading, unlike the HDA mapper).
        dataset=relation( Dataset ),
        # The LibraryDataset this LDDA is a version of.
        library_dataset = relation( LibraryDataset,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.library_dataset_id == LibraryDataset.table.c.id ) ),
        # Self-join on copied_from_library_dataset_dataset_association_id:
        # LDDAs copied from this one; backref exposes the source LDDA.
        copied_to_library_dataset_dataset_associations=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        # HDAs copied from this LDDA (join is on the HDA-side copied_from
        # column); backref gives each HDA its single source LDDA.
        copied_to_history_dataset_associations=relation(
            HistoryDatasetAssociation,
            primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "copied_from_library_dataset_dataset_association", primaryjoin=( HistoryDatasetAssociation.table.c.copied_from_library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id], uselist=False ) ),
        # Self-join on parent_id: child LDDAs, with a "parent" backref.
        children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ),
            backref=backref( "parent", primaryjoin=( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ), remote_side=[LibraryDatasetDatasetAssociation.table.c.id] ) ),
        # Children restricted to rows whose visible flag is True.
        visible_children=relation(
            LibraryDatasetDatasetAssociation,
            primaryjoin=( ( LibraryDatasetDatasetAssociation.table.c.parent_id == LibraryDatasetDatasetAssociation.table.c.id ) & ( LibraryDatasetDatasetAssociation.table.c.visible == True ) ) )
        ) )
---|
| 647 | |
---|
# Map LibraryDataset.  library_dataset_dataset_association is the LDDA
# selected as the current version (see the library_dataset table's
# library_dataset_dataset_association_id column); expired_datasets collects
# every other LDDA belonging to this library dataset.
assign_mapper( context, LibraryDataset, LibraryDataset.table,
    properties=dict(
        # The LDDA currently selected as this library dataset's version.
        library_dataset_dataset_association=relation( LibraryDatasetDatasetAssociation, primaryjoin=( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ),
        # All LDDAs for this library dataset EXCEPT the current one.
        # viewonly=True makes this a read-only query relation (never flushed);
        # foreign_keys is given explicitly because the join is not a plain FK.
        expired_datasets = relation( LibraryDatasetDatasetAssociation, foreign_keys=[LibraryDataset.table.c.id,LibraryDataset.table.c.library_dataset_dataset_association_id ], primaryjoin=( ( LibraryDataset.table.c.id == LibraryDatasetDatasetAssociation.table.c.library_dataset_id ) & ( not_( LibraryDataset.table.c.library_dataset_dataset_association_id == LibraryDatasetDatasetAssociation.table.c.id ) ) ), viewonly=True, uselist=True )
        ) )
---|
| 653 | |
---|
| 654 | |
---|
def __guess_dataset_by_filename( filename ):
    """Return the Dataset whose id is encoded in ``filename``, or None.

    Dataset files are named ``.../dataset_<id>.dat``; this parses the id out
    of the trailing path component and looks the Dataset up via
    ``Dataset.get``.  Returns None when the name does not match the pattern
    or when parsing/lookup fails (best-effort guess, never raises).
    """
    try:
        # os.path.split always returns a 2-tuple, so the basename is safe to
        # take directly (the previous "if fields:" guard was redundant).
        basename = os.path.split( filename )[-1]
        if basename.startswith( 'dataset_' ) and basename.endswith( '.dat' ): #dataset_%d.dat
            return Dataset.get( int( basename[ len( 'dataset_' ): -len( '.dat' ) ] ) )
    except Exception:
        # Narrowed from a bare "except:" so KeyboardInterrupt/SystemExit
        # still propagate; any parsing/lookup error means we cannot guess.
        pass
    return None
---|
| 665 | |
---|
def upgrade():
    """Data migration: repair two inconsistencies around shared history items.

    Pass 1 undeletes any Dataset flagged deleted (but not purged) that still
    has a live (not deleted) history or library association, restoring its
    recorded file size when missing.

    Pass 2 handles datasets created by sharing a history before HDAs existed:
    their external_filename points at another Dataset's ``dataset_<id>.dat``
    file.  Their associations are re-pointed at the Dataset that actually owns
    the file, and the duplicate Dataset is flagged deleted/purged (its
    external_filename is rewritten as an audit note; no file is removed).
    """
    log.debug( "Fixing a discrepancy concerning deleted shared history items." )
    affected_items = 0
    start_time = time.time()
    # Pass 1: a deleted-but-unpurged Dataset with at least one live
    # association should not be deleted; restore it.
    for dataset in context.query( Dataset ).filter( and_( Dataset.deleted == True, Dataset.purged == False ) ):
        for dataset_instance in dataset.history_associations + dataset.library_associations:
            if not dataset_instance.deleted:
                dataset.deleted = False
                if dataset.file_size in [ None, 0 ]:
                    dataset.set_size() #Restore filesize
                affected_items += 1
                break  # one live association is enough; move to next dataset
    context.flush()
    log.debug( "%i items affected, and restored." % ( affected_items ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )

    #fix share before hda
    log.debug( "Fixing a discrepancy concerning cleaning up deleted history items shared before HDAs." )
    dataset_by_filename = {}  # cache: file_name -> guessed Dataset (or None)
    changed_associations = 0
    start_time = time.time()
    # Pass 2: an external_filename like '%dataset_%.dat' marks a dataset whose
    # file really belongs to another Dataset.
    for dataset in context.query( Dataset ).filter( Dataset.external_filename.like( '%dataset_%.dat' ) ):
        if dataset.file_name in dataset_by_filename:
            guessed_dataset = dataset_by_filename[ dataset.file_name ]
        else:
            guessed_dataset = __guess_dataset_by_filename( dataset.file_name )
            # Sanity check: the guessed Dataset must resolve to the same file.
            if guessed_dataset and dataset.file_name != guessed_dataset.file_name:#not os.path.samefile( dataset.file_name, guessed_dataset.file_name ):
                guessed_dataset = None
            dataset_by_filename[ dataset.file_name ] = guessed_dataset

        if guessed_dataset is not None and guessed_dataset.id != dataset.id: #could we have a self referential dataset?
            # Re-point every association at the Dataset that owns the file.
            for dataset_instance in dataset.history_associations + dataset.library_associations:
                dataset_instance.dataset = guessed_dataset
                changed_associations += 1
            #mark original Dataset as deleted and purged, it is no longer in use, but do not delete file_name contents
            dataset.deleted = True
            dataset.external_filename = "Dataset was result of share before HDA, and has been replaced: %s mapped to Dataset %s" % ( dataset.external_filename, guessed_dataset.id )
            dataset.purged = True #we don't really purge the file here, but we mark it as purged, since this dataset is now defunct
    context.flush()
    # NOTE(review): message reads "restored" but this pass re-maps
    # associations — looks like a copy-paste of the pass-1 message; left
    # unchanged here since this is a documentation-only pass.
    log.debug( "%i items affected, and restored." % ( changed_associations ) )
    log.debug( "Time elapsed: %s" % ( time.time() - start_time ) )
---|
| 707 | |
---|
def downgrade():
    """No-op: this data migration cannot be reversed."""
    log.debug( "Downgrade is not possible." )
---|
| 710 | |
---|