[2] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | import os, sys |
---|
| 4 | |
---|
| 5 | new_path = [ os.path.join( os.getcwd(), "lib" ) ] |
---|
| 6 | new_path.extend( sys.path[1:] ) # remove scripts/ from the path |
---|
| 7 | sys.path = new_path |
---|
| 8 | |
---|
| 9 | from galaxy import eggs |
---|
| 10 | import pkg_resources |
---|
| 11 | pkg_resources.require( "SQLAlchemy >= 0.4" ) |
---|
| 12 | |
---|
| 13 | import time, ConfigParser, shutil |
---|
| 14 | from datetime import datetime, timedelta |
---|
| 15 | from time import strftime |
---|
| 16 | from optparse import OptionParser |
---|
| 17 | |
---|
| 18 | import galaxy.model.mapping |
---|
| 19 | import sqlalchemy as sa |
---|
| 20 | from galaxy.model.orm import and_, eagerload |
---|
| 21 | |
---|
| 22 | assert sys.version_info[:2] >= ( 2, 4 ) |
---|
| 23 | |
---|
| 24 | def main(): |
---|
| 25 | parser = OptionParser() |
---|
| 26 | parser.add_option( "-d", "--days", dest="days", action="store", type="int", help="number of days (60)", default=60 ) |
---|
| 27 | parser.add_option( "-r", "--remove_from_disk", action="store_true", dest="remove_from_disk", help="remove datasets from disk when purged", default=False ) |
---|
| 28 | parser.add_option( "-i", "--info_only", action="store_true", dest="info_only", help="info about the requested action", default=False ) |
---|
| 29 | parser.add_option( "-f", "--force_retry", action="store_true", dest="force_retry", help="performs the requested actions, but ignores whether it might have been done before. Useful when -r wasn't used, but should have been", default=False ) |
---|
| 30 | parser.add_option( "-1", "--delete_userless_histories", action="store_true", dest="delete_userless_histories", default=False, help="delete userless histories and datasets" ) |
---|
| 31 | parser.add_option( "-2", "--purge_histories", action="store_true", dest="purge_histories", default=False, help="purge deleted histories" ) |
---|
| 32 | parser.add_option( "-3", "--purge_datasets", action="store_true", dest="purge_datasets", default=False, help="purge deleted datasets" ) |
---|
| 33 | parser.add_option( "-4", "--purge_libraries", action="store_true", dest="purge_libraries", default=False, help="purge deleted libraries" ) |
---|
| 34 | parser.add_option( "-5", "--purge_folders", action="store_true", dest="purge_folders", default=False, help="purge deleted library folders" ) |
---|
| 35 | parser.add_option( "-6", "--delete_datasets", action="store_true", dest="delete_datasets", default=False, help="mark deletable datasets as deleted and purge associated dataset instances" ) |
---|
| 36 | |
---|
| 37 | ( options, args ) = parser.parse_args() |
---|
| 38 | ini_file = args[0] |
---|
| 39 | |
---|
| 40 | if not ( options.purge_folders ^ options.delete_userless_histories ^ \ |
---|
| 41 | options.purge_libraries ^ options.purge_histories ^ \ |
---|
| 42 | options.purge_datasets ^ options.delete_datasets ): |
---|
| 43 | parser.print_help() |
---|
| 44 | sys.exit(0) |
---|
| 45 | |
---|
| 46 | if options.remove_from_disk and options.info_only: |
---|
| 47 | parser.error( "remove_from_disk and info_only are mutually exclusive" ) |
---|
| 48 | |
---|
| 49 | conf_parser = ConfigParser.ConfigParser( {'here':os.getcwd()} ) |
---|
| 50 | conf_parser.read( ini_file ) |
---|
| 51 | configuration = {} |
---|
| 52 | for key, value in conf_parser.items( "app:main" ): |
---|
| 53 | configuration[key] = value |
---|
| 54 | |
---|
| 55 | if 'database_connection' in configuration: |
---|
| 56 | database_connection = configuration['database_connection'] |
---|
| 57 | else: |
---|
| 58 | database_connection = "sqlite:///%s?isolation_level=IMMEDIATE" % configuration["database_file"] |
---|
| 59 | file_path = configuration.get('file_path', "database/files") |
---|
| 60 | app = CleanupDatasetsApplication( database_connection=database_connection, file_path=file_path ) |
---|
| 61 | cutoff_time = datetime.utcnow() - timedelta( days=options.days ) |
---|
| 62 | now = strftime( "%Y-%m-%d %H:%M:%S" ) |
---|
| 63 | |
---|
| 64 | print "##########################################" |
---|
| 65 | print "\n# %s - Handling stuff older than %i days" % ( now, options.days ) |
---|
| 66 | |
---|
| 67 | if options.info_only: |
---|
| 68 | print "# Displaying info only ( --info_only )\n" |
---|
| 69 | elif options.remove_from_disk: |
---|
| 70 | print "Datasets will be removed from disk.\n" |
---|
| 71 | else: |
---|
| 72 | print "Datasets will NOT be removed from disk.\n" |
---|
| 73 | |
---|
| 74 | if options.delete_userless_histories: |
---|
| 75 | delete_userless_histories( app, cutoff_time, info_only = options.info_only, force_retry = options.force_retry ) |
---|
| 76 | elif options.purge_histories: |
---|
| 77 | purge_histories( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry ) |
---|
| 78 | elif options.purge_datasets: |
---|
| 79 | purge_datasets( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry ) |
---|
| 80 | elif options.purge_libraries: |
---|
| 81 | purge_libraries( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry ) |
---|
| 82 | elif options.purge_folders: |
---|
| 83 | purge_folders( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry ) |
---|
| 84 | elif options.delete_datasets: |
---|
| 85 | delete_datasets( app, cutoff_time, options.remove_from_disk, info_only = options.info_only, force_retry = options.force_retry ) |
---|
| 86 | |
---|
| 87 | sys.exit(0) |
---|
| 88 | |
---|
| 89 | def delete_userless_histories( app, cutoff_time, info_only = False, force_retry = False ): |
---|
| 90 | # Deletes userless histories whose update_time value is older than the cutoff_time. |
---|
| 91 | # The purge history script will handle marking DatasetInstances as deleted. |
---|
| 92 | # Nothing is removed from disk yet. |
---|
| 93 | history_count = 0 |
---|
| 94 | start = time.time() |
---|
| 95 | if force_retry: |
---|
| 96 | histories = app.sa_session.query( app.model.History ) \ |
---|
| 97 | .filter( and_( app.model.History.table.c.user_id==None, |
---|
| 98 | app.model.History.table.c.update_time < cutoff_time ) ) |
---|
| 99 | else: |
---|
| 100 | histories = app.sa_session.query( app.model.History ) \ |
---|
| 101 | .filter( and_( app.model.History.table.c.user_id==None, |
---|
| 102 | app.model.History.table.c.deleted==False, |
---|
| 103 | app.model.History.table.c.update_time < cutoff_time ) ) |
---|
| 104 | for history in histories: |
---|
| 105 | if not info_only: |
---|
| 106 | print "Deleting history id ", history.id |
---|
| 107 | history.deleted = True |
---|
| 108 | app.sa_session.add( history ) |
---|
| 109 | app.sa_session.flush() |
---|
| 110 | history_count += 1 |
---|
| 111 | stop = time.time() |
---|
| 112 | print "Deleted %d histories" % history_count |
---|
| 113 | print "Elapsed time: ", stop - start |
---|
| 114 | print "##########################################" |
---|
| 115 | |
---|
| 116 | def purge_histories( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ): |
---|
| 117 | # Purges deleted histories whose update_time is older than the cutoff_time. |
---|
| 118 | # The dataset associations of each history are also marked as deleted. |
---|
| 119 | # The Purge Dataset method will purge each Dataset as necessary |
---|
| 120 | # history.purged == True simply means that it can no longer be undeleted |
---|
| 121 | # i.e. all associated datasets are marked as deleted |
---|
| 122 | history_count = 0 |
---|
| 123 | start = time.time() |
---|
| 124 | if force_retry: |
---|
| 125 | histories = app.sa_session.query( app.model.History ) \ |
---|
| 126 | .filter( and_( app.model.History.table.c.deleted==True, |
---|
| 127 | app.model.History.table.c.update_time < cutoff_time ) ) \ |
---|
| 128 | .options( eagerload( 'datasets' ) ) |
---|
| 129 | else: |
---|
| 130 | histories = app.sa_session.query( app.model.History ) \ |
---|
| 131 | .filter( and_( app.model.History.table.c.deleted==True, |
---|
| 132 | app.model.History.table.c.purged==False, |
---|
| 133 | app.model.History.table.c.update_time < cutoff_time ) ) \ |
---|
| 134 | .options( eagerload( 'datasets' ) ) |
---|
| 135 | for history in histories: |
---|
| 136 | for dataset_assoc in history.datasets: |
---|
| 137 | _purge_dataset_instance( dataset_assoc, app, remove_from_disk, info_only = info_only ) #mark a DatasetInstance as deleted, clear associated files, and mark the Dataset as deleted if it is deletable |
---|
| 138 | if not info_only: |
---|
| 139 | # TODO: should the Delete DefaultHistoryPermissions be deleted here? This was incorrectly |
---|
| 140 | # done in the _list_delete() method of the history controller, so copied it here. Not sure |
---|
| 141 | # if we should ever delete info like this from the db though, so commented out for now... |
---|
| 142 | #for dhp in history.default_permissions: |
---|
| 143 | # dhp.delete() |
---|
| 144 | print "Purging history id ", history.id |
---|
| 145 | history.purged = True |
---|
| 146 | app.sa_session.add( history ) |
---|
| 147 | app.sa_session.flush() |
---|
| 148 | history_count += 1 |
---|
| 149 | stop = time.time() |
---|
| 150 | print 'Purged %d histories.' % history_count |
---|
| 151 | print "Elapsed time: ", stop - start |
---|
| 152 | print "##########################################" |
---|
| 153 | |
---|
| 154 | def purge_libraries( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ): |
---|
| 155 | # Purges deleted libraries whose update_time is older than the cutoff_time. |
---|
| 156 | # The dataset associations of each library are also marked as deleted. |
---|
| 157 | # The Purge Dataset method will purge each Dataset as necessary |
---|
| 158 | # library.purged == True simply means that it can no longer be undeleted |
---|
| 159 | # i.e. all associated LibraryDatasets/folders are marked as deleted |
---|
| 160 | library_count = 0 |
---|
| 161 | start = time.time() |
---|
| 162 | if force_retry: |
---|
| 163 | libraries = app.sa_session.query( app.model.Library ) \ |
---|
| 164 | .filter( and_( app.model.Library.table.c.deleted==True, |
---|
| 165 | app.model.Library.table.c.update_time < cutoff_time ) ) |
---|
| 166 | else: |
---|
| 167 | libraries = app.sa_session.query( app.model.Library ) \ |
---|
| 168 | .filter( and_( app.model.Library.table.c.deleted==True, |
---|
| 169 | app.model.Library.table.c.purged==False, |
---|
| 170 | app.model.Library.table.c.update_time < cutoff_time ) ) |
---|
| 171 | for library in libraries: |
---|
| 172 | _purge_folder( library.root_folder, app, remove_from_disk, info_only = info_only ) |
---|
| 173 | if not info_only: |
---|
| 174 | print "Purging library id ", library.id |
---|
| 175 | library.purged = True |
---|
| 176 | app.sa_session.add( library ) |
---|
| 177 | app.sa_session.flush() |
---|
| 178 | library_count += 1 |
---|
| 179 | stop = time.time() |
---|
| 180 | print '# Purged %d libraries .' % library_count |
---|
| 181 | print "Elapsed time: ", stop - start |
---|
| 182 | print "##########################################" |
---|
| 183 | |
---|
| 184 | def purge_folders( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ): |
---|
| 185 | # Purges deleted folders whose update_time is older than the cutoff_time. |
---|
| 186 | # The dataset associations of each folder are also marked as deleted. |
---|
| 187 | # The Purge Dataset method will purge each Dataset as necessary |
---|
| 188 | # libraryFolder.purged == True simply means that it can no longer be undeleted |
---|
| 189 | # i.e. all associated LibraryDatasets/folders are marked as deleted |
---|
| 190 | folder_count = 0 |
---|
| 191 | start = time.time() |
---|
| 192 | if force_retry: |
---|
| 193 | folders = app.sa_session.query( app.model.LibraryFolder ) \ |
---|
| 194 | .filter( and_( app.model.LibraryFolder.table.c.deleted==True, |
---|
| 195 | app.model.LibraryFolder.table.c.update_time < cutoff_time ) ) |
---|
| 196 | else: |
---|
| 197 | folders = app.sa_session.query( app.model.LibraryFolder ) \ |
---|
| 198 | .filter( and_( app.model.LibraryFolder.table.c.deleted==True, |
---|
| 199 | app.model.LibraryFolder.table.c.purged==False, |
---|
| 200 | app.model.LibraryFolder.table.c.update_time < cutoff_time ) ) |
---|
| 201 | for folder in folders: |
---|
| 202 | _purge_folder( folder, app, remove_from_disk, info_only = info_only ) |
---|
| 203 | folder_count += 1 |
---|
| 204 | stop = time.time() |
---|
| 205 | print '# Purged %d folders.' % folder_count |
---|
| 206 | print "Elapsed time: ", stop - start |
---|
| 207 | print "##########################################" |
---|
| 208 | |
---|
| 209 | def delete_datasets( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ): |
---|
| 210 | # Marks datasets as deleted if associated items are all deleted. |
---|
| 211 | start = time.time() |
---|
| 212 | if force_retry: |
---|
| 213 | history_dataset_ids_query = sa.select( ( app.model.Dataset.table.c.id, |
---|
| 214 | app.model.Dataset.table.c.state ), |
---|
| 215 | whereclause = app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time, |
---|
| 216 | from_obj = [ sa.outerjoin( app.model.Dataset.table, |
---|
| 217 | app.model.HistoryDatasetAssociation.table ) ] ) |
---|
| 218 | library_dataset_ids_query = sa.select( ( app.model.Dataset.table.c.id, |
---|
| 219 | app.model.Dataset.table.c.state ), |
---|
| 220 | whereclause = app.model.LibraryDatasetDatasetAssociation.table.c.update_time < cutoff_time, |
---|
| 221 | from_obj = [ sa.outerjoin( app.model.Dataset.table, |
---|
| 222 | app.model.LibraryDatasetDatasetAssociation.table ) ] ) |
---|
| 223 | else: |
---|
| 224 | # We really only need the id column here, but sqlalchemy barfs when trying to select only 1 column |
---|
| 225 | history_dataset_ids_query = sa.select( ( app.model.Dataset.table.c.id, |
---|
| 226 | app.model.Dataset.table.c.state ), |
---|
| 227 | whereclause = sa.and_( app.model.Dataset.table.c.deleted == False, |
---|
| 228 | app.model.HistoryDatasetAssociation.table.c.update_time < cutoff_time, |
---|
| 229 | app.model.HistoryDatasetAssociation.table.c.deleted == True ), |
---|
| 230 | from_obj = [ sa.outerjoin( app.model.Dataset.table, |
---|
| 231 | app.model.HistoryDatasetAssociation.table ) ] ) |
---|
| 232 | library_dataset_ids_query = sa.select( ( app.model.Dataset.table.c.id, |
---|
| 233 | app.model.Dataset.table.c.state ), |
---|
| 234 | whereclause = sa.and_( app.model.Dataset.table.c.deleted == False, |
---|
| 235 | app.model.LibraryDatasetDatasetAssociation.table.c.update_time < cutoff_time, |
---|
| 236 | app.model.LibraryDatasetDatasetAssociation.table.c.deleted == True ), |
---|
| 237 | from_obj = [ sa.outerjoin( app.model.Dataset.table, |
---|
| 238 | app.model.LibraryDatasetDatasetAssociation.table ) ] ) |
---|
| 239 | history_dataset_ids = [ row.id for row in history_dataset_ids_query.execute() ] |
---|
| 240 | library_dataset_ids = [ row.id for row in library_dataset_ids_query.execute() ] |
---|
| 241 | dataset_ids = history_dataset_ids + library_dataset_ids |
---|
| 242 | skip = [] |
---|
| 243 | deleted_dataset_count = 0 |
---|
| 244 | deleted_instance_count = 0 |
---|
| 245 | for dataset_id in dataset_ids: |
---|
| 246 | print "######### Processing dataset id:", dataset_id |
---|
| 247 | dataset = app.sa_session.query( app.model.Dataset ).get( dataset_id ) |
---|
| 248 | if dataset.id not in skip and _dataset_is_deletable( dataset ): |
---|
| 249 | deleted_dataset_count += 1 |
---|
| 250 | for dataset_instance in dataset.history_associations + dataset.library_associations: |
---|
| 251 | print "Associated Dataset instance: ", dataset_instance.__class__.__name__, dataset_instance.id |
---|
| 252 | _purge_dataset_instance( dataset_instance, app, remove_from_disk, include_children=True, info_only=info_only, is_deletable=True ) |
---|
| 253 | deleted_instance_count += 1 |
---|
| 254 | skip.append( dataset.id ) |
---|
| 255 | stop = time.time() |
---|
| 256 | print "Examined %d datasets, marked %d as deleted and purged %d dataset instances" % ( len( skip ), deleted_dataset_count, deleted_instance_count ) |
---|
| 257 | print "Total elapsed time: ", stop - start |
---|
| 258 | print "##########################################" |
---|
| 259 | |
---|
| 260 | def purge_datasets( app, cutoff_time, remove_from_disk, info_only = False, force_retry = False ): |
---|
| 261 | # Purges deleted datasets whose update_time is older than cutoff_time. Files may or may |
---|
| 262 | # not be removed from disk. |
---|
| 263 | dataset_count = 0 |
---|
| 264 | disk_space = 0 |
---|
| 265 | start = time.time() |
---|
| 266 | if force_retry: |
---|
| 267 | datasets = app.sa_session.query( app.model.Dataset ) \ |
---|
| 268 | .filter( and_( app.model.Dataset.table.c.deleted==True, |
---|
| 269 | app.model.Dataset.table.c.purgable==True, |
---|
| 270 | app.model.Dataset.table.c.update_time < cutoff_time ) ) |
---|
| 271 | else: |
---|
| 272 | datasets = app.sa_session.query( app.model.Dataset ) \ |
---|
| 273 | .filter( and_( app.model.Dataset.table.c.deleted==True, |
---|
| 274 | app.model.Dataset.table.c.purgable==True, |
---|
| 275 | app.model.Dataset.table.c.purged==False, |
---|
| 276 | app.model.Dataset.table.c.update_time < cutoff_time ) ) |
---|
| 277 | for dataset in datasets: |
---|
| 278 | file_size = dataset.file_size |
---|
| 279 | _purge_dataset( app, dataset, remove_from_disk, info_only = info_only ) |
---|
| 280 | dataset_count += 1 |
---|
| 281 | try: |
---|
| 282 | disk_space += file_size |
---|
| 283 | except: |
---|
| 284 | pass |
---|
| 285 | stop = time.time() |
---|
| 286 | print 'Purged %d datasets' % dataset_count |
---|
| 287 | if remove_from_disk: |
---|
| 288 | print 'Freed disk space: ', disk_space |
---|
| 289 | print "Elapsed time: ", stop - start |
---|
| 290 | print "##########################################" |
---|
| 291 | |
---|
def _purge_dataset_instance( dataset_instance, app, remove_from_disk, include_children=True, info_only=False, is_deletable=False ):
    # A dataset_instance is either a HDA or an LDDA.  Purging a dataset instance
    # marks the instance as deleted, clears its associated files, and marks the
    # associated Dataset as deleted if it is not associated with another active
    # DatsetInstance.
    if not info_only:
        print "Deleting dataset_instance ", str( dataset_instance ), " id ", dataset_instance.id
        dataset_instance.mark_deleted( include_children = include_children )
        dataset_instance.clear_associated_files()
        app.sa_session.add( dataset_instance )
        app.sa_session.flush()
        # Refresh so the deletability check below sees the just-flushed state.
        app.sa_session.refresh( dataset_instance.dataset )
    if is_deletable or _dataset_is_deletable( dataset_instance.dataset ):
        # Calling methods may have already checked _dataset_is_deletable;
        # if so, is_deletable should be True to skip the re-check.
        _delete_dataset( dataset_instance.dataset, app, remove_from_disk, info_only=info_only, is_deletable=is_deletable )
    # Need to purge children here as well.
    if include_children:
        for child in dataset_instance.children:
            _purge_dataset_instance( child, app, remove_from_disk, include_children = include_children, info_only = info_only )
---|
| 309 | |
---|
| 310 | def _dataset_is_deletable( dataset ): |
---|
| 311 | #a dataset is deletable when it no longer has any non-deleted associations |
---|
| 312 | return not bool( dataset.active_history_associations or dataset.active_library_associations ) |
---|
| 313 | |
---|
| 314 | def _delete_dataset( dataset, app, remove_from_disk, info_only=False, is_deletable=False ): |
---|
| 315 | #marks a base dataset as deleted, hdas/ldas associated with dataset can no longer be undeleted |
---|
| 316 | #metadata files attached to associated dataset Instances is removed now |
---|
| 317 | if not is_deletable and not _dataset_is_deletable( dataset ): |
---|
| 318 | print "This Dataset (%i) is not deletable, associated Metadata Files will not be removed.\n" % ( dataset.id ) |
---|
| 319 | else: |
---|
| 320 | # Mark all associated MetadataFiles as deleted and purged and remove them from disk |
---|
| 321 | metadata_files = [] |
---|
| 322 | #lets create a list of metadata files, then perform actions on them |
---|
| 323 | for hda in dataset.history_associations: |
---|
| 324 | for metadata_file in app.sa_session.query( app.model.MetadataFile ) \ |
---|
| 325 | .filter( app.model.MetadataFile.table.c.hda_id==hda.id ): |
---|
| 326 | metadata_files.append( metadata_file ) |
---|
| 327 | for lda in dataset.library_associations: |
---|
| 328 | for metadata_file in app.sa_session.query( app.model.MetadataFile ) \ |
---|
| 329 | .filter( app.model.MetadataFile.table.c.lda_id==lda.id ): |
---|
| 330 | metadata_files.append( metadata_file ) |
---|
| 331 | for metadata_file in metadata_files: |
---|
| 332 | print "The following metadata files attached to associations of Dataset '%s' have been purged:" % dataset.id |
---|
| 333 | if not info_only: |
---|
| 334 | if remove_from_disk: |
---|
| 335 | try: |
---|
| 336 | print "Removing disk file ", metadata_file.file_name |
---|
| 337 | os.unlink( metadata_file.file_name ) |
---|
| 338 | except Exception, e: |
---|
| 339 | print "Error, exception: %s caught attempting to purge metadata file %s\n" %( str( e ), metadata_file.file_name ) |
---|
| 340 | metadata_file.purged = True |
---|
| 341 | app.sa_session.add( metadata_file ) |
---|
| 342 | app.sa_session.flush() |
---|
| 343 | metadata_file.deleted = True |
---|
| 344 | app.sa_session.add( metadata_file ) |
---|
| 345 | app.sa_session.flush() |
---|
| 346 | print "%s" % metadata_file.file_name |
---|
| 347 | print "Deleting dataset id", dataset.id |
---|
| 348 | dataset.deleted = True |
---|
| 349 | app.sa_session.add( dataset ) |
---|
| 350 | app.sa_session.flush() |
---|
| 351 | |
---|
def _purge_dataset( app, dataset, remove_from_disk, info_only = False ):
    # Purge a single already-deleted Dataset: optionally unlink its file and
    # extra-files directory from disk, then set dataset.purged = True.
    if dataset.deleted:
        try:
            if dataset.purgable and _dataset_is_deletable( dataset ):
                if not info_only:
                    # Remove files from disk and update the database
                    if remove_from_disk:
                        # TODO: should permissions on the dataset be deleted here?
                        print "Removing disk, file ", dataset.file_name
                        os.unlink( dataset.file_name )
                        # Remove associated extra files from disk if they exist
                        if dataset.extra_files_path and os.path.exists( dataset.extra_files_path ):
                            shutil.rmtree( dataset.extra_files_path ) #we need to delete the directory and its contents; os.unlink would always fail on a directory
                    print "Purging dataset id", dataset.id
                    dataset.purged = True
                    app.sa_session.add( dataset )
                    app.sa_session.flush()
            else:
                print "This dataset (%i) is not purgable, the file (%s) will not be removed.\n" % ( dataset.id, dataset.file_name )
        except OSError, exc:
            # The file is already gone from disk; still mark the row purged
            # so it is not retried on the next run.
            print "Error, dataset file has already been removed: %s" % str( exc )
            print "Purging dataset id", dataset.id
            dataset.purged = True
            app.sa_session.add( dataset )
            app.sa_session.flush()
        except Exception, exc:
            print "Error attempting to purge data file: ", dataset.file_name, " error: ", str( exc )
    else:
        print "Error: '%s' has not previously been deleted, so it cannot be purged\n" % dataset.file_name
---|
| 381 | |
---|
| 382 | def _purge_folder( folder, app, remove_from_disk, info_only = False ): |
---|
| 383 | """Purges a folder and its contents, recursively""" |
---|
| 384 | for ld in folder.datasets: |
---|
| 385 | print "Deleting library dataset id ", ld.id |
---|
| 386 | ld.deleted = True |
---|
| 387 | for ldda in [ld.library_dataset_dataset_association] + ld.expired_datasets: |
---|
| 388 | _purge_dataset_instance( ldda, app, remove_from_disk, info_only = info_only ) #mark a DatasetInstance as deleted, clear associated files, and mark the Dataset as deleted if it is deletable |
---|
| 389 | for sub_folder in folder.folders: |
---|
| 390 | _purge_folder( sub_folder, app, remove_from_disk, info_only = info_only ) |
---|
| 391 | if not info_only: |
---|
| 392 | # TODO: should the folder permissions be deleted here? |
---|
| 393 | print "Purging folder id ", folder.id |
---|
| 394 | folder.purged = True |
---|
| 395 | app.sa_session.add( folder ) |
---|
| 396 | app.sa_session.flush() |
---|
| 397 | |
---|
class CleanupDatasetsApplication( object ):
    """Minimal stand-in for a Universe application: holds just the model
    and database session state the cleanup functions need."""
    def __init__( self, database_connection=None, file_path=None ):
        # Both settings are mandatory; fail fast with a descriptive message.
        for setting, value in ( ( "database_connection", database_connection ),
                                ( "file_path", file_path ) ):
            if value is None:
                raise Exception( "CleanupDatasetsApplication requires a %s value" % setting )
        self.database_connection = database_connection
        self.file_path = file_path
        # Set up the database engine and ORM mappings
        self.model = galaxy.model.mapping.init( self.file_path, self.database_connection, engine_options={}, create_tables=False )
    @property
    def sa_session( self ):
        """
        Return a SQLAlchemy session -- currently just the current session
        from the threadlocal session context, but provided to allow
        migration toward a more SQLAlchemy 0.4 style of use.
        """
        return self.model.context.current
---|
| 417 | |
---|
if __name__ == "__main__":
    main()
---|