#!/usr/bin/env python
"""Remove dataset files whose ids no longer exist in the database.

Takes 3 arguments:
  1: database directory to clean
  2: postgres database name
  3 (optional): number of days to allow as a buffer, defaults to 2

Example:
  python cleanup_datasets.py /home/universe/server-home/wsgi-postgres/database/files/ galaxy_test 2

The script asks ``psql`` for every id in the ``dataset`` table, walks the
database directory, and unlinks each ``dataset_<id>.dat`` file whose id is
not in that list and whose ctime is older than the buffer.

Only single-argument ``print(...)`` calls are used so the file parses the
same way under Python 2 (2.6+) and Python 3.
"""

import os
import subprocess
import sys
import tempfile
import time

SECONDS_PER_DAY = 60 * 60 * 24
DEFAULT_NUM_DAYS = 2
DATASET_PREFIX = "dataset_"
DATASET_SUFFIX = ".dat"


def parse_args(argv):
    """Parse the command line.

    argv: full argument vector, program name first (like ``sys.argv``).

    Returns ``(database_dir, database_name, num_days)``, or ``None`` when
    either of the two required arguments is missing.  A missing or
    non-integer third argument falls back to the 2 day default.
    """
    if len(argv) < 3:
        return None
    database_dir = argv[1]
    database_name = argv[2]
    num_days = DEFAULT_NUM_DAYS
    try:
        num_days = int(argv[3])
    except (IndexError, ValueError):
        # Only "absent" and "not a number" are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        print("Using Default of 2 days buffer on delete")
    return database_dir, database_name, num_days


def parse_ids(lines):
    """Collect the integer ids found in an iterable of psql output lines.

    Lines that are not a bare integer (column header, separator, row count,
    blank lines) are reported and skipped.  Returns a ``set`` of ints so
    later membership tests are O(1).
    """
    ids = set()
    for line in lines:
        text = line.strip()
        try:
            ids.add(int(text))
        except ValueError:
            print("%s is not a valid id, skipping." % text)
    return ids


def fetch_dataset_ids(database_name):
    """Return the set of ids in the ``dataset`` table of *database_name*.

    psql writes its result into a private temporary file which is always
    removed afterwards.  An empty set is returned when psql cannot be
    started or exits with a non-zero status, so the caller aborts instead
    of deleting files based on an incomplete id list.
    """
    # mkstemp creates the file atomically and keeps it until we unlink it,
    # so the name cannot be claimed by another process in the meantime.
    handle, id_filename = tempfile.mkstemp()
    os.close(handle)
    # An argument list (no shell) keeps the database name from being
    # interpreted as shell syntax.
    command = ["psql", "-d", database_name,
               "-c", "select id from dataset;", "-o", id_filename]
    print("Getting IDs: %s" % " ".join(command))
    try:
        try:
            status = subprocess.call(command)
        except OSError as err:
            print("Unable to run psql: %s" % err)
            return set()
        if status != 0:
            print("psql exited with status %i" % status)
            return set()
        with open(id_filename, "r") as id_file:
            return parse_ids(id_file)
    finally:
        os.unlink(id_filename)


def dataset_id_from_name(file_name):
    """Return the integer id encoded in ``dataset_<id>.dat``.

    Returns ``None`` for any name that does not match that pattern exactly,
    so unrelated or oddly named files are never considered for deletion.
    """
    if not (file_name.startswith(DATASET_PREFIX)
            and file_name.endswith(DATASET_SUFFIX)):
        return None
    core = file_name[len(DATASET_PREFIX):-len(DATASET_SUFFIX)]
    if not core.isdigit():
        return None
    try:
        return int(core)
    except ValueError:
        # isdigit() accepts a few characters int() rejects (e.g. superscripts).
        return None


def remove_orphaned_files(database_dir, ids, num_days):
    """Delete dataset files under *database_dir* whose id is not in *ids*.

    database_dir: directory tree to walk.
    ids: iterable of dataset ids that still exist in the database.
    num_days: a file is only removed when its ctime is more than this many
        days in the past.

    Returns ``(num_deleted, bytes_freed)``.  Files that disappear or cannot
    be removed are reported and skipped rather than ending the run.
    """
    known_ids = set(ids)
    # Computed once: files touched while the walk is running stay protected.
    cutoff = time.time() - num_days * SECONDS_PER_DAY
    num_deleted = 0
    bytes_freed = 0
    for base_dir, _sub_dirs, file_names in os.walk(database_dir):
        for file_name in file_names:
            dataset_id = dataset_id_from_name(file_name)
            if dataset_id is None or dataset_id in known_ids:
                continue
            path = os.path.join(base_dir, file_name)
            try:
                if os.path.getctime(path) >= cutoff:
                    continue  # still inside the buffer window
                size = os.path.getsize(path)
                os.unlink(path)
            except OSError as err:
                print("Could not remove %s: %s" % (path, err))
                continue
            num_deleted += 1
            bytes_freed += size
    return num_deleted, bytes_freed


def main(argv=None):
    """Run the cleanup; returns the process exit status (always 0)."""
    if argv is None:
        argv = sys.argv
    args = parse_args(argv)
    if args is None:
        program = argv[0] if argv else "cleanup_datasets.py"
        print("Usage: python %s path_to_files:/home/universe/server-home/wsgi-postgres/database/files/ database_name:galaxy_test [num_days_buffer:2]" % program)
        return 0
    database_dir, database_name, num_days = args

    ids = fetch_dataset_ids(database_name)
    if len(ids) < 1:
        # Safety net: an empty id list would mark every file as orphaned.
        print("Less than 1 IDs have been found! Deleting proccess has been canceled.")
        return 0
    print("-----%i IDs Retrieved -----" % len(ids))
    print("----- Checking database directory for deleted ids: %s -----" % database_dir)

    num_delete, file_size = remove_orphaned_files(database_dir, ids, num_days)
    print("%s bytes" % file_size)
    print("%s kilobytes" % (float(file_size) / 1024))
    print("%s Megabytes" % (float(file_size) / 1024 / 1024))
    print("%s Gigabytes" % (float(file_size) / 1024 / 1024 / 1024))
    print("%i files deleted" % num_delete)
    return 0


if __name__ == "__main__":
    sys.exit(main())