#!/usr/bin/env python

# This script removes dataset files whose ids are no longer in the database.
# Takes 3 arguments:
#   1: database directory to clean
#   2: postgres database name
#   3 (optional): number of days to allow as a buffer, defaults to 2
# python cleanup_datasets.py /home/universe/server-home/wsgi-postgres/database/files/ galaxy_test 2

import os
import subprocess
import sys
import tempfile  # kept from the original import list; no longer used below
import time

SECONDS_PER_DAY = 60 * 60 * 24
DEFAULT_NUM_DAYS = 2
DATASET_PREFIX = "dataset_"
DATASET_SUFFIX = ".dat"


def parse_args(argv):
    """Parse the command line.

    Returns a ``(database_dir, database_name, num_days)`` tuple, or ``None``
    when the two mandatory arguments are missing. A missing or non-integer
    third argument falls back to ``DEFAULT_NUM_DAYS`` with a notice.
    """
    if len(argv) < 3:
        return None
    database_dir, database_name = argv[1], argv[2]
    try:
        num_days = int(argv[3])
    except (IndexError, ValueError):
        print("Using Default of 2 days buffer on delete")
        num_days = DEFAULT_NUM_DAYS
    return database_dir, database_name, num_days


def parse_ids(lines):
    """Collect the integer ids found in psql output lines.

    Lines that are not a bare integer (column header, separator, row-count
    footer, blank lines) are reported and skipped. Returns a set so that the
    per-file membership test during the directory walk is O(1).
    """
    ids = set()
    for line in lines:
        text = line.strip()
        try:
            ids.add(int(text))
        except ValueError:
            print("%s is not a valid id, skipping." % text)
    return ids


def fetch_dataset_ids(database_name):
    """Query ``database_name`` through psql and return the set of dataset ids.

    The command is passed as an argument list (no shell), so the database
    name is never interpreted by a shell, and the output is read from the
    pipe instead of a predictable temporary file.

    Raises ``subprocess.CalledProcessError`` when psql exits non-zero and
    ``OSError`` when psql cannot be started.
    """
    command = ["psql", "-d", database_name, "-c", "select id from dataset;"]
    print("Getting IDs: %s" % " ".join(command))
    output = subprocess.check_output(command, universal_newlines=True)
    return parse_ids(output.splitlines())


def dataset_id_from_filename(filename):
    """Return the integer id encoded in ``dataset_<id>.dat``, else ``None``.

    Names that do not follow the pattern, or whose middle part is not an
    integer, yield ``None`` so the caller leaves such files untouched.
    """
    if not (filename.startswith(DATASET_PREFIX)
            and filename.endswith(DATASET_SUFFIX)):
        return None
    try:
        return int(filename[len(DATASET_PREFIX):-len(DATASET_SUFFIX)])
    except ValueError:
        return None


def remove_orphaned_files(database_dir, ids, num_days, now=None):
    """Delete dataset files under ``database_dir`` whose id is not in ``ids``.

    A file is only removed when ``now`` is more than ``num_days`` days past
    its ``os.path.getctime`` timestamp (per the Python docs this is the
    metadata-change time on Unix and the creation time on Windows).
    ``now`` defaults to the current time and is sampled once per run.

    Files that cannot be inspected or removed are reported and skipped.
    Returns ``(number_of_files_deleted, total_bytes_deleted)``.
    """
    if now is None:
        now = time.time()
    known_ids = frozenset(ids)
    buffer_seconds = num_days * SECONDS_PER_DAY
    num_deleted = 0
    bytes_deleted = 0
    for base_dir, _sub_dirs, filenames in os.walk(database_dir):
        for filename in filenames:
            dataset_id = dataset_id_from_filename(filename)
            if dataset_id is None or dataset_id in known_ids:
                continue
            path = os.path.join(base_dir, filename)
            try:
                if now <= os.path.getctime(path) + buffer_seconds:
                    continue  # still inside the safety buffer
                size = os.path.getsize(path)
                os.unlink(path)
            except OSError as err:
                # e.g. the file vanished or is not removable; keep going
                print("Could not remove %s: %s" % (path, err))
                continue
            num_deleted += 1
            bytes_deleted += size
    return num_deleted, bytes_deleted


def print_summary(num_deleted, bytes_deleted):
    """Print the amount of space reclaimed and the number of files deleted."""
    print("%s bytes" % bytes_deleted)
    print("%s kilobytes" % (float(bytes_deleted) / 1024))
    print("%s Megabytes" % (float(bytes_deleted) / 1024 / 1024))
    print("%s Gigabytes" % (float(bytes_deleted) / 1024 / 1024 / 1024))
    print("%i files deleted" % num_deleted)


def main(argv):
    """Run the cleanup for the given argument vector and return an exit code."""
    args = parse_args(argv)
    if args is None:
        program = argv[0] if argv else "cleanup_datasets.py"
        print("Usage: python %s path_to_files:/home/universe/server-home/wsgi-postgres/database/files/ database_name:galaxy_test [num_days_buffer:2]" % program)
        return 0
    database_dir, database_name, num_days = args

    try:
        ids = fetch_dataset_ids(database_name)
    except (OSError, subprocess.CalledProcessError) as err:
        # Never delete anything when the list of live ids is unreliable.
        print("Could not retrieve dataset IDs: %s" % err)
        return 1

    if not ids:
        print("Less than 1 IDs have been found! Deleting proccess has been canceled.")
        return 0
    print("-----%i IDs Retrieved -----" % len(ids))
    print("----- Checking database directory for deleted ids: %s -----" % database_dir)

    num_deleted, bytes_deleted = remove_orphaned_files(database_dir, ids, num_days)
    print_summary(num_deleted, bytes_deleted)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))