root/galaxy-central/cron/cleanup_datasets.py

リビジョン 2, 2.3 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
#!/usr/bin/env python

# This script removes deleted dataset files.
# Takes 3 arguments:
#   1: database directory to clean
#   2: postgres database name
#   3 (optional): number of days to allow as a buffer, defaults to 2
# python cleanup_datasets.py /home/universe/server-home/wsgi-postgres/database/files/ galaxy_test 2
#
# A file is removed when it is named dataset_<id>.dat, <id> is not returned by
# "select id from dataset;", and its ctime is older than the buffer.
# print() is always called with one pre-formatted string so that the output is
# the same under Python 2 and Python 3.

import os
import subprocess
import sys
import tempfile
import time

DEFAULT_NUM_DAYS = 2
SECONDS_PER_DAY = 60 * 60 * 24
DATASET_PREFIX = "dataset_"
DATASET_SUFFIX = ".dat"
ID_QUERY = "select id from dataset;"


def parse_args(argv):
    """Return (database_dir, database_name, num_days) parsed from argv.

    Prints the usage line and exits with status 0 (the status the script has
    always used here) when either required argument is missing.  A missing or
    non-integer third argument falls back to DEFAULT_NUM_DAYS.
    """
    try:
        database_dir = argv[1]
        database_name = argv[2]
    except IndexError:
        print("Usage: python %s path_to_files:/home/universe/server-home/wsgi-postgres/database/files/ database_name:galaxy_test [num_days_buffer:2]" % argv[0])
        sys.exit(0)
    try:
        num_days = int(argv[3])
    except (IndexError, ValueError):
        print("Using Default of 2 days buffer on delete")
        num_days = DEFAULT_NUM_DAYS
    return database_dir, database_name, num_days


def parse_id_lines(lines):
    """Return the set of integer ids found in an iterable of text lines.

    Lines that are not a bare integer (psql's header, rule and row-count
    lines, blank lines) are reported and skipped.
    """
    ids = set()
    for line in lines:
        text = line.strip()
        try:
            ids.add(int(text))
        except ValueError:
            print("%s is not a valid id, skipping." % text)
    return ids


def fetch_dataset_ids(database_name):
    """Return the set of dataset ids that psql reports for database_name.

    psql writes its result to a private temporary file which is always
    removed afterwards.  Exits with status 1 if psql cannot be started or
    reports failure, so that nothing is deleted on the basis of a bad query.
    """
    # mkstemp creates the file atomically; psql then overwrites that same path.
    fd, id_filename = tempfile.mkstemp()
    os.close(fd)
    try:
        print('Getting IDs: psql -d %s -c "select id from dataset;" -o %s' % (database_name, id_filename))
        # An argument list (no shell) keeps database_name from being
        # interpreted as shell syntax.
        try:
            status = subprocess.call(["psql", "-d", database_name, "-c", ID_QUERY, "-o", id_filename])
        except OSError:
            print("Could not run psql! Deleting proccess has been canceled.")
            sys.exit(1)
        if status != 0:
            print("psql exited with status %i! Deleting proccess has been canceled." % status)
            sys.exit(1)
        with open(id_filename, "r") as id_file:
            return parse_id_lines(id_file)
    finally:
        os.unlink(id_filename)


def dataset_id_from_filename(filename):
    """Return the integer id in a 'dataset_<id>.dat' name, or None.

    None is returned both for names that do not follow the pattern and for
    names whose <id> part is not an integer.
    """
    if not (filename.startswith(DATASET_PREFIX) and filename.endswith(DATASET_SUFFIX)):
        return None
    try:
        return int(filename[len(DATASET_PREFIX):-len(DATASET_SUFFIX)])
    except ValueError:
        return None


def remove_orphaned_datasets(database_dir, known_ids, num_days):
    """Delete old dataset files under database_dir whose id is not known.

    database_dir -- directory tree to walk
    known_ids    -- container of integer ids to keep (a set gives O(1) lookups)
    num_days     -- files whose ctime is within this many days are kept

    Returns (number_of_files_deleted, total_bytes_deleted).
    """
    # ctime older than this timestamp means the buffer period has passed.
    cutoff = time.time() - num_days * SECONDS_PER_DAY
    num_delete = 0
    file_size = 0
    for this_base_dir, _sub_dirs, filenames in os.walk(database_dir):
        for filename in filenames:
            if not (filename.startswith(DATASET_PREFIX) and filename.endswith(DATASET_SUFFIX)):
                continue
            file_name = os.path.join(this_base_dir, filename)
            dataset_id = dataset_id_from_filename(filename)
            if dataset_id is None:
                # Never delete a file whose id cannot be checked.
                print("%s does not contain a valid dataset id, skipping." % file_name)
                continue
            if dataset_id in known_ids:
                continue
            try:
                if os.path.getctime(file_name) >= cutoff:
                    continue
                size = os.path.getsize(file_name)
                os.unlink(file_name)
            except OSError:
                # The file vanished or is not removable; keep going.
                print("%s could not be removed, skipping." % file_name)
                continue
            num_delete += 1
            file_size += size
    return num_delete, file_size


def main(argv=None):
    """Run the cleanup using argv (defaults to sys.argv)."""
    if argv is None:
        argv = sys.argv
    database_dir, database_name, num_days = parse_args(argv)
    ids = fetch_dataset_ids(database_name)
    if not ids:
        # With no ids every dataset file would look orphaned, so stop here.
        print("Less than 1 IDs have been found! Deleting proccess has been canceled.")
        sys.exit(0)
    print("-----%i IDs Retrieved -----" % len(ids))
    print("----- Checking database directory for deleted ids: %s -----" % database_dir)
    num_delete, file_size = remove_orphaned_datasets(database_dir, ids, num_days)
    print("%s bytes" % file_size)
    print("%s kilobytes" % (float(file_size) / 1024))
    print("%s Megabytes" % (float(file_size) / 1024 / 1024))
    print("%s Gigabytes" % (float(file_size) / 1024 / 1024 / 1024))
    print("%i files deleted" % num_delete)


if __name__ == "__main__":
    main()
    sys.exit(0)
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。