1 | #!/usr/bin/env python |
---|
2 | """ |
---|
3 | Export a history to an archive file using attribute files. |
---|
4 | |
---|
5 | usage: %prog history_attrs dataset_attrs job_attrs out_file |
---|
6 | -G, --gzip: gzip archive file |
---|
7 | """ |
---|
8 | |
---|
9 | from galaxy import eggs |
---|
10 | from galaxy.util.json import * |
---|
11 | import optparse, sys, os, tempfile, tarfile |
---|
12 | |
---|
def create_archive( history_attrs_file, datasets_attrs_file, jobs_attrs_file, out_file, gzip=False ):
    """
    Create a history archive from the given attribute/metadata files and save
    it to out_file.

    history_attrs_file -- path of the history attributes file.
    datasets_attrs_file -- path of the dataset attributes file: JSON list of
        dicts, each with a 'file_name' key. Rewritten in place so each
        'file_name' points at the dataset's location inside the archive.
    jobs_attrs_file -- path of the job attributes file.
    out_file -- path of the tar archive to create.
    gzip -- if True, gzip-compress the archive.

    Returns a status message string; errors are reported via the returned
    message rather than raised.
    """
    tarfile_mode = "w"
    if gzip:
        tarfile_mode += ":gz"
    try:
        history_archive = tarfile.open( out_file, tarfile_mode )
        try:
            # Read dataset attributes from file. A single read() replaces the
            # original chunked loop, which looped forever whenever the file
            # size was an exact multiple of the buffer size (read() returned
            # '' but the accumulated length stayed a multiple of buffsize).
            datasets_attr_in = open( datasets_attrs_file, 'rb' )
            try:
                datasets_attr_str = datasets_attr_in.read()
            finally:
                datasets_attr_in.close()
            datasets_attrs = from_json_string( datasets_attr_str )

            # Add datasets to archive and update dataset attributes.
            # TODO: security check to ensure that files added are in Galaxy dataset directory?
            for dataset_attrs in datasets_attrs:
                dataset_file_name = dataset_attrs[ 'file_name' ] # Full file name.
                dataset_archive_name = os.path.join( "datasets", os.path.split( dataset_file_name )[-1] )
                history_archive.add( dataset_file_name, arcname=dataset_archive_name )
                # Update dataset filename to be archive name.
                dataset_attrs[ 'file_name' ] = dataset_archive_name

            # Rewrite dataset attributes file so it references archive paths.
            datasets_attrs_out = open( datasets_attrs_file, 'w' )
            try:
                datasets_attrs_out.write( to_json_string( datasets_attrs ) )
            finally:
                datasets_attrs_out.close()

            # Finish archive.
            history_archive.add( history_attrs_file, arcname="history_attrs.txt" )
            history_archive.add( datasets_attrs_file, arcname="datasets_attrs.txt" )
            history_archive.add( jobs_attrs_file, arcname="jobs_attrs.txt" )
        finally:
            # Always close the archive, even when adding a member fails, so a
            # partial archive is not left open on error.
            history_archive.close()

        # Status.
        return 'Created history archive.'
    except Exception as e:
        # The original returned a (message, sys.stderr) tuple here by mistake
        # (an apparent mangled stderr print); return just the status string,
        # matching the success path that the caller prints.
        return 'Error creating history archive: %s' % str( e )
60 | |
---|
if __name__ == "__main__":
    # Parse command line; the usage string mirrors the module docstring.
    parser = optparse.OptionParser( usage='%prog [options] history_attrs dataset_attrs job_attrs out_file' )
    parser.add_option( '-G', '--gzip', dest='gzip', action="store_true", help='Compress archive using gzip.' )
    (options, args) = parser.parse_args()
    gzip = bool( options.gzip )
    # Fail with a usage message instead of a bare ValueError from unpacking
    # when the wrong number of positional arguments is supplied.
    if len( args ) != 4:
        parser.error( 'Expected 4 arguments: history_attrs dataset_attrs job_attrs out_file' )
    history_attrs, dataset_attrs, job_attrs, out_file = args

    # Create archive and report status. print( status ) is valid in both
    # Python 2 and 3, unlike the original "print status" statement.
    status = create_archive( history_attrs, dataset_attrs, job_attrs, out_file, gzip )
    print( status )
---|