1 | #TODO: Set dbkey to proper UCSC build, if known |
---|
2 | import urllib |
---|
3 | |
---|
4 | from galaxy import datatypes, config |
---|
5 | import tempfile, shutil |
---|
6 | |
---|
def exec_before_job(app, inp_data, out_data, param_dict, tool=None):
    """Set the name and datatype of the first output dataset.

    Reads ``name`` (default ``'HbVar query'``) and ``type`` (default
    ``'txt'``) from ``param_dict``.  A type of ``'txt'`` is replaced by
    ``'interval'``.  The first entry of ``out_data`` is passed through
    ``app.datatypes_registry.change_datatype``, given the chosen name and
    stored back into ``out_data`` under its original key.

    ``inp_data`` and ``tool`` are accepted but not used here.

    Raises:
        IndexError: if ``out_data`` is empty.
    """
    data_name = param_dict.get('name', 'HbVar query')
    data_type = param_dict.get('type', 'txt')
    if data_type == 'txt':
        # All data is TSV, assume interval
        data_type = 'interval'

    # list() keeps the indexing valid when dict.items() returns a view
    # (Python 3) rather than a list (Python 2).
    name, data = list(out_data.items())[0]
    data = app.datatypes_registry.change_datatype(data, data_type)
    data.name = data_name
    out_data[name] = data
16 | |
---|
def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
    """Download the datasource result into the first output dataset.

    The ``URL`` entry of ``param_dict`` is extended with
    ``&_export=1&GALAXY_URL=0`` and fetched in 1 MiB chunks into
    ``data.file_name`` of the first ``out_data`` entry.  If the dataset's
    datatype is an interval type, ``set_meta`` is called and, when no
    metadata is missing, every line of the file is rewritten with
    surrounding whitespace stripped; otherwise the dataset is switched to
    ``'tabular'``.  Finally size and peek are refreshed and the dataset is
    added to ``app.model.context`` and flushed.

    ``inp_data``, ``tool``, ``stdout`` and ``stderr`` are accepted but not
    used here.

    Raises:
        Exception: if ``param_dict`` has no (or an empty) ``URL``, if the
            URL cannot be opened, or if the download exceeds the size cap.
        IndexError: if ``out_data`` is empty.
    """
    URL = param_dict.get('URL', None)
    # Validate before appending: concatenating onto a missing URL would
    # raise TypeError and make this check unreachable.
    if not URL:
        raise Exception('Datasource has not sent back a URL parameter')
    URL = URL + '&_export=1&GALAXY_URL=0'

    CHUNK_SIZE = 2**20  # 1Mb
    MAX_SIZE = CHUNK_SIZE * 100

    # urlopen lives in urllib on Python 2 and in urllib.request on Python 3.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # NOTE(review): URL is taken from param_dict and opened as-is; confirm
    # that callers restrict it to trusted schemes/hosts.
    try:
        page = urlopen(URL)
    except Exception as exc:
        raise Exception('Problems connecting to %s (%s)' % (URL, exc))

    try:
        name, data = list(out_data.items())[0]

        with open(data.file_name, 'wb') as fp:
            size = 0
            while True:
                chunk = page.read(CHUNK_SIZE)
                if not chunk:
                    break
                # Checked before the current chunk is counted, so the cap
                # can be exceeded by at most one chunk.
                if size > MAX_SIZE:
                    raise Exception('----- maximum datasize exceeded ---')
                size += len(chunk)
                fp.write(chunk)
    finally:
        # Release the connection even when the download fails part-way.
        page.close()

    # Set meta data, format file to be valid interval type
    if isinstance(data.datatype, datatypes.interval.Interval):
        data.set_meta(first_line_is_header=True)
        # NOTE(review): the original indentation was lost; the tabular
        # fallback below is attached to this metadata check -- confirm.
        if not data.missing_meta():
            # Rewrite the file with leading/trailing whitespace stripped
            # from every line and a single '\n' terminator.  An anonymous
            # temporary file avoids reopening a deleted temp name.
            with tempfile.TemporaryFile('w+') as temp:
                with open(data.file_name, 'r') as source:
                    for line in source:
                        # split('\t') followed by '\t'.join() is the
                        # identity, so only the strip() has an effect.
                        temp.write("%s\n" % line.strip())
                temp.seek(0)
                with open(data.file_name, 'w') as target:
                    shutil.copyfileobj(temp, target)
        else:
            data = app.datatypes_registry.change_datatype(data, 'tabular')
    data.set_size()
    data.set_peek()
    app.model.context.add(data)
    app.model.context.flush()