[2] | 1 | #TODO: Set dbkey to proper UCSC build, if known |
---|
| 2 | import urllib |
---|
| 3 | |
---|
| 4 | from galaxy import datatypes, config |
---|
| 5 | import tempfile, shutil |
---|
| 6 | |
---|
def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
    """Sets the name and datatype of the first output dataset.

    Reads 'name' (default 'HbVar query') and 'type' (default 'txt') from
    param_dict, converts the first entry of out_data to that datatype via
    app.datatypes_registry.change_datatype, assigns the name, and stores
    the converted dataset back into out_data under its original key.

    inp_data and tool are accepted for the hook signature but are not used.
    Raises IndexError when out_data is empty.
    """
    data_name = param_dict.get('name', 'HbVar query')
    data_type = param_dict.get('type', 'txt')
    if data_type == 'txt':
        data_type = 'interval'  # All data is TSV, assume interval
    # list(...) keeps this working when items() returns a view rather than
    # a list, and still raises IndexError for an empty mapping.
    name, data = list(out_data.items())[0]
    data = app.datatypes_registry.change_datatype(data, data_type)
    data.name = data_name
    out_data[name] = data
| 16 | |
---|
def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
    """Fetches the query result from the datasource and verifies the data.

    Takes the 'URL' entry of param_dict, appends '&_export=1&GALAXY_URL=0',
    downloads the response in 1Mb chunks into the file of the first dataset
    in out_data, then sets metadata, size and peek on that dataset and
    flushes it through app.model.context.

    inp_data, tool, stdout and stderr are accepted for the hook signature
    but are not used.

    Raises Exception when no URL was supplied, when the connection fails,
    or when the download grows past the size limit. Raises IndexError when
    out_data is empty.
    """
    URL = param_dict.get('URL', None)
    # Validate before appending the export flags: concatenating onto None
    # raises TypeError, and a check made after concatenation can never fail.
    if not URL:
        raise Exception('Datasource has not sent back a URL parameter')
    URL = URL + '&_export=1&GALAXY_URL=0'

    CHUNK_SIZE = 2 ** 20  # 1Mb
    MAX_SIZE = CHUNK_SIZE * 100

    try:
        page = urllib.urlopen(URL)
    except Exception as exc:
        raise Exception('Problems connecting to %s (%s)' % (URL, exc))

    try:
        name, data = list(out_data.items())[0]

        # The with-block closes the output file even when the size limit
        # or a read error aborts the download.
        with open(data.file_name, 'wb') as fp:
            size = 0
            while True:
                chunk = page.read(CHUNK_SIZE)
                if not chunk:
                    break
                # Checked before the current chunk is counted, so the file
                # may exceed MAX_SIZE by up to one chunk before this fires.
                if size > MAX_SIZE:
                    raise Exception('----- maximum datasize exceeded ---')
                size += len(chunk)
                fp.write(chunk)
    finally:
        page.close()

    # NOTE(review): the nesting below was reconstructed from a copy of this
    # file that had lost its indentation; confirm that the else pairs with
    # the missing_meta() check and that the final four calls run
    # unconditionally.

    # Set meta data, format file to be valid interval type
    if isinstance(data.datatype, datatypes.interval.Interval):
        data.set_meta(first_line_is_header=True)
        # Check for missing meta data; if all there, rewrite the file
        if not data.missing_meta():
            # delete=False keeps the file on disk after close(), avoiding
            # the create/delete/reopen-by-name window of a closed
            # NamedTemporaryFile.
            temp = tempfile.NamedTemporaryFile('w', delete=False)
            try:
                with open(data.file_name, 'r') as source:
                    for line in source:
                        # Splitting on tabs and re-joining with tabs is an
                        # identity, so only the surrounding whitespace is
                        # stripped and a single newline is written back.
                        temp.write("%s\n" % line.strip())
            finally:
                temp.close()
            shutil.move(temp.name, data.file_name)
        else:
            data = app.datatypes_registry.change_datatype(data, 'tabular')
    data.set_size()
    data.set_peek()
    app.model.context.add(data)
    app.model.context.flush()