#!/usr/bin/env python
# Retrieves data from external data source applications and stores it in a dataset file.
# Data source application parameters are temporarily stored in the dataset file.
import socket, urllib, sys, os, gzip, tempfile, shutil
from galaxy import eggs
from galaxy.util import gzip_magic

assert sys.version_info[:2] >= ( 2, 4 )

def stop_err( msg ):
    # Write the error message to stderr and exit.
    sys.stderr.write( msg )
    sys.exit()

def check_gzip( filename ):
    # Returns True if the file starts with the two-byte gzip magic number.
    # TODO: This needs to check for BAM files since they are compressed and must remain so ( see upload.py )
    temp = open( filename, "U" )
    magic_check = temp.read( 2 )
    temp.close()
    if magic_check != gzip_magic:
        return False
    return True

def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int( sys.argv[2] )
    except:
        max_file_size = 0
    # Read the tab-separated key/value parameters that the data source
    # application stored in the dataset file.
    params = {}
    for line in open( filename, 'r' ):
        try:
            line = line.strip()
            fields = line.split( '\t' )
            params[ fields[0] ] = fields[1]
        except:
            continue
    URL = params.get( 'URL', None )
    if not URL:
        open( filename, 'w' ).write( "" )
        stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
    URL_method = params.get( 'URL_method', None )
    CHUNK_SIZE = 2**20 # 1 MB
    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library. As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout( 600 )
    # The following calls to urllib.urlopen() will use the above default timeout.
    try:
        if not URL_method or URL_method == 'get':
            page = urllib.urlopen( URL )
        elif URL_method == 'post':
            page = urllib.urlopen( URL, urllib.urlencode( params ) )
    except Exception, e:
        stop_err( 'The remote data source application may be offline, please try again later. Error: %s' % str( e ) )
    if max_file_size:
        file_size = int( page.info().get( 'Content-Length', 0 ) )
        if file_size > max_file_size:
            stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
    # Stream the response into the dataset file, overwriting the stored parameters.
    out = open( filename, 'w' )
    while 1:
        chunk = page.read( CHUNK_SIZE )
        if not chunk:
            break
        out.write( chunk )
    out.close()
    if check_gzip( filename ):
        # TODO: This needs to check for BAM files since they are compressed and must remain so ( see upload.py )
        fd, uncompressed = tempfile.mkstemp()
        gzipped_file = gzip.GzipFile( filename )
        while 1:
            try:
                chunk = gzipped_file.read( CHUNK_SIZE )
            except IOError:
                os.close( fd )
                os.remove( uncompressed )
                gzipped_file.close()
                stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' )
            if not chunk:
                break
            os.write( fd, chunk )
        os.close( fd )
        gzipped_file.close()
        # Replace the gzipped file with the uncompressed file.
        shutil.move( uncompressed, filename )

if __name__ == "__main__":
    __main__()
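
# ----------------------------------------------------------------------------
# Usage sketch. The file name, URL, and values below are illustrative
# assumptions, not part of the script itself; only the argument order and the
# tab-separated parameter format follow from the code above. The dataset file
# passed as the first argument initially holds key/value pairs such as:
#
#     URL <TAB> http://datasource.example.org/export?id=12345
#     URL_method <TAB> post
#
# The script is then invoked with that file and an optional maximum download
# size in bytes as the second argument:
#
#     python data_source.py /path/to/dataset_001.dat 1073741824
#
# On success, the parameters in the dataset file are replaced by the data
# retrieved from the URL, uncompressed first if it arrived gzipped.
# ----------------------------------------------------------------------------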