root/galaxy-central/tools/data_source/data_source.py

Revision 2, 3.6 KB (committer: hatakeyama, 14 years ago)


#!/usr/bin/env python
# Retrieves data from external data source applications and stores in a dataset file.
# Data source application parameters are temporarily stored in the dataset file.
import socket, urllib, sys, os, gzip, tempfile, shutil
from galaxy import eggs
from galaxy.util import gzip_magic

assert sys.version_info[:2] >= ( 2, 4 )

def stop_err( msg ):
    sys.stderr.write( msg )
    sys.exit()
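# Note: sys.exit() with no argument exits with status 0, so failure is signaled
# by the message written to stderr rather than by the exit code ( assuming the
# Galaxy job runner of this era, which treated any stderr output as an error ).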

def check_gzip( filename ):
    # TODO: This needs to check for BAM files since they are compressed and must remain so ( see upload.py )
    # Open in binary mode: universal-newline mode could translate bytes in a binary stream.
    temp = open( filename, "rb" )
    magic_check = temp.read( 2 )
    temp.close()
    return magic_check == gzip_magic
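# For reference, a gzip stream always begins with the two magic bytes
# \x1f\x8b, which is the value galaxy.util.gzip_magic is expected to hold.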

def __main__():
    filename = sys.argv[1]
    try:
        max_file_size = int( sys.argv[2] )
    except ( IndexError, ValueError ):
        max_file_size = 0
    params = {}
    for line in open( filename, 'r' ):
        try:
            line = line.strip()
            fields = line.split( '\t' )
            params[ fields[0] ] = fields[1]
        except IndexError:
            continue
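    # The dataset file written by the data source holds one tab-separated
    # key/value pair per line; e.g. a ( hypothetical ) line "URL\thttp://example.org/data"
    # yields params[ 'URL' ] = 'http://example.org/data'.  Lines without a tab are skipped.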
    URL = params.get( 'URL', None )
    if not URL:
        open( filename, 'w' ).write( "" )
        stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
    URL_method = params.get( 'URL_method', None )
    CHUNK_SIZE = 2**20 # 1 MB
    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library.  As of Python 2.3 you can specify how long
    # a socket should wait for a response before timing out. By default the socket module has no
    # timeout and can hang. Currently, the socket timeout is not exposed at the httplib or urllib2
    # levels. However, you can set the default timeout ( in seconds ) globally for all sockets by
    # doing the following.
    socket.setdefaulttimeout( 600 )
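    # ( Since Python 2.6, urllib2.urlopen() accepts a per-call timeout argument,
    # but this script targets Python 2.4+, so the global default above is used. )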
    # The following calls to urllib.urlopen() will use the above default timeout.
    try:
        if not URL_method or URL_method == 'get':
            page = urllib.urlopen( URL )
        elif URL_method == 'post':
            page = urllib.urlopen( URL, urllib.urlencode( params ) )
        else:
            stop_err( 'Unsupported URL_method: %s' % URL_method )
    except Exception, e:
        stop_err( 'The remote data source application may be offline, please try again later. Error: %s' % str( e ) )
    if max_file_size:
        file_size = int( page.info().get( 'Content-Length', 0 ) )
        if file_size > max_file_size:
            stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )
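    # Note: a server that omits the Content-Length header skips this check
    # ( file_size defaults to 0 ); the download below is still read in
    # CHUNK_SIZE pieces, so memory use stays bounded either way.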
    # Write in binary mode; the payload may be compressed or otherwise binary data.
    out = open( filename, 'wb' )
    while True:
        chunk = page.read( CHUNK_SIZE )
        if not chunk:
            break
        out.write( chunk )
    out.close()
    if check_gzip( filename ):
        # TODO: This needs to check for BAM files since they are compressed and must remain so ( see upload.py )
        fd, uncompressed = tempfile.mkstemp()
        gzipped_file = gzip.GzipFile( filename )
        while True:
            try:
                chunk = gzipped_file.read( CHUNK_SIZE )
            except IOError:
                os.close( fd )
                os.remove( uncompressed )
                gzipped_file.close()
                stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' )
            if not chunk:
                break
            os.write( fd, chunk )
        os.close( fd )
        gzipped_file.close()
        # Replace the gzipped file with the uncompressed file
        shutil.move( uncompressed, filename )

if __name__ == "__main__": __main__()
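
A hypothetical invocation, assuming the Galaxy tool wrapper passes the dataset
file path and the configured maximum download size in bytes as the two
positional arguments:

    python data_source.py dataset_001.dat 1048576

The script reads the URL and related parameters back out of the named file,
fetches the remote data, and overwrites that same file with the downloaded
( and, if necessary, uncompressed ) content.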