Context Navigation

data_source.py

リビジョン 2, 3.6 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central

行番号
1	#!/usr/bin/env python
2	# Retrieves data from external data source applications and stores in a dataset file.
3	# Data source application parameters are temporarily stored in the dataset file.
4	import socket, urllib, sys, os, gzip, tempfile, shutil
5	from galaxy import eggs
6	from galaxy.util import gzip_magic
7
# Refuse to run on interpreters older than Python 2.4.
assert ( 2, 4 ) <= sys.version_info[:2]
9
def stop_err( msg ):
    """
    Report a fatal problem on stderr and terminate the script.

    msg is written to sys.stderr exactly as given (no newline is appended),
    then SystemExit is raised without an exit code, which is what a bare
    sys.exit() call does.
    """
    error_stream = sys.stderr
    error_stream.write( msg )
    raise SystemExit()
13
def check_gzip( filename ):
    """
    Tell whether the file at filename starts with the gzip magic number.

    The first two bytes of the file are compared with gzip_magic (imported
    from galaxy.util).

    Returns True when they are equal, False otherwise (including when the
    file is shorter than two bytes).
    Raises IOError if the file cannot be opened or read.
    """
    # TODO: This needs to check for BAM files since they are compressed and must remain so ( see upload.py )
    # The magic number is raw binary data, so read it in binary mode; a text
    # mode such as "U" may translate bytes on some platforms.
    handle = open( filename, "rb" )
    try:
        magic_check = handle.read( 2 )
    finally:
        # Release the handle even if the read fails.
        handle.close()
    return magic_check == gzip_magic
22
def __main__():
    """
    Retrieve data from a remote data source and store it in the dataset file.

    Command line arguments:
        sys.argv[1] -- path of the dataset file. On entry it holds the request
                       parameters, one tab separated "name<TAB>value" pair per
                       line. On success it is overwritten with the retrieved
                       data ( gunzipped when the data starts with the gzip
                       magic number ).
        sys.argv[2] -- optional maximum allowed size in bytes. When missing,
                       not an integer, or 0, no size check is done. The check
                       relies on the Content-Length response header only.

    Recognised request parameters:
        URL        -- required, the address to fetch.
        URL_method -- optional, 'get' ( the default ) or 'post'. For 'post' all
                      request parameters are sent url-encoded as the body.

    Every failure is reported through stop_err(), which writes the message to
    stderr and exits.
    """
    filename = sys.argv[1]
    try:
        max_file_size = int( sys.argv[2] )
    except ( IndexError, ValueError ):
        # No limit given, or it is not a number: disable the size check.
        max_file_size = 0

    # Read the request parameters that were stored in the dataset file.
    params = {}
    param_file = open( filename, 'r' )
    try:
        for line in param_file:
            fields = line.strip().split( '\t' )
            if len( fields ) < 2:
                # Not a "name<TAB>value" line, ignore it.
                continue
            params[ fields[0] ] = fields[1]
    finally:
        param_file.close()

    URL = params.get( 'URL', None )
    if not URL:
        # Empty the dataset file so the stored parameters are not left behind as data.
        open( filename, 'w' ).close()
        stop_err( 'The remote data source application has not sent back a URL parameter in the request.' )
    URL_method = params.get( 'URL_method', None )
    # Reject unknown methods up front; otherwise no request would be made and
    # the code below would fail with a NameError on the missing response.
    # This check is deliberately outside the try block below so that the
    # SystemExit raised by stop_err() can never be caught by that handler.
    if URL_method and URL_method not in ( 'get', 'post' ):
        stop_err( "The remote data source application sent an unsupported URL_method parameter: '%s'." % URL_method )
    CHUNK_SIZE = 2**20 # 1 MiB
    # The Python support for fetching resources from the web is layered. urllib uses the httplib
    # library, which in turn uses the socket library. By default the socket module has no
    # timeout and can hang, and urllib.urlopen() offers no way to pass one. However, the
    # default timeout ( in seconds ) can be set globally for all sockets as done here.
    socket.setdefaulttimeout( 600 )
    # The following calls to urllib.urlopen() will use the above default timeout
    try:
        if URL_method == 'post':
            page = urllib.urlopen( URL, urllib.urlencode( params ) )
        else:
            page = urllib.urlopen( URL )
    except Exception:
        # sys.exc_info() is used instead of "except Exception, e" / "as e" so
        # that this syntax is accepted by every Python version.
        e = sys.exc_info()[1]
        stop_err( 'The remote data source application may be off line, please try again later. Error: %s' % str( e ) )

    if max_file_size:
        try:
            file_size = int( page.info().get( 'Content-Length', 0 ) )
        except ValueError:
            # Unparsable header: treat the size as unknown, like a missing header.
            file_size = 0
        if file_size > max_file_size:
            page.close()
            stop_err( 'The size of the data (%d bytes) you have requested exceeds the maximum allowed (%d bytes) on this server.' % ( file_size, max_file_size ) )

    # The payload may be binary ( e.g. gzip ), so write it in binary mode.
    out = open( filename, 'wb' )
    try:
        while 1:
            chunk = page.read( CHUNK_SIZE )
            if not chunk:
                break
            out.write( chunk )
    finally:
        # Release both the file and the connection, even on a read or write error.
        out.close()
        page.close()

    if check_gzip( filename ):
        # TODO: This needs to check for BAM files since they are compressed and must remain so ( see upload.py )
        fd, uncompressed = tempfile.mkstemp()
        gzipped_file = gzip.GzipFile( filename )
        while 1:
            try:
                chunk = gzipped_file.read( CHUNK_SIZE )
            except IOError:
                # Discard the partial output before reporting the failure.
                os.close( fd )
                os.remove( uncompressed )
                gzipped_file.close()
                stop_err( 'Problem uncompressing gzipped data, please try retrieving the data uncompressed.' )
            if not chunk:
                break
            os.write( fd, chunk )
        os.close( fd )
        gzipped_file.close()
        # Replace the gzipped file with the uncompressed file
        shutil.move( uncompressed, filename )
88
# Run the retrieval only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/data_source/data_source.py

異なるフォーマットでダウンロード: