1 | """ |
---|
2 | Provides mapping between extensions and datatypes, mime-types, etc. |
---|
3 | """ |
---|
4 | import os, tempfile |
---|
5 | import logging |
---|
6 | import data, tabular, interval, images, sequence, qualityscore, genetics, xml, coverage, tracks, chrominfo, binary, assembly, ngsindex |
---|
7 | import galaxy.util |
---|
8 | from galaxy.util.odict import odict |
---|
9 | from display_applications.application import DisplayApplication |
---|
10 | |
---|
class ConfigurationError( Exception ):
    """Raised when a directory required by the datatypes configuration does not exist."""
---|
13 | |
---|
14 | class Registry( object ): |
---|
def __init__( self, root_dir=None, config=None ):
    """
    Set up the registry from a datatypes configuration when one is given,
    and from the built-in defaults otherwise.

    root_dir -- directory onto which the converters, indexers and display
                applications paths named in the configuration are joined
    config   -- datatypes configuration, handed to galaxy.util.parse_xml

    The configuration is only read when both root_dir and config are truthy.
    A datatype, display application or sniffer that fails to load is logged
    and loading continues with the next one.  When no datatype (or no
    sniffer) ends up registered, the hard-coded defaults below are used.

    Raises ConfigurationError when the configured converters or indexers
    directory does not exist.
    """
    def resolve_class( spec ):
        # spec has the form "dotted.module.path:ClassName"; return the class object.
        parts = spec.split( ':' )
        module_path = parts[0]
        class_name = parts[1]
        names = module_path.split( '.' )
        module = __import__( names.pop( 0 ) )
        for name in names:
            module = getattr( module, name )
        return getattr( module, class_name )

    self.log = logging.getLogger( __name__ )
    self.log.addHandler( logging.NullHandler() )
    self.datatypes_by_extension = {}
    self.mimetypes_by_extension = {}
    self.datatype_converters = odict()
    self.datatype_indexers = odict()
    self.converters = []
    self.converter_deps = {}
    self.available_tracks = []
    self.set_external_metadata_tool = None
    self.indexers = []
    self.sniff_order = []
    self.upload_file_formats = []
    self.display_applications = odict() # map a display application id to a display application
    inherit_display_application_by_class = []
    if root_dir and config:
        # Parse datatypes_conf.xml
        tree = galaxy.util.parse_xml( config )
        root = tree.getroot()
        # Load datatypes and converters from config
        self.log.debug( 'Loading datatypes from %s' % config )
        registration = root.find( 'registration' )
        self.datatype_converters_path = os.path.join( root_dir, registration.get( 'converters_path', 'lib/galaxy/datatypes/converters' ) )
        self.datatype_indexers_path = os.path.join( root_dir, registration.get( 'indexers_path', 'lib/galaxy/datatypes/indexers' ) )
        self.display_applications_path = os.path.join( root_dir, registration.get( 'display_path', 'display_applications' ) )
        if not os.path.isdir( self.datatype_converters_path ):
            raise ConfigurationError( "Directory does not exist: %s" % self.datatype_converters_path )
        if not os.path.isdir( self.datatype_indexers_path ):
            raise ConfigurationError( "Directory does not exist: %s" % self.datatype_indexers_path )
        for elem in registration.findall( 'datatype' ):
            try:
                extension = elem.get( 'extension', None )
                dtype = elem.get( 'type', None )
                mimetype = elem.get( 'mimetype', None )
                # Attribute values are strings, so "false" would be truthy if used directly;
                # parse it the same way the 'inherit' attribute of <display> is parsed below.
                display_in_upload = galaxy.util.string_as_bool( elem.get( 'display_in_upload', 'False' ) )
                if extension and dtype:
                    datatype_class = resolve_class( dtype )
                    datatype = datatype_class()
                    self.datatypes_by_extension[extension] = datatype
                    if mimetype is None:
                        # Use default mime type as per datatype spec
                        mimetype = datatype.get_mime()
                    self.mimetypes_by_extension[extension] = mimetype
                    if hasattr( datatype_class, "get_track_type" ):
                        self.available_tracks.append( extension )
                    if display_in_upload:
                        self.upload_file_formats.append( extension )
                    # max file size cut off for setting optional metadata
                    datatype.max_optional_metadata_filesize = elem.get( 'max_optional_metadata_filesize', None )
                    for converter in elem.findall( 'converter' ):
                        # Build the list of datatype converters which will later be loaded
                        # into the calling app's toolbox.
                        converter_config = converter.get( 'file', None )
                        target_datatype = converter.get( 'target_datatype', None )
                        depends_on = converter.get( 'depends_on', None )
                        if depends_on and target_datatype:
                            if extension not in self.converter_deps:
                                self.converter_deps[extension] = {}
                            self.converter_deps[extension][target_datatype] = depends_on.split( ',' )
                        if converter_config and target_datatype:
                            self.converters.append( ( converter_config, extension, target_datatype ) )
                    for indexer in elem.findall( 'indexer' ):
                        # Build the list of datatype indexers for track building
                        indexer_config = indexer.get( 'file', None )
                        if indexer_config:
                            self.indexers.append( ( indexer_config, extension ) )
                    for composite_file in elem.findall( 'composite_file' ):
                        # add composite files
                        name = composite_file.get( 'name', None )
                        if name is None:
                            self.log.warning( "You must provide a name for your composite_file (%s)." % composite_file )
                        # NOTE(review): 'optional' is passed through as the raw attribute string
                        # (or False when absent) -- confirm add_composite_file converts it.
                        optional = composite_file.get( 'optional', False )
                        composite_mimetype = composite_file.get( 'mimetype', None )
                        datatype.add_composite_file( name, optional=optional, mimetype=composite_mimetype )
                    for display_elem in elem.findall( 'display' ):
                        display_file = os.path.join( self.display_applications_path, display_elem.get( 'file', None ) )
                        try:
                            inherit = galaxy.util.string_as_bool( display_elem.get( 'inherit', 'False' ) )
                            display_app = DisplayApplication.from_file( display_file, self )
                            if display_app:
                                if display_app.id in self.display_applications:
                                    # if we already loaded this display application, we'll use the first one again
                                    display_app = self.display_applications[ display_app.id ]
                                self.log.debug( "Loaded display application '%s' for datatype '%s', inherit=%s" % ( display_app.id, extension, inherit ) )
                                self.display_applications[ display_app.id ] = display_app # Display app by id
                                datatype.add_display_application( display_app )
                                if inherit and ( datatype, display_app ) not in inherit_display_application_by_class:
                                    # subclass inheritance will need to wait until all datatypes have been loaded
                                    inherit_display_application_by_class.append( ( datatype, display_app ) )
                        except Exception:
                            # Best effort: a broken display application must not stop the registry from loading.
                            self.log.exception( "error reading display application from path: %s" % display_file )
            except Exception as e:
                self.log.warning( 'Error loading datatype "%s", problem: %s' % ( extension, str( e ) ) )
        # Handle display_application subclass inheritance here:
        for d_type1 in self.datatypes_by_extension.values():
            for d_type2, display_app in inherit_display_application_by_class:
                current_app = d_type1.get_display_application( display_app.id, None )
                if current_app is None and isinstance( d_type1, type( d_type2 ) ):
                    d_type1.add_display_application( display_app )
        # Load datatype sniffers from the config
        sniff_order = []
        sniffers = root.find( 'sniffers' )
        # A config without a <sniffers> section falls through to the default sniff order.
        # (Compare with None: an element without children is falsy.)
        if sniffers is not None:
            for elem in sniffers.findall( 'sniffer' ):
                dtype = elem.get( 'type', None )
                if dtype:
                    sniff_order.append( dtype )
        for dtype in sniff_order:
            try:
                aclass = resolve_class( dtype )()
                included = False
                for atype in self.sniff_order:
                    # NOTE(review): isinstance( atype, C ) implies issubclass( atype.__class__, C ),
                    # so this condition looks like it can never be true and every configured
                    # sniffer is appended.  Left unchanged because sniff order is critical.
                    if not issubclass( atype.__class__, aclass.__class__ ) and isinstance( atype, aclass.__class__ ):
                        included = True
                        break
                if not included:
                    self.sniff_order.append( aclass )
                    self.log.debug( 'Loaded sniffer for datatype: %s' % dtype )
            except Exception as exc:
                self.log.warning( 'Error appending datatype %s to sniff_order, problem: %s' % ( dtype, str( exc ) ) )
    # default values
    if not self.datatypes_by_extension:
        self.datatypes_by_extension = {
            'ab1'         : binary.Ab1(),
            'axt'         : sequence.Axt(),
            'bam'         : binary.Bam(),
            'bed'         : interval.Bed(),
            'blastxml'    : xml.BlastXml(),
            'coverage'    : coverage.LastzCoverage(),
            'customtrack' : interval.CustomTrack(),
            'csfasta'     : sequence.csFasta(),
            'fasta'       : sequence.Fasta(),
            'fastq'       : sequence.Fastq(),
            'fastqsanger' : sequence.FastqSanger(),
            'gtf'         : interval.Gtf(),
            'gff'         : interval.Gff(),
            'gff3'        : interval.Gff3(),
            'genetrack'   : tracks.GeneTrack(),
            'interval'    : interval.Interval(),
            'laj'         : images.Laj(),
            'lav'         : sequence.Lav(),
            'maf'         : sequence.Maf(),
            'pileup'      : tabular.Pileup(),
            'qualsolid'   : qualityscore.QualityScoreSOLiD(),
            'qualsolexa'  : qualityscore.QualityScoreSolexa(),
            'qual454'     : qualityscore.QualityScore454(),
            'sam'         : tabular.Sam(),
            'scf'         : binary.Scf(),
            'sff'         : binary.Sff(),
            'tabular'     : tabular.Tabular(),
            'taxonomy'    : tabular.Taxonomy(),
            'txt'         : data.Text(),
            'wig'         : interval.Wiggle()
        }
        self.mimetypes_by_extension = {
            'ab1'         : 'application/octet-stream',
            'axt'         : 'text/plain',
            'bam'         : 'application/octet-stream',
            'bed'         : 'text/plain',
            'blastxml'    : 'text/plain',
            'customtrack' : 'text/plain',
            'csfasta'     : 'text/plain',
            'fasta'       : 'text/plain',
            'fastq'       : 'text/plain',
            'fastqsanger' : 'text/plain',
            'gtf'         : 'text/plain',
            'gff'         : 'text/plain',
            'gff3'        : 'text/plain',
            'interval'    : 'text/plain',
            'laj'         : 'text/plain',
            'lav'         : 'text/plain',
            'maf'         : 'text/plain',
            'pileup'      : 'text/plain',
            'qualsolid'   : 'text/plain',
            'qualsolexa'  : 'text/plain',
            'qual454'     : 'text/plain',
            'sam'         : 'text/plain',
            'scf'         : 'application/octet-stream',
            'sff'         : 'application/octet-stream',
            'tabular'     : 'text/plain',
            'taxonomy'    : 'text/plain',
            'txt'         : 'text/plain',
            'wig'         : 'text/plain'
        }
    # Default values - the order in which we attempt to determine data types is critical
    # because some formats are much more flexibly defined than others.
    if not self.sniff_order:
        self.sniff_order = [
            binary.Bam(),
            binary.Sff(),
            xml.BlastXml(),
            sequence.Maf(),
            sequence.Lav(),
            sequence.csFasta(),
            qualityscore.QualityScoreSOLiD(),
            qualityscore.QualityScore454(),
            sequence.Fasta(),
            sequence.Fastq(),
            interval.Wiggle(),
            images.Html(),
            sequence.Axt(),
            interval.Bed(),
            interval.CustomTrack(),
            interval.Gtf(),
            interval.Gff(),
            interval.Gff3(),
            tabular.Pileup(),
            interval.Interval(),
            tabular.Sam()
        ]
    # Just in case any supported data types are not included in the config's sniff_order section:
    # append every registered datatype whose class is not already covered by an entry.
    for datatype in self.datatypes_by_extension.values():
        if not any( isinstance( atype, datatype.__class__ ) for atype in self.sniff_order ):
            self.sniff_order.append( datatype )
---|
249 | |
---|
def get_available_tracks(self):
    """Returns the list of extensions recorded in self.available_tracks"""
    track_extensions = self.available_tracks
    return track_extensions
---|
252 | |
---|
def get_mimetype_by_extension(self, ext, default = 'application/octet-stream' ):
    """
    Returns the mimetype registered for an extension.

    When the extension is unknown a warning is logged and `default` is
    returned instead.
    """
    registered = self.mimetypes_by_extension
    if ext in registered:
        return registered[ext]
    # datatype was never declared
    self.log.warning('unknown mimetype in data factory %s' % ext)
    return default
---|
262 | |
---|
def get_datatype_by_extension(self, ext ):
    """
    Returns the datatype registered for an extension.

    An extension that is not registered yields a new data.Text() instance.
    """
    try:
        return self.datatypes_by_extension[ext]
    except KeyError:
        return data.Text()
---|
270 | |
---|
def change_datatype(self, data, ext, set_meta = True ):
    """
    Sets the extension of `data` to `ext` and returns `data`.

    When the dataset reports having data, its size, metadata and peek are
    refreshed as well; set_meta=False skips the set_meta() call.
    """
    data.extension = ext
    if not data.has_data():
        return data
    data.set_size()
    # init_meta copies metadata from the dataset itself.  The datatype
    # being converted *to* will handle any metadata copying and
    # initialization.
    data.init_meta( copy_from=data )
    if set_meta:
        # metadata is being set internally
        data.set_meta( overwrite = False )
    data.set_peek()
    return data
---|
284 | |
---|
def old_change_datatype(self, data, ext):
    """Creates and returns a new datatype based on an existing data and an extension"""
    # NOTE(review): `factory` is neither defined nor imported in the visible part of
    # this file, so this call presumably raises NameError -- confirm whether this
    # legacy method is still reachable.
    constructor = factory(ext)
    newdata = constructor(id=data.id)
    # Copy every instance attribute of the existing object onto the new one.
    for attr_name, attr_value in data.__dict__.items():
        setattr(newdata, attr_name, attr_value)
    newdata.ext = ext
    return newdata
---|
292 | |
---|
def load_datatype_converters( self, toolbox ):
    """
    Adds datatype converters from self.converters to the calling app's toolbox

    Each entry of self.converters is a ( tool config file, source extension,
    target extension ) tuple.  The loaded tool is registered in
    toolbox.tools_by_id and in self.datatype_converters[source][target].
    A converter that fails to load is logged and skipped.
    """
    for tool_config, source_datatype, target_datatype in self.converters:
        converter_path = os.path.join( self.datatype_converters_path, tool_config )
        try:
            converter = toolbox.load_tool( converter_path )
            toolbox.tools_by_id[converter.id] = converter
            if source_datatype not in self.datatype_converters:
                self.datatype_converters[source_datatype] = odict()
            self.datatype_converters[source_datatype][target_datatype] = converter
            self.log.debug( "Loaded converter: %s", converter.id )
        except Exception:
            # Best effort per converter, but let KeyboardInterrupt / SystemExit through.
            self.log.exception( "error reading converter from path: %s" % converter_path )
---|
309 | |
---|
def load_external_metadata_tool( self, toolbox ):
    """
    Adds a tool which is used to set external metadata

    The tool definition is written to a temporary file, loaded through
    toolbox.load_tool, registered in toolbox.tools_by_id and stored on
    self.set_external_metadata_tool.
    """
    # We need to be able to add a job to the queue to set metadata. The queue will currently only accept jobs with an associated tool.
    # We'll create a special tool to be used for Auto-Detecting metadata; this is less than ideal, but effective.
    # Properly building a tool without relying on parsing an XML file is near impossible...so we'll create a temporary file.
    # Bytes literal: NamedTemporaryFile opens the file in binary mode by default.
    tool_xml_text = b"""
        <tool id="__SET_METADATA__" name="Set External Metadata" version="1.0.1" tool_type="set_metadata">
          <type class="SetMetadataTool" module="galaxy.tools"/>
          <action module="galaxy.tools.actions.metadata" class="SetMetadataToolAction"/>
          <command>$__SET_EXTERNAL_METADATA_COMMAND_LINE__</command>
          <inputs>
            <param format="data" name="input1" type="data" label="File to set metadata on."/>
            <param name="__ORIGINAL_DATASET_STATE__" type="hidden" value=""/>
            <param name="__SET_EXTERNAL_METADATA_COMMAND_LINE__" type="hidden" value=""/>
          </inputs>
        </tool>
        """
    tmp_file = tempfile.NamedTemporaryFile()
    try:
        tmp_file.write( tool_xml_text )
        # Flush so the content is on disk when load_tool opens the file by name.
        tmp_file.flush()
        set_meta_tool = toolbox.load_tool( tmp_file.name )
    finally:
        # Close (and thereby delete) the temporary file deterministically
        # instead of waiting for garbage collection.
        tmp_file.close()
    toolbox.tools_by_id[ set_meta_tool.id ] = set_meta_tool
    self.set_external_metadata_tool = set_meta_tool
    self.log.debug( "Loaded external metadata tool: %s", self.set_external_metadata_tool.id )
---|
334 | |
---|
def load_datatype_indexers( self, toolbox ):
    """
    Adds indexers from self.indexers to the toolbox from app

    Each entry of self.indexers is a ( tool config file, extension ) pair; the
    loaded tool is stored in toolbox.tools_by_id and self.datatype_indexers.
    """
    for tool_config, datatype in self.indexers:
        indexer_path = os.path.join( self.datatype_indexers_path, tool_config )
        indexer = toolbox.load_tool( indexer_path )
        toolbox.tools_by_id[indexer.id] = indexer
        self.datatype_indexers[datatype] = indexer
        self.log.debug( "Loaded indexer: %s", indexer.id )
---|
344 | |
---|
def get_converters_by_datatype(self, ext):
    """
    Returns available converters by source type

    Converters registered for any extension whose datatype class is the same
    as, or a base class of, the datatype class of `ext` are merged into one
    odict keyed by target extension.
    """
    converters = odict()
    source_class = type( self.get_datatype_by_extension( ext ) )
    for converter_ext, targets in self.datatype_converters.items():
        converter_class = type( self.get_datatype_by_extension( converter_ext ) )
        if not issubclass( source_class, converter_class ):
            continue
        converters.update( targets )
    # Ensure ext-level converters are present (applied last, so they win over inherited ones)
    if ext in self.datatype_converters.keys():
        converters.update( self.datatype_converters[ext] )
    return converters
---|
357 | |
---|
def get_indexers_by_datatype( self, ext ):
    """
    Returns indexers based on datatype

    Collects the indexers registered for every extension whose datatype class
    is the same as, or a base class of, the datatype class of `ext`, ordered
    with the most derived class first.
    """
    from functools import cmp_to_key

    def class_of( extension ):
        # get_datatype_by_extension returns an instance; issubclass needs the class.
        return type( self.get_datatype_by_extension( extension ) )

    def by_specificity( ext_x, ext_y ):
        class_x = class_of( ext_x )
        class_y = class_of( ext_y )
        if class_x is class_y:
            return 0
        return -1 if issubclass( class_x, class_y ) else 1

    source_datatype = class_of( ext )
    class_chain = [ ext_spec for ext_spec in self.datatype_indexers.keys()
                    if issubclass( source_datatype, class_of( ext_spec ) ) ]
    # Prioritize based on class chain
    class_chain.sort( key=cmp_to_key( by_specificity ) )
    return [ self.datatype_indexers[x] for x in class_chain ]
---|
370 | |
---|
def get_converter_by_target_type(self, source_ext, target_ext):
    """
    Returns a converter based on source and target datatypes

    None is returned when no converter from `source_ext` to `target_ext` is
    available.
    """
    available = self.get_converters_by_datatype(source_ext)
    if target_ext not in available.keys():
        return None
    return available[target_ext]
---|
377 | |
---|
def find_conversion_destination_for_dataset_by_extensions( self, dataset, accepted_formats, converter_safe = True ):
    """
    Returns ( target_ext, existing converted dataset )

    Walks the converters available for dataset.ext and picks the first target
    extension whose datatype is an instance of `accepted_formats` (a class or
    tuple of classes, as accepted by isinstance).  The second item is the
    already converted dataset for that target, or None when there is none.
    With converter_safe=False, targets without an existing converted dataset
    are skipped.  ( None, None ) is returned when nothing qualifies.
    """
    for convert_ext in self.get_converters_by_datatype( dataset.ext ):
        if not isinstance( self.get_datatype_by_extension( convert_ext ), accepted_formats ):
            continue
        # Keep the lookup result in its own name: rebinding `dataset` here would make
        # the next iteration call get_converted_files_by_type on the (possibly None) result.
        converted = dataset.get_converted_files_by_type( convert_ext )
        if converted:
            return ( convert_ext, converted )
        if converter_safe:
            return ( convert_ext, None )
    return ( None, None )
---|
391 | |
---|
def get_composite_extensions( self ):
    """Returns the extensions whose datatype has a composite_type other than None"""
    composite_extensions = []
    for ext, d_type in self.datatypes_by_extension.iteritems():
        if d_type.composite_type is None:
            continue
        composite_extensions.append( ext )
    return composite_extensions
---|
394 | |
---|
def get_upload_metadata_params( self, context, group, tool ):
    """
    Returns dict of case value:inputs for metadata conditional for upload tool

    For every registered extension the value is a newline-joined string of
    <param> elements, one per metadata element whose spec has set_in_upload
    set (an empty string when there is none).  An 'auto' entry mirroring
    'txt' is added when 'txt' is present and 'auto' is not.  The context,
    group and tool arguments are not used here.
    """
    # The quotes around the metadata name inside the label attribute are written as
    # &quot; so the attribute value stays well-formed XML.
    param_template = '<param type="text" name="%s" label="Set metadata value for &quot;%s&quot;" value="%s" help="%s"/>'
    rval = {}
    for ext, d_type in self.datatypes_by_extension.iteritems():
        inputs = []
        for meta_name, meta_spec in d_type.metadata_spec.iteritems():
            if not meta_spec.set_in_upload:
                continue
            help_txt = meta_spec.desc
            # A description that merely repeats the name adds nothing as help text.
            if not help_txt or help_txt == meta_name:
                help_txt = ""
            inputs.append( param_template % ( meta_name, meta_name, meta_spec.default, help_txt ) )
        rval[ ext ] = "\n".join( inputs )
    if 'auto' not in rval and 'txt' in rval: # need to manually add 'auto' datatype
        rval[ 'auto' ] = rval[ 'txt' ]
    return rval
---|
410 | |
---|