root/galaxy-central/lib/galaxy/tools/__init__.py @ 2

Revision 2, 99.3 KB (committer: hatakeyama, 14 years ago)

1"""
2Classes encapsulating galaxy tools and tool configuration.
3"""
4import pkg_resources;
5
6pkg_resources.require( "simplejson" )
7
8import logging, os, string, sys, tempfile, glob, shutil, types, urllib
9import simplejson
10import binascii
11from UserDict import DictMixin
12from galaxy.util.odict import odict
13from galaxy.util.bunch import Bunch
14from galaxy.util.template import fill_template
15from galaxy import util, jobs, model
16from elementtree import ElementTree
17from parameters import *
18from parameters.grouping import *
19from parameters.output import ToolOutputActionGroup
20from parameters.validation import LateValidationError
21from parameters.input_translation import ToolInputTranslator
22from galaxy.util.expressions import ExpressionContext
23from galaxy.tools.test import ToolTestBuilder
24from galaxy.tools.actions import DefaultToolAction
25from galaxy.tools.deps import DependencyManager
26from galaxy.model import directory_hash_id
27from galaxy.util.none_like import NoneDataset
28from galaxy.datatypes import sniff
29from cgi import FieldStorage
30from galaxy.util.hash_util import *
31
32log = logging.getLogger( __name__ )

class ToolNotFoundException( Exception ):
    pass

class ToolBox( object ):
    """
    Container for a collection of tools
    """

    def __init__( self, config_filename, tool_root_dir, app ):
        """
        Create a toolbox from the config file named by `config_filename`,
        using `tool_root_dir` as the base directory for finding individual
        tool config files.
        """
        self.tools_by_id = {}
        self.workflows_by_id = {}
        self.tool_panel = odict()
        self.tool_root_dir = tool_root_dir
        self.app = app
        self.init_dependency_manager()
        try:
            self.init_tools( config_filename )
        except:
            log.exception( "ToolBox error reading %s", config_filename )
    def init_tools( self, config_filename ):
        """
        Read the configuration file and load each tool.
        The following tags are currently supported:
        <toolbox>
            <tool file="data_source/upload.xml"/>            # tools outside sections
            <label text="Basic Tools" id="basic_tools" />    # labels outside sections
            <workflow id="529fd61ab1c6cc36" />               # workflows outside sections
            <section name="Get Data" id="getext">            # sections
                <tool file="data_source/biomart.xml" />      # tools inside sections
                <label text="In Section" id="in_section" />  # labels inside sections
                <workflow id="adb5f5c93f827949" />           # workflows inside sections
            </section>
        </toolbox>
        """
        def load_tool( elem, panel_dict ):
            try:
                path = elem.get( "file" )
                tool = self.load_tool( os.path.join( self.tool_root_dir, path ) )
                self.tools_by_id[ tool.id ] = tool
                key = 'tool_' + tool.id
                panel_dict[ key ] = tool
                log.debug( "Loaded tool: %s %s" % ( tool.id, tool.version ) )
            except:
                log.exception( "error reading tool from path: %s" % path )
        def load_workflow( elem, panel_dict ):
            try:
                # TODO: should id be encoded?
                workflow_id = elem.get( 'id' )
                workflow = self.load_workflow( workflow_id )
                self.workflows_by_id[ workflow_id ] = workflow
                key = 'workflow_' + workflow_id
                panel_dict[ key ] = workflow
                log.debug( "Loaded workflow: %s %s" % ( workflow_id, workflow.name ) )
            except:
                log.exception( "error loading workflow: %s" % workflow_id )
        def load_label( elem, panel_dict ):
            label = ToolSectionLabel( elem )
            key = 'label_' + label.id
            panel_dict[ key ] = label
        def load_section( elem, panel_dict ):
            section = ToolSection( elem )
            log.debug( "Loading section: %s" % section.name )
            for section_elem in elem:
                if section_elem.tag == 'tool':
                    load_tool( section_elem, section.elems )
                elif section_elem.tag == 'workflow':
                    load_workflow( section_elem, section.elems )
                elif section_elem.tag == 'label':
                    load_label( section_elem, section.elems )
            key = 'section_' + section.id
            panel_dict[ key ] = section

        log.info( "parsing the tool configuration" )
        tree = util.parse_xml( config_filename )
        root = tree.getroot()
        for elem in root:
            if elem.tag == 'tool':
                load_tool( elem, self.tool_panel )
            elif elem.tag == 'workflow':
                load_workflow( elem, self.tool_panel )
            elif elem.tag == 'section':
                load_section( elem, self.tool_panel )
            elif elem.tag == 'label':
                load_label( elem, self.tool_panel )
    def load_tool( self, config_file ):
        """
        Load a single tool from the file named by `config_file` and return
        an instance of `Tool`.
        """
        # Parse XML configuration file and get the root element
        tree = util.parse_xml( config_file )
        root = tree.getroot()
        # Allow specifying a different tool subclass to instantiate
        if root.find( "type" ) is not None:
            type_elem = root.find( "type" )
            module = type_elem.get( 'module', 'galaxy.tools' )
            cls = type_elem.get( 'class' )
            mod = __import__( module, globals(), locals(), [cls] )
            ToolClass = getattr( mod, cls )
        elif root.get( 'tool_type', None ) is not None:
            ToolClass = tool_types.get( root.get( 'tool_type' ) )
        else:
            ToolClass = Tool
        return ToolClass( config_file, root, self.app )

    def reload( self, tool_id ):
        """
        Attempt to reload the tool identified by `tool_id` and, if successful,
        replace the old tool.
        """
        if tool_id not in self.tools_by_id:
            raise ToolNotFoundException( "No tool with id %s" % tool_id )
        old_tool = self.tools_by_id[ tool_id ]
        new_tool = self.load_tool( old_tool.config_file )
        # Replace old_tool with new_tool in self.tool_panel
        tool_key = 'tool_' + tool_id
        for key, val in self.tool_panel.items():
            if key == tool_key:
                self.tool_panel[ key ] = new_tool
                break
            elif key.startswith( 'section' ):
                section = val
                for section_key, section_val in section.elems.items():
                    if section_key == tool_key:
                        self.tool_panel[ key ].elems[ section_key ] = new_tool
                        break
        self.tools_by_id[ tool_id ] = new_tool
        log.debug( "Reloaded tool %s %s" % ( old_tool.id, old_tool.version ) )

    def load_workflow( self, workflow_id ):
        """
        Return an instance of `Workflow` identified by `workflow_id`, which
        is encoded in the tool panel configuration.
        """
        id = self.app.security.decode_id( workflow_id )
        stored = self.app.model.context.query( self.app.model.StoredWorkflow ).get( id )
        return stored.latest_workflow

    def init_dependency_manager( self ):
        self.dependency_manager = None
        if self.app.config.use_tool_dependencies:
            self.dependency_manager = DependencyManager( [ self.app.config.tool_dependency_dir ] )

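# Illustrative sketch (not part of the original module): constructing a
# ToolBox and reloading a tool. Assumes `app` is a configured Galaxy
# application object and that "tool_conf.xml" follows the layout shown in
# the init_tools docstring above.
#
#   toolbox = ToolBox( "tool_conf.xml", "tools", app )
#   tool = toolbox.tools_by_id.get( "upload1" )   # look up a loaded tool by id
#   toolbox.reload( "upload1" )                   # re-read its XML config file
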
class ToolSection( object ):
    """
    A group of tools with similar type/purpose that will be displayed as a
    group in the user interface.
    """
    def __init__( self, elem ):
        self.name = elem.get( "name" )
        self.id = elem.get( "id" )
        self.version = elem.get( "version" )
        self.elems = odict()

class ToolSectionLabel( object ):
    """
    A label for a set of tools that can be displayed above groups of tools
    and sections in the user interface.
    """
    def __init__( self, elem ):
        self.text = elem.get( "text" )
        self.id = elem.get( "id" )
        self.version = elem.get( "version" )

class DefaultToolState( object ):
    """
    Keeps track of the state of a user's interaction with a tool between
    requests. The default tool state keeps track of the current page (for
    multipage "wizard" tools) and the values of all parameters.
    """
    def __init__( self ):
        self.page = 0
        self.inputs = None
    def encode( self, tool, app, secure=True ):
        """
        Convert the data to a string
        """
        # Convert parameters to a dictionary of strings, and save the current
        # page in that dict
        value = params_to_strings( tool.inputs, self.inputs, app )
        value["__page__"] = self.page
        value = simplejson.dumps( value )
        # Make it secure
        if secure:
            a = hmac_new( app.config.tool_secret, value )
            b = binascii.hexlify( value )
            return "%s:%s" % ( a, b )
        else:
            return value
    def decode( self, value, tool, app, secure=True ):
        """
        Restore the state from a string
        """
        if secure:
            # Extract and verify hash
            a, b = value.split( ":" )
            value = binascii.unhexlify( b )
            test = hmac_new( app.config.tool_secret, value )
            assert a == test
        # Restore from string
        values = json_fix( simplejson.loads( value ) )
        self.page = values.pop( "__page__" )
        self.inputs = params_from_strings( tool.inputs, values, app, ignore_errors=True )

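# Illustrative sketch (not part of the original module): round-tripping tool
# state through encode()/decode(). Assumes `tool`, `app` and `trans` are a
# loaded Tool, the Galaxy application, and a request transaction; the secure
# form signs the simplejson payload with app.config.tool_secret and produces
# "<hmac>:<hex-encoded json>".
#
#   state = tool.new_state( trans )
#   token = state.encode( tool, app )      # e.g. "ab12...:7b225f5f70616765..."
#   restored = DefaultToolState()
#   restored.decode( token, tool, app )    # verifies the HMAC, restores inputs
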
class ToolOutput( object ):
    """
    Represents an output dataset produced by a tool. For backward
    compatibility this behaves as if it were the tuple:
      ( format, metadata_source, parent )
    """

    def __init__( self, name, format=None, metadata_source=None,
                  parent=None, label=None, filters=None, actions=None ):
        self.name = name
        self.format = format
        self.metadata_source = metadata_source
        self.parent = parent
        self.label = label
        self.filters = filters or []
        self.actions = actions

    # Tuple emulation

    def __len__( self ):
        return 3

    def __getitem__( self, index ):
        if index == 0:
            return self.format
        elif index == 1:
            return self.metadata_source
        elif index == 2:
            return self.parent
        else:
            raise IndexError( index )

    def __iter__( self ):
        return iter( ( self.format, self.metadata_source, self.parent ) )

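# Illustrative sketch (not part of the original module): because ToolOutput
# emulates a 3-tuple, older code that expects ( format, metadata_source,
# parent ) keeps working. Assumes `output` is a ToolOutput instance.
#
#   format, metadata_source, parent = output    # via __iter__
#   assert output[0] == output.format           # via __getitem__
#   assert len( output ) == 3                   # via __len__
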
class ToolRequirement( object ):
    """
    Represents an external requirement that must be available for the tool to
    run (for example, a program, package, or library). Requirements can
    optionally assert a specific version.
    """
    def __init__( self ):
        self.name = None
        self.type = None
        self.version = None

class Tool:
    """
    Represents a computational tool that can be executed through Galaxy.
    """

    tool_type = 'default'

    def __init__( self, config_file, root, app ):
        """
        Load a tool from the config named by `config_file`
        """
        # Determine the full path of the directory where the tool config is
        self.config_file = config_file
        self.tool_dir = os.path.dirname( config_file )
        self.app = app
        # Parse XML element containing configuration
        self.parse( root )

    @property
    def sa_session( self ):
        """
        Returns a SQLAlchemy session
        """
        return self.app.model.context

    def parse( self, root ):
        """
        Read tool configuration from the element `root` and fill in `self`.
        """
        # Get the (user visible) name of the tool
        self.name = root.get( "name" )
        if not self.name:
            raise Exception, "Missing tool 'name'"
        # Get the UNIQUE id for the tool
        # TODO: can this be generated automatically?
        self.id = root.get( "id" )
        if not self.id:
            raise Exception, "Missing tool 'id'"
        self.version = root.get( "version" )
        if not self.version:
            # For backward compatibility, some tools may not have versions yet.
            self.version = "1.0.0"
        # Support multi-byte tools
        self.is_multi_byte = util.string_as_bool( root.get( "is_multi_byte", False ) )
        # Force history to fully refresh after job execution for this tool.
        # Useful, e.g., when an indeterminate number of outputs are created by
        # a tool.
        self.force_history_refresh = util.string_as_bool( root.get( 'force_history_refresh', 'False' ) )
        # Load input translator, used by datasource tools to change
        # names/values of incoming parameters
        self.input_translator = root.find( "request_param_translation" )
        if self.input_translator:
            self.input_translator = ToolInputTranslator.from_element( self.input_translator )
        # Command line (template). Optional for tools that do not invoke a
        # local program
        command = root.find( "command" )
        if command is not None and command.text is not None:
            self.command = command.text.lstrip() # get rid of leading whitespace
            interpreter = command.get( "interpreter" )
            if interpreter:
                # TODO: path munging for cluster/dataset server relocatability
                executable = self.command.split()[0]
                abs_executable = os.path.abspath( os.path.join( self.tool_dir, executable ) )
                self.command = self.command.replace( executable, abs_executable, 1 )
                self.command = interpreter + " " + self.command
        else:
            self.command = ''
        # Parameters used to build URL for redirection to external app
        redirect_url_params = root.find( "redirect_url_params" )
        if redirect_url_params is not None and redirect_url_params.text is not None:
            # get rid of leading / trailing white space
            redirect_url_params = redirect_url_params.text.strip()
            # Replace remaining white space with something we can safely split on later
            # when we are building the params
            self.redirect_url_params = redirect_url_params.replace( ' ', '**^**' )
        else:
            self.redirect_url_params = ''
        # Short description of the tool
        self.description = util.xml_text( root, "description" )
        # Job runner
        if self.app.config.start_job_runners is None:
            # Jobs are always local regardless of tool config if no additional
            # runners are started
            self.job_runner = "local:///"
        else:
            # Set job runner to the cluster default
            self.job_runner = self.app.config.default_cluster_job_runner
            for tup in self.app.config.tool_runners:
                if tup[0] == self.id.lower():
                    self.job_runner = tup[1]
                    break
        # Is this a 'hidden' tool (hidden in the tool menu)?
        self.hidden = util.xml_text( root, "hidden" )
        if self.hidden:
            self.hidden = util.string_as_bool( self.hidden )
        # Load any tool specific code (optional). Edit: INS 5/29/2007 --
        # allow code files to have access to the individual tool's
        # "module" if it has one.  Allows us to reuse code files, etc.
        self.code_namespace = dict()
        self.hook_map = {}
        for code_elem in root.findall( "code" ):
            for hook_elem in code_elem.findall( "hook" ):
                for key, value in hook_elem.items():
                    # map hook to function
                    self.hook_map[key] = value
            file_name = code_elem.get( "file" )
            code_path = os.path.join( self.tool_dir, file_name )
            execfile( code_path, self.code_namespace )
        # Load any tool specific options (optional)
        self.options = dict( sanitize=True, refresh=False )
        for option_elem in root.findall( "options" ):
            for option, value in self.options.copy().items():
                if isinstance( value, type( False ) ):
                    self.options[option] = util.string_as_bool( option_elem.get( option, str( value ) ) )
                else:
                    self.options[option] = option_elem.get( option, str( value ) )
        self.options = Bunch( **self.options )
        # Parse tool inputs (if there are any required)
        self.parse_inputs( root )
        # Parse tool help
        self.parse_help( root )
        # Description of outputs produced by an invocation of the tool
        self.parse_outputs( root )
        # Any extra generated config files for the tool
        self.config_files = []
        conf_parent_elem = root.find( "configfiles" )
        if conf_parent_elem:
            for conf_elem in conf_parent_elem.findall( "configfile" ):
                name = conf_elem.get( "name" )
                filename = conf_elem.get( "filename", None )
                text = conf_elem.text
                self.config_files.append( ( name, filename, text ) )
        # Action
        action_elem = root.find( "action" )
        if action_elem is None:
            self.tool_action = DefaultToolAction()
        else:
            module = action_elem.get( 'module' )
            cls = action_elem.get( 'class' )
            mod = __import__( module, globals(), locals(), [cls] )
            self.tool_action = getattr( mod, cls )()
        # User interface hints
        self.uihints = {}
        uihints_elem = root.find( "uihints" )
        if uihints_elem is not None:
            for key, value in uihints_elem.attrib.iteritems():
                self.uihints[ key ] = value
        # Tests
        tests_elem = root.find( "tests" )
        if tests_elem:
            try:
                self.parse_tests( tests_elem )
            except:
                log.exception( "Failed to parse tool tests" )
        else:
            self.tests = None
        # Requirements (dependencies)
        self.requirements = []
        requirements_elem = root.find( "requirements" )
        if requirements_elem:
            self.parse_requirements( requirements_elem )
        # Determine if this tool can be used in workflows
        self.is_workflow_compatible = self.check_workflow_compatible()

    def parse_inputs( self, root ):
        """
        Parse the "<inputs>" element and create appropriate `ToolParameter`s.
        This implementation supports multiple pages and grouping constructs.
        """
        # Load parameters (optional)
        input_elem = root.find( "inputs" )
        if input_elem:
            # Handle properties of the input form
            self.check_values = util.string_as_bool( input_elem.get( "check_values", "true" ) )
            self.nginx_upload = util.string_as_bool( input_elem.get( "nginx_upload", "false" ) )
            self.action = input_elem.get( 'action', '/tool_runner/index' )
            # If we have an nginx upload, save the action as a tuple instead of
            # a string. The actual action needs to get url_for run to add any
            # prefixes, and we want to avoid adding the prefix to the
            # nginx_upload_path. This logic is handled in the tool_form.mako
            # template.
            if self.nginx_upload and self.app.config.nginx_upload_path:
                if '?' in urllib.unquote_plus( self.action ):
                    raise Exception( 'URL parameters in a non-default tool action can not be used ' \
                                     'in conjunction with nginx upload.  Please convert them to ' \
                                     'hidden POST parameters' )
                self.action = ( self.app.config.nginx_upload_path + '?nginx_redir=',
                                urllib.unquote_plus( self.action ) )
            self.target = input_elem.get( "target", "galaxy_main" )
            self.method = input_elem.get( "method", "post" )
            # Parse the actual parameters
            self.inputs = odict()
            self.inputs_by_page = list()
            self.display_by_page = list()
            enctypes = set()
            # Handle multiple page case
            pages = input_elem.findall( "page" )
            for page in ( pages or [ input_elem ] ):
                display, inputs = self.parse_input_page( page, enctypes )
                self.inputs_by_page.append( inputs )
                self.inputs.update( inputs )
                self.display_by_page.append( display )
            self.display = self.display_by_page[0]
            self.npages = len( self.inputs_by_page )
            self.last_page = len( self.inputs_by_page ) - 1
            self.has_multiple_pages = bool( self.last_page )
            # Determine the needed enctype for the form
            if len( enctypes ) == 0:
                self.enctype = "application/x-www-form-urlencoded"
            elif len( enctypes ) == 1:
                self.enctype = enctypes.pop()
            else:
                raise Exception, "Conflicting required enctypes: %s" % str( enctypes )
        # Check if the tool either has no parameters or only hidden (and
        # thus hardcoded) parameters. FIXME: hidden parameters aren't
        # parameters at all really, and should be passed in a different
        # way, making this check easier.
        self.input_required = False
        for param in self.inputs.values():
            if not isinstance( param, ( HiddenToolParameter, BaseURLToolParameter ) ):
                self.input_required = True
                break

    def parse_help( self, root ):
        """
        Parse the help text for the tool. Formatted in reStructuredText.
        This implementation supports multiple pages.
        """
        # TODO: Allow raw HTML or an external link.
        self.help = root.find( "help" )
        self.help_by_page = list()
        help_header = ""
        help_footer = ""
        if self.help is not None:
            help_pages = self.help.findall( "page" )
            help_header = self.help.text
            try:
                self.help = util.rst_to_html( self.help.text )
            except:
                log.exception( "error in help for tool %s" % self.name )
            # Multiple help page case
            if help_pages:
                for help_page in help_pages:
                    self.help_by_page.append( help_page.text )
                    help_footer = help_footer + help_page.tail
        # Each page has to be rendered all together because of the
        # backreferences allowed by rst
        try:
            self.help_by_page = [ util.rst_to_html( help_header + x + help_footer )
                                  for x in self.help_by_page ]
        except:
            log.exception( "error in multi-page help for tool %s" % self.name )
        # Pad out help pages to match npages ... could this be done better?
        while len( self.help_by_page ) < self.npages:
            self.help_by_page.append( self.help )

    def parse_outputs( self, root ):
        """
        Parse <outputs> elements and fill in self.outputs (keyed by name)
        """
        self.outputs = odict()
        out_elem = root.find( "outputs" )
        if not out_elem:
            return
        for data_elem in out_elem.findall( "data" ):
            output = ToolOutput( data_elem.get( "name" ) )
            output.format = data_elem.get( "format", "data" )
            output.change_format = data_elem.findall( "change_format" )
            output.metadata_source = data_elem.get( "metadata_source", "" )
            output.parent = data_elem.get( "parent", None )
            output.label = util.xml_text( data_elem, "label" )
            output.count = int( data_elem.get( "count", 1 ) )
            output.filters = data_elem.findall( 'filter' )
            output.tool = self
            output.actions = ToolOutputActionGroup( output, data_elem.find( 'actions' ) )
            self.outputs[ output.name ] = output

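    # Illustrative sketch (not part of the original module): the kind of
    # <outputs> element parse_outputs() consumes. The tag and attribute names
    # are the ones read above; the example values are made up.
    #
    #   <outputs>
    #       <data name="output1" format="tabular" metadata_source="input1">
    #           <filter>some_param == "yes"</filter>
    #       </data>
    #   </outputs>
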
    def parse_tests( self, tests_elem ):
        """
        Parse any "<test>" elements, create a `ToolTestBuilder` for each and
        store in `self.tests`.
        """
        self.tests = []
        # Composite datasets need a unique name: each test occurs in a fresh
        # history, but we'll keep it unique per set of tests
        composite_data_names_counter = 0
        for i, test_elem in enumerate( tests_elem.findall( 'test' ) ):
            name = test_elem.get( 'name', 'Test-%d' % ( i + 1 ) )
            maxseconds = int( test_elem.get( 'maxseconds', '120' ) )
            test = ToolTestBuilder( self, name, maxseconds )
            try:
                for param_elem in test_elem.findall( "param" ):
                    attrib = dict( param_elem.attrib )
                    if 'values' in attrib:
                        value = attrib[ 'values' ].split( ',' )
                    elif 'value' in attrib:
                        value = attrib['value']
                    else:
                        value = None
                    attrib['children'] = list( param_elem.getchildren() )
                    if attrib['children']:
                        # At this time, we can assume having children only
                        # occurs on DataToolParameter test items but this could
                        # change and would cause the below parsing to change
                        # based upon differences in children items
                        attrib['metadata'] = []
                        attrib['composite_data'] = []
                        attrib['edit_attributes'] = []
                        # Composite datasets need to be renamed uniquely
                        composite_data_name = None
                        for child in attrib['children']:
                            if child.tag == 'composite_data':
                                attrib['composite_data'].append( child )
                                if composite_data_name is None:
                                    # Generate a unique name; each test uses a
                                    # fresh history
                                    composite_data_name = '_COMPOSITE_RENAMED_%i_' \
                                        % ( composite_data_names_counter )
                                    composite_data_names_counter += 1
                            elif child.tag == 'metadata':
                                attrib['metadata'].append( child )
                            elif child.tag == 'edit_attributes':
                                attrib['edit_attributes'].append( child )
                        if composite_data_name:
                            # Composite datasets need implicit renaming;
                            # inserted at front of list so explicit declarations
                            # take precedence
                            attrib['edit_attributes'].insert( 0, { 'type': 'name', 'value': composite_data_name } )
                    test.add_param( attrib.pop( 'name' ), value, attrib )
                for output_elem in test_elem.findall( "output" ):
                    attrib = dict( output_elem.attrib )
                    name = attrib.pop( 'name', None )
                    if name is None:
                        raise Exception( "Test output does not have a 'name'" )
                    file = attrib.pop( 'file', None )
                    if file is None:
                        raise Exception( "Test output does not have a 'file'" )
                    attributes = {}
                    # Method of comparison
                    attributes['compare'] = attrib.pop( 'compare', 'diff' ).lower()
                    # Number of lines to allow to vary in logs (for dates, etc)
                    attributes['lines_diff'] = int( attrib.pop( 'lines_diff', '0' ) )
                    # Allow a file size to vary if sim_size compare
                    attributes['delta'] = int( attrib.pop( 'delta', '10000' ) )
                    attributes['sort'] = util.string_as_bool( attrib.pop( 'sort', False ) )
                    attributes['extra_files'] = []
                    for extra in output_elem.findall( 'extra_files' ):
                        # File or directory, when directory, compare basename
                        # by basename
                        extra_type = extra.get( 'type', 'file' )
                        extra_name = extra.get( 'name', None )
                        assert extra_type == 'directory' or extra_name is not None, \
                            'extra_files type (%s) requires a name attribute' % extra_type
                        extra_value = extra.get( 'value', None )
                        assert extra_value is not None, 'extra_files requires a value attribute'
                        extra_attributes = {}
                        extra_attributes['compare'] = extra.get( 'compare', 'diff' ).lower()
                        extra_attributes['delta'] = extra.get( 'delta', '0' )
                        extra_attributes['lines_diff'] = int( extra.get( 'lines_diff', '0' ) )
                        extra_attributes['sort'] = util.string_as_bool( extra.get( 'sort', False ) )
                        attributes['extra_files'].append( ( extra_type, extra_value, extra_name, extra_attributes ) )
                    test.add_output( name, file, attributes )
            except Exception, e:
                test.error = True
                test.exception = e
            self.tests.append( test )

    def parse_input_page( self, input_elem, enctypes ):
        """
        Parse a page of inputs. This basically just calls `parse_input_elem`,
        but it also deals with possible 'display' elements, which are supported
        only at the top/page level (not in groups).
        """
        inputs = self.parse_input_elem( input_elem, enctypes )
        # Display
        display_elem = input_elem.find( "display" )
        if display_elem is not None:
            display = util.xml_to_string( display_elem )
        else:
            display = None
        return display, inputs

    def parse_input_elem( self, parent_elem, enctypes, context=None ):
        """
        Parse a parent element whose children are inputs -- these could be
        groups (repeat, conditional) or param elements. Groups will be parsed
        recursively.
        """
        rval = odict()
        context = ExpressionContext( rval, context )
        for elem in parent_elem:
            # Repeat group
            if elem.tag == "repeat":
                group = Repeat()
                group.name = elem.get( "name" )
                group.title = elem.get( "title" )
                group.inputs = self.parse_input_elem( elem, enctypes, context )
                group.default = int( elem.get( "default", 0 ) )
                group.min = int( elem.get( "min", 0 ) )
                # Use float instead of int so that 'inf' can be used for no max
                group.max = float( elem.get( "max", "inf" ) )
                assert group.min <= group.max, \
                    ValueError( "Min repeat count must be less-than-or-equal to the max." )
                # Force default to be within min-max range
                group.default = min( max( group.default, group.min ), group.max )
                rval[group.name] = group
            elif elem.tag == "conditional":
                group = Conditional()
                group.name = elem.get( "name" )
                group.value_ref = elem.get( 'value_ref', None )
                group.value_ref_in_group = util.string_as_bool( elem.get( 'value_ref_in_group', 'True' ) )
                value_from = elem.get( "value_from" )
                if value_from:
                    value_from = value_from.split( ':' )
                    group.value_from = locals().get( value_from[0] )
                    group.test_param = rval[ group.value_ref ]
                    group.test_param.refresh_on_change = True
                    for attr in value_from[1].split( '.' ):
                        group.value_from = getattr( group.value_from, attr )
                    for case_value, case_inputs in group.value_from( context, group, self ).iteritems():
                        case = ConditionalWhen()
                        case.value = case_value
                        if case_inputs:
                            case.inputs = self.parse_input_elem(
                                ElementTree.XML( "<when>%s</when>" % case_inputs ), enctypes, context )
                        else:
                            case.inputs = {}
                        group.cases.append( case )
                else:
                    # Should have one child "input" which determines the case
                    input_elem = elem.find( "param" )
                    assert input_elem is not None, "<conditional> must have a child <param>"
                    group.test_param = self.parse_param_elem( input_elem, enctypes, context )
                    # Must refresh when test_param changes
                    group.test_param.refresh_on_change = True
                    # And a set of possible cases
                    for case_elem in elem.findall( "when" ):
                        case = ConditionalWhen()
                        case.value = case_elem.get( "value" )
                        case.inputs = self.parse_input_elem( case_elem, enctypes, context )
                        group.cases.append( case )
                rval[group.name] = group
            elif elem.tag == "upload_dataset":
                group = UploadDataset()
                group.name = elem.get( "name" )
                group.title = elem.get( "title" )
                group.file_type_name = elem.get( 'file_type_name', group.file_type_name )
                group.default_file_type = elem.get( 'default_file_type', group.default_file_type )
                group.metadata_ref = elem.get( 'metadata_ref', group.metadata_ref )
                rval[ group.file_type_name ].refresh_on_change = True
                rval[ group.file_type_name ].refresh_on_change_values = \
                    self.app.datatypes_registry.get_composite_extensions()
                group.inputs = self.parse_input_elem( elem, enctypes, context )
                rval[ group.name ] = group
            elif elem.tag == "param":
                param = self.parse_param_elem( elem, enctypes, context )
                rval[param.name] = param
        return rval

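    # Illustrative sketch (not part of the original module): the grouping
    # constructs parse_input_elem() understands. Tag and attribute names are
    # the ones read above; the parameter names and values are made up.
    #
    #   <inputs>
    #       <param name="input1" type="data" format="tabular" label="Dataset"/>
    #       <repeat name="queries" title="Query" min="1" max="5">
    #           <param name="q" type="text" label="Query string"/>
    #       </repeat>
    #       <conditional name="mode">
    #           <param name="mode_select" type="select" label="Mode"/>
    #           <when value="simple">
    #               <param name="cutoff" type="integer" value="10"/>
    #           </when>
    #       </conditional>
    #   </inputs>
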
    def parse_param_elem( self, input_elem, enctypes, context ):
        """
        Parse a single "<param>" element and return a ToolParameter instance.
        Also, if the parameter has a 'required_enctype', add it to the set
        `enctypes`.
        """
        param = ToolParameter.build( self, input_elem )
        param_enctype = param.get_required_enctype()
        if param_enctype:
            enctypes.add( param_enctype )
        # If the parameter depends on any other parameters, we must refresh the
        # form when it changes
        for name in param.get_dependencies():
            context[ name ].refresh_on_change = True
        return param

    def parse_requirements( self, requirements_elem ):
        """
        Parse each requirement from the <requirements> element and add to
        self.requirements
        """
        for requirement_elem in requirements_elem.findall( 'requirement' ):
            requirement = ToolRequirement()
            requirement.name = util.xml_text( requirement_elem )
            requirement.type = requirement_elem.get( "type", "package" )
            requirement.version = requirement_elem.get( "version" )
            self.requirements.append( requirement )

    def check_workflow_compatible( self ):
        """
        Determine if a tool can be used in workflows. External tools and the
        upload tool are currently not supported by workflows.
        """
        # Multiple page tools are not supported -- we're eliminating most
        # of these anyway
        if self.has_multiple_pages:
            return False
        # This is probably the best bet for detecting external web tools
        # right now
        if self.action != "/tool_runner/index":
            return False
        # HACK: upload is (as always) a special case because file parameters
        #       can't be persisted.
        if self.id == "upload1":
            return False
        # TODO: Any way to capture tools that dynamically change their own
        #       outputs?
        return True

    def new_state( self, trans, all_pages=False ):
        """
        Create a new `DefaultToolState` for this tool. It will be initialized
        with default values for inputs.

        Only inputs on the first page will be initialized unless `all_pages` is
        True, in which case all inputs regardless of page are initialized.
        """
        state = DefaultToolState()
        state.inputs = {}
        if all_pages:
            inputs = self.inputs
        else:
            inputs = self.inputs_by_page[ 0 ]
        self.fill_in_new_state( trans, inputs, state.inputs )
        return state

    def fill_in_new_state( self, trans, inputs, state, context=None ):
        """
        Fill in a tool state dictionary with default values for all parameters
        in the dictionary `inputs`. Grouping elements are filled in recursively.
        """
        context = ExpressionContext( state, context )
        for input in inputs.itervalues():
            state[ input.name ] = input.get_initial_value( trans, context )

    def get_param_html_map( self, trans, page=0, other_values={} ):
        """
        Return a dictionary containing the HTML representation of each
        parameter. This is used for rendering display elements. It is
        currently not compatible with grouping constructs.

        NOTE: This should be considered deprecated, it is only used for tools
              with `display` elements. These should be eliminated.
        """
        rval = dict()
        for key, param in self.inputs_by_page[page].iteritems():
            if not isinstance( param, ToolParameter ):
                raise Exception( "'get_param_html_map' only supported for simple parameters" )
            rval[key] = param.get_html( trans, other_values=other_values )
        return rval

    def get_param( self, key ):
        """
        Returns the parameter named `key` or None if there is no such
        parameter.
        """
        return self.inputs.get( key, None )

    def get_hook( self, name ):
        """
        Returns an object from the code file referenced by `code_namespace`
        (this will normally be a callable object)
        """
        if self.code_namespace:
            # Try to look up hook in self.hook_map, otherwise resort to default
            if name in self.hook_map and self.hook_map[name] in self.code_namespace:
                return self.code_namespace[self.hook_map[name]]
            elif name in self.code_namespace:
                return self.code_namespace[name]
        return None

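    # Illustrative sketch (not part of the original module): how a <code>
    # element (see parse(), which fills self.hook_map and self.code_namespace)
    # maps a hook name to a function in the exec'd code file. The file and
    # function names are made up; "validate_input" is the hook name used by
    # handle_input() below.
    #
    #   <code file="extra_code.py">
    #       <hook validate_input="my_validate"/>
    #   </code>
    #
    #   get_hook( "validate_input" )   # returns `my_validate` from that file
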
    def visit_inputs( self, value, callback ):
        """
        Call the function `callback` on each parameter of this tool. Visits
        grouping parameters recursively and constructs unique prefixes for
        each nested set of parameters. The callback method is then called as:

        `callback( level_prefix, parameter, parameter_value )`
        """
        # HACK: Yet another hack around check_values -- WHY HERE?
        if not self.check_values:
            return
        for input in self.inputs.itervalues():
            if isinstance( input, ToolParameter ):
                callback( "", input, value[input.name] )
            else:
                input.visit_inputs( "", value[input.name], callback )

    def handle_input( self, trans, incoming ):
        """
        Process incoming parameters for this tool from the dict `incoming`,
        update the tool state (or create it if none existed), and either return
        to the form or execute the tool (only if 'execute' was clicked and
        there were no errors).
        """
        # Get the state or create if not found
        if "tool_state" in incoming:
            encoded_state = util.string_to_object( incoming["tool_state"] )
            state = DefaultToolState()
            state.decode( encoded_state, self, trans.app )
        else:
            state = self.new_state( trans )
            # This feels a bit like a hack. It allows forcing full processing
            # of inputs even when there is no state in the incoming dictionary
            # by providing either 'runtool_btn' (the name of the submit button
            # on the standard run form) or "URL" (a parameter provided by
            # external data source tools).
            if "runtool_btn" not in incoming and "URL" not in incoming:
                return "tool_form.mako", dict( errors={}, tool_state=state, param_values={}, incoming={} )
        # Process incoming data
        if not self.check_values:
            # If `self.check_values` is false we don't do any checking or
            # processing on input parameters. This is used to pass raw values
            # through to/from external sites. FIXME: This should be handled
            # more cleanly, there is no reason why external sites need to
            # post back to the same URL that the tool interface uses.
            errors = {}
            params = incoming
        else:
            # Update state for all inputs on the current page taking new
            # values from `incoming`.
            errors = self.update_state( trans, self.inputs_by_page[state.page], state.inputs, incoming )
            # If the tool provides a `validate_input` hook, call it.
            validate_input = self.get_hook( 'validate_input' )
            if validate_input:
                validate_input( trans, errors, state.inputs, self.inputs_by_page[state.page] )
            params = state.inputs
        # Did the user actually click next / execute, or is this just
        # a refresh?
        if 'runtool_btn' in incoming or 'URL' in incoming or 'ajax_upload' in incoming:
            # If there were errors, we stay on the same page and display
            # error messages
            if errors:
                error_message = "One or more errors were found in the input you provided. The specific errors are marked below."
                return "tool_form.mako", dict( errors=errors, tool_state=state, incoming=incoming, error_message=error_message )
            # If we've completed the last page we can execute the tool
            elif state.page == self.last_page:
                _, out_data = self.execute( trans, incoming=params )
                try:
                    assert isinstance( out_data, odict )
                    return 'tool_executed.mako', dict( out_data=out_data )
                except:
                    return 'message.mako', dict( status='error', message='odict not returned from tool execution', refresh_frames=[] )
            # Otherwise move on to the next page
            else:
                state.page += 1
                # Fill in the default values for the next page
                self.fill_in_new_state( trans, self.inputs_by_page[ state.page ], state.inputs )
                return 'tool_form.mako', dict( errors=errors, tool_state=state )
        else:
            try:
                self.find_fieldstorage( state.inputs )
            except InterruptedUpload:
                # If inputs contain a file it won't persist.  Most likely this
                # is an interrupted upload.  We should probably find a more
                # standard method of determining an incomplete POST.
                return self.handle_interrupted( trans, state.inputs )
            except:
                pass
            # Just a refresh, render the form with updated state and errors.
            return 'tool_form.mako', dict( errors=errors, tool_state=state )

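    # Illustrative sketch (not part of the original module): the special keys
    # that handle_input() above and update_state() below look for in
    # `incoming`. The key names are the literal strings used in this file;
    # the repeat is assumed to be named "queries" and the values are made up.
    #
    #   incoming = {
    #       "tool_state": "...",           # encoded DefaultToolState, if any
    #       "runtool_btn": "Execute",      # the user clicked execute
    #       "queries_0_remove": "Remove",  # remove repeat instance with index 0
    #       "queries_add": "Add",          # append a new repeat instance
    #   }
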
    def find_fieldstorage( self, x ):
        if isinstance( x, FieldStorage ):
            raise InterruptedUpload( None )
        elif type( x ) is types.DictType:
            [ self.find_fieldstorage( y ) for y in x.values() ]
        elif type( x ) is types.ListType:
            [ self.find_fieldstorage( y ) for y in x ]

    def handle_interrupted( self, trans, inputs ):
        """
        Upon handling inputs, if it appears that we have received an incomplete
        form, do some cleanup or anything else deemed necessary.  Currently
        this is only likely during file uploads, but this method could be
        generalized and a method standardized for handling other tools.
        """
        # If the async upload tool has uploading datasets, we need to error them.
        if 'async_datasets' in inputs and inputs['async_datasets'] not in [ 'None', '', None ]:
            for id in inputs['async_datasets'].split( ',' ):
                try:
                    data = self.sa_session.query( trans.model.HistoryDatasetAssociation ).get( int( id ) )
                except:
                    log.exception( 'Unable to load precreated dataset (%s) sent in upload form' % id )
                    continue
                if trans.user is None and trans.galaxy_session.current_history != data.history:
                    log.error( 'Got a precreated dataset (%s) but it does not belong to anonymous user\'s current session (%s)'
                        % ( data.id, trans.galaxy_session.id ) )
                elif data.history.user != trans.user:
                    log.error( 'Got a precreated dataset (%s) but it does not belong to current user (%s)'
                        % ( data.id, trans.user.id ) )
                else:
                    data.state = data.states.ERROR
                    data.info = 'Upload of this dataset was interrupted.  Please try uploading again or'
                    self.sa_session.add( data )
                    self.sa_session.flush()
        # It's unlikely the user will ever see this.
        return 'message.mako', dict( status='error',
            message='Your upload was interrupted. If this was unintentional, please retry it.',
            refresh_frames=[], cont=None )

    def update_state( self, trans, inputs, state, incoming, prefix="", context=None,
                      update_only=False, old_errors={}, item_callback=None ):
        """
        Update the tool state in `state` using the user input in `incoming`.
        This is designed to be called recursively: `inputs` contains the
        set of inputs being processed, and `prefix` specifies a prefix to
        add to the name of each input to extract its value from `incoming`.

        If `update_only` is True, values that are not in `incoming` will
        not be modified. In this case `old_errors` can be provided, and any
        errors for parameters which were *not* updated will be preserved.
        """
        errors = dict()
        # Push this level onto the context stack
        context = ExpressionContext( state, context )
        # Iterate inputs and update (recursively)
        for input in inputs.itervalues():
            key = prefix + input.name
            if isinstance( input, Repeat ):
                group_state = state[input.name]
                # Create list of empty errors for each previously existing state
                group_errors = [ {} for i in range( len( group_state ) ) ]
                group_old_errors = old_errors.get( input.name, None )
                any_group_errors = False
                # Check any removals before updating state -- only one
                # removal can be performed, others will be ignored
                for i, rep_state in enumerate( group_state ):
                    rep_index = rep_state['__index__']
                    if key + "_" + str( rep_index ) + "_remove" in incoming:
                        if len( group_state ) > input.min:
                            del group_state[i]
                            del group_errors[i]
                            if group_old_errors:
                                del group_old_errors[i]
                            break
                        else:
                            group_errors[i] = { '__index__': 'Cannot remove repeat (min size=%i).' % input.min }
                            any_group_errors = True
                            # Only need to find one that can't be removed due to size,
                            # since only one removal is processed at a time anyway
                            break
                # Update state
                max_index = -1
                for i, rep_state in enumerate( group_state ):
                    rep_index = rep_state['__index__']
                    max_index = max( max_index, rep_index )
                    rep_prefix = "%s_%d|" % ( key, rep_index )
                    if group_old_errors:
                        rep_old_errors = group_old_errors[i]
                    else:
                        rep_old_errors = {}
                    rep_errors = self.update_state( trans,
                                                    input.inputs,
                                                    rep_state,
                                                    incoming,
                                                    prefix=rep_prefix,
                                                    context=context,
                                                    update_only=update_only,
                                                    old_errors=rep_old_errors,
                                                    item_callback=item_callback )
                    if rep_errors:
                        any_group_errors = True
                        group_errors[i].update( rep_errors )
                # Check for addition
                if key + "_add" in incoming:
                    if len( group_state ) < input.max:
                        new_state = {}
                        new_state['__index__'] = max_index + 1
                        self.fill_in_new_state( trans, input.inputs, new_state, context )
                        group_state.append( new_state )
                        group_errors.append( {} )
                    else:
                        group_errors[-1] = { '__index__': 'Cannot add repeat (max size=%i).' % input.max }
                        any_group_errors = True
                # Were there *any* errors for any repetition?
                if any_group_errors:
                    errors[input.name] = group_errors
            elif isinstance( input, Conditional ):
                group_state = state[input.name]
                group_old_errors = old_errors.get( input.name, {} )
                old_current_case = group_state['__current_case__']
                group_prefix = "%s|" % ( key )
                # Deal with the 'test' element and see if its value changed
                if input.value_ref and not input.value_ref_in_group:
                    # We are referencing an existing parameter, which is not
                    # part of this group
                    test_param_key = prefix + input.test_param.name
                else:
                    test_param_key = group_prefix + input.test_param.name
                test_param_error = None
                test_incoming = get_incoming_value( incoming, test_param_key, None )
                if test_param_key not in incoming \
                   and "__force_update__" + test_param_key not in incoming \
                   and update_only:
                    # Update only, keep previous value and state, but still
                    # recurse in case there are nested changes
                    value = group_state[ input.test_param.name ]
                    current_case = old_current_case
                    if input.test_param.name in old_errors:
                        errors[ input.test_param.name ] = old_errors[ input.test_param.name ]
                else:
                    # Get value of test param and determine current case
                    value, test_param_error = \
                        check_param( trans, input.test_param, test_incoming, context )
                    current_case = input.get_current_case( value, trans )
                if current_case != old_current_case:
                    # Current case has changed, throw away old state
                    group_state = state[input.name] = {}
                    # TODO: we should try to preserve values if we can
                    self.fill_in_new_state( trans, input.cases[current_case].inputs, group_state, context )
                    group_errors = dict()
                    group_old_errors = dict()
                else:
                    # Current case has not changed, update children
                    group_errors = self.update_state( trans,
                                                      input.cases[current_case].inputs,
                                                      group_state,
                                                      incoming,
                                                      prefix=group_prefix,
                                                      context=context,
                                                      update_only=update_only,
                                                      old_errors=group_old_errors,
                                                      item_callback=item_callback )
                if test_param_error:
                    group_errors[ input.test_param.name ] = test_param_error
                if group_errors:
                    errors[ input.name ] = group_errors
                # Store the current case in a special value
                group_state['__current_case__'] = current_case
                # Store the value of the test element
                group_state[ input.test_param.name ] = value
1122            elif isinstance( input, UploadDataset ):
1123                group_state = state[input.name]
1124                group_errors = []
1125                group_old_errors = old_errors.get( input.name, None )
1126                any_group_errors = False
1127                d_type = input.get_datatype( trans, context )
1128                writable_files = d_type.writable_files
1129                # Remove extra files
1130                while len( group_state ) > len( writable_files ):
1131                    del group_state[-1]
1132                    if group_old_errors:
1133                        del group_old_errors[-1]
1134                # Update state
1135                max_index = -1
1136                for i, rep_state in enumerate( group_state ):
1137                    rep_index = rep_state['__index__']
1138                    max_index = max( max_index, rep_index )
1139                    rep_prefix = "%s_%d|" % ( key, rep_index )
1140                    if group_old_errors:
1141                        rep_old_errors = group_old_errors[i]
1142                    else:
1143                        rep_old_errors = {}
1144                    rep_errors = self.update_state( trans,
1145                                                    input.inputs,
1146                                                    rep_state,
1147                                                    incoming,
1148                                                    prefix=rep_prefix,
1149                                                    context=context,
1150                                                    update_only=update_only,
1151                                                    old_errors=rep_old_errors,
1152                                                    item_callback=item_callback )
1153                    if rep_errors:
1154                        any_group_errors = True
1155                        group_errors.append( rep_errors )
1156                    else:
1157                        group_errors.append( {} )
1158                # Add new file upload states as needed
1159                offset = 1
1160                while len( writable_files ) > len( group_state ):
1161                    new_state = {}
1162                    new_state['__index__'] = max_index + offset
1163                    offset += 1
1164                    self.fill_in_new_state( trans, input.inputs, new_state, context )
1165                    group_state.append( new_state )
1166                    if any_group_errors:
1167                        group_errors.append( {} )
1168                # Were there *any* errors for any repetition?
1169                if any_group_errors:
1170                    errors[input.name] = group_errors
1171            else:
1172                if key not in incoming \
1173                   and "__force_update__" + key not in incoming \
1174                   and update_only:
1175                    # No new value provided, and we are only updating, so keep
1176                    # the old value (which should already be in the state) and
1177                    # preserve the old error message.
1178                    if input.name in old_errors:
1179                        errors[ input.name ] = old_errors[ input.name ]
1180                else:
1181                    incoming_value = get_incoming_value( incoming, key, None )
1182                    value, error = check_param( trans, input, incoming_value, context )
1183                    # If a callback was provided, allow it to process the value
1184                    if item_callback:
1185                        old_value = state.get( input.name, None )
1186                        value, error = item_callback( trans, key, input, value, error, old_value, context )                                         
1187                    if error:
1188                        errors[ input.name ] = error
1189                    state[ input.name ] = value
1190        return errors
1191           
1192    def get_static_param_values( self, trans ):
1193        """
1194        Returns a map of parameter names and values if the tool does not
1195        require any user input. Will raise an exception if any parameter
1196        does require input.
1197        """
1198        args = dict()
1199        for key, param in self.inputs.iteritems():
1200            if isinstance( param, HiddenToolParameter ):
1201                args[key] = param.value
1202            elif isinstance( param, BaseURLToolParameter ):
1203                args[key] = param.get_value( trans )
1204            else:
1205                raise Exception( "Unexpected parameter type" )
1206        return args
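    # Illustrative usage (a sketch, not part of this module): for a data
    # source tool whose inputs are all hidden or baseurl parameters, this
    # yields the complete, user-input-free argument map. `tool` and `trans`
    # are assumed to exist.
    #
    #     args = tool.get_static_param_values( trans )
    #     # e.g. {'GALAXY_URL': 'http://localhost:8080/tool_runner?tool_id=biomart'}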
1207           
1208    def execute( self, trans, incoming={}, set_output_hid=True ):
1209        """
1210        Execute the tool using parameter values in `incoming`. This just
1211        dispatches to the `ToolAction` instance specified by
1212        `self.tool_action`. In general this will create a `Job` that
1213        when run will build the tool's outputs, e.g. `DefaultToolAction`.
1214        """
1215        return self.tool_action.execute( self, trans, incoming=incoming, set_output_hid=set_output_hid )
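    # Illustrative usage (a sketch; `trans`, `hda` and the parameter names
    # are assumptions): with the default `DefaultToolAction`, execution
    # creates a Job and returns it along with the output datasets.
    #
    #     job, out_data = tool.execute( trans, incoming={ 'input1': hda, 'threshold': '5' } )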
1216       
1217    def params_to_strings( self, params, app ):
1218        return params_to_strings( self.inputs, params, app )
1219       
1220    def params_from_strings( self, params, app, ignore_errors=False ):
1221        return params_from_strings( self.inputs, params, app, ignore_errors )
1222           
1223    def check_and_update_param_values( self, values, trans ):
1224        """
1225        Check that all parameters have values, and fill in with default
1226        values where necessary. This could be called after loading values
1227        from a database in case new parameters have been added.
1228        """
1229        messages = {}
1230        self.check_and_update_param_values_helper( self.inputs, values, trans, messages )
1231        return messages
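    # Sketch of the intended use (names assumed): after loading a saved job's
    # parameters, report any inputs that were filled with defaults because the
    # tool config gained new parameters since the values were stored.
    #
    #     params = tool.params_from_strings( saved, app, ignore_errors=True )
    #     messages = tool.check_and_update_param_values( params, trans )
    #     for param_name, message in messages.iteritems():
    #         log.warning( "%s: %s", param_name, message )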
1232       
1233    def check_and_update_param_values_helper( self, inputs, values, trans, messages, context=None, prefix="" ):
1234        """
1235        Recursive helper for `check_and_update_param_values`
1236        """
1237        context = ExpressionContext( values, context )
1238        for input in inputs.itervalues():
1239            # No value, insert the default
1240            if input.name not in values:
1241                messages[ input.name ] = "No value found for '%s%s', used default" % ( prefix, input.label )
1242                values[ input.name ] = input.get_initial_value( trans, context )
1243            # Value, visit recursively as usual
1244            else:
1245                if isinstance( input, Repeat ):
1246                    for i, d in enumerate( values[ input.name ] ):
1247                        rep_prefix = prefix + "%s %d > " % ( input.title, i + 1 )
1248                        self.check_and_update_param_values_helper( input.inputs, d, trans, messages, context, rep_prefix )
1249                elif isinstance( input, Conditional ):
1250                    group_values = values[ input.name ]
1251                    if input.test_param.name not in group_values:
1252                        # No test param invalidates the whole conditional
1253                        values[ input.name ] = group_values = input.get_initial_value( trans, context )
1254                        messages[ input.test_param.name ] = "No value found for '%s%s', used default" % ( prefix, input.test_param.label )
1255                        current_case = group_values['__current_case__']
1256                        for child_input in input.cases[current_case].inputs.itervalues():
1257                            messages[ child_input.name ] = "Value no longer valid for '%s%s', replaced with default" % ( prefix, child_input.label )                   
1258                    else:
1259                        current = group_values["__current_case__"]                   
1260                        self.check_and_update_param_values_helper( input.cases[current].inputs, group_values, trans, messages, context, prefix )
1261                else:
1262                    # Regular tool parameter, no recursion needed
1263                    pass       
1264   
1265    def handle_unvalidated_param_values( self, input_values, app ):
1266        """
1267        Find any instances of `UnvalidatedValue` within input_values and
1268        validate them (by calling `ToolParameter.from_html` and
1269        `ToolParameter.validate`).
1270        """
1271        # No validation is done when check_values is False
1272        if not self.check_values:
1273            return
1274        self.handle_unvalidated_param_values_helper( self.inputs, input_values, app )
1275
1276    def handle_unvalidated_param_values_helper( self, inputs, input_values, app, context=None, prefix="" ):
1277        """
1278        Recursive helper for `handle_unvalidated_param_values`
1279        """
1280        context = ExpressionContext( input_values, context )
1281        for input in inputs.itervalues():
1282            if isinstance( input, Repeat ): 
1283                for i, d in enumerate( input_values[ input.name ] ):
1284                    rep_prefix = prefix + "%s %d > " % ( input.title, i + 1 )
1285                    self.handle_unvalidated_param_values_helper( input.inputs, d, app, context, rep_prefix )
1286            elif isinstance( input, Conditional ):
1287                values = input_values[ input.name ]
1288                current = values["__current_case__"]
1289                # NOTE: The test param doesn't need to be checked since
1290                #       there would be no way to tell what case to use at
1291                #       workflow build time. However, it is not clear that
1292                #       we actually prevent such a case explicitly.
1293                self.handle_unvalidated_param_values_helper( input.cases[current].inputs, values, app, context, prefix )
1294            else:
1295                # Regular tool parameter
1296                value = input_values[ input.name ]
1297                if isinstance( value, UnvalidatedValue ):
1298                    try:
1299                        # Convert from html representation
1300                        if value.value is None:
1301                            # If value.value is None, it could not have been
1302                            # submitted via html form and therefore .from_html
1303                            # can't be guaranteed to work
1304                            value = None
1305                        else:
1306                            value = input.from_html( value.value, None, context )
1307                        # Do any further validation on the value
1308                        input.validate( value, None )
1309                    except Exception, e:
1310                        # Wrap and re-raise any generated error so we can
1311                        # generate a more informative message
1312                        v = input.value_to_display_text( value, self.app )
1313                        message = "Failed runtime validation of %s%s (value: %s) (%s)" \
1314                            % ( prefix, input.label, v, e )
1315                        raise LateValidationError( message )
1316                    input_values[ input.name ] = value
1317   
1318    def handle_job_failure_exception( self, e ):
1319        """
1320        Called by job.fail when an exception is generated to allow generation
1321        of a better error message (returning None yields the default behavior)
1322        """
1323        message = None
1324        # If the exception was generated by late validation, use its error
1325        # message (contains the parameter name and value)
1326        if isinstance( e, LateValidationError ):
1327            message = e.message
1328        return message
1329   
1330    def build_param_dict( self, incoming, input_datasets, output_datasets, output_paths, job_working_directory ):
1331        """
1332        Build the dictionary of parameters for substituting into the command
1333        line. Each value is wrapped in a `InputValueWrapper`, which allows
1334        all the attributes of the value to be used in the template, *but*
1335        when the __str__ method is called it actually calls the
1336        `to_param_dict_value` method of the associated input.
1337        """
1338        param_dict = dict()
1339        # All parameters go into the param_dict
1340        param_dict.update( incoming )
1341        # Wrap parameters as necessary
1342        def wrap_values( inputs, input_values ):
1343            for input in inputs.itervalues():
1344                if isinstance( input, Repeat ): 
1345                    for d in input_values[ input.name ]:
1346                        wrap_values( input.inputs, d )
1347                elif isinstance( input, Conditional ):
1348                    values = input_values[ input.name ]
1349                    current = values["__current_case__"]
1350                    wrap_values( input.cases[current].inputs, values )
1351                elif isinstance( input, DataToolParameter ):
1352                    ## FIXME: We're populating param_dict with conversions when
1353                    ##        wrapping values, this should happen as a separate
1354                    ##        step before wrapping (or call this wrapping step
1355                    ##        something more generic) (but iterating this same
1356                    ##        list twice would be wasteful)
1357                    # Add explicit conversions by name to current parent
1358                    for conversion_name, conversion_extensions, conversion_datatypes in input.conversions:
1359                        # If we are at building cmdline step, then converters
1360                        # have already executed
1361                        conv_ext, converted_dataset = input_values[ input.name ].find_conversion_destination( conversion_datatypes )
1362                        # When dealing with optional inputs, we'll provide a
1363                        # valid extension to be used for None converted dataset
1364                        if not conv_ext:
1365                            conv_ext = conversion_extensions[0]
1366                        # input_values[ input.name ] is None when optional
1367                        # dataset, 'conversion' of optional dataset should
1368                        # create wrapper around NoneDataset for converter output
1369                        if input_values[ input.name ] and not converted_dataset:
1370                            # Input that converter is based from has a value,
1371                            # but converted dataset does not exist
1372                            raise Exception( 'A path for explicit datatype conversion has not been found: %s --/--> %s'
1373                                % ( input_values[ input.name ].extension, conversion_extensions ) )
1374                        else:
1375                            # Trick wrapper into using target conv ext (when
1376                            # None) without actually being a tool parameter
1377                            input_values[ conversion_name ] = \
1378                                DatasetFilenameWrapper( converted_dataset,
1379                                                        datatypes_registry = self.app.datatypes_registry,
1380                                                        tool = Bunch( conversion_name = Bunch( extensions = conv_ext ) ),
1381                                                        name = conversion_name )
1382                    # Wrap actual input dataset
1383                    input_values[ input.name ] = \
1384                        DatasetFilenameWrapper( input_values[ input.name ],
1385                                                datatypes_registry = self.app.datatypes_registry,
1386                                                tool = self,
1387                                                name = input.name )
1388                elif isinstance( input, SelectToolParameter ):
1389                    input_values[ input.name ] = SelectToolParameterWrapper(
1390                        input, input_values[ input.name ], self.app, other_values = param_dict )
1391                else:
1392                    input_values[ input.name ] = InputValueWrapper(
1393                        input, input_values[ input.name ], param_dict )
1394        # HACK: only wrap if check_values is not false, this deals with external
1395        #       tools where the inputs don't even get passed through. These
1396        #       tools (e.g. UCSC) should really be handled in a special way.
1397        if self.check_values:
1398            wrap_values( self.inputs, param_dict )
1399        ## FIXME: when self.check_values==True, input datasets are being wrapped
1400        ##        twice (above and below, creating 2 separate
1401        ##        DatasetFilenameWrapper objects - first is overwritten by
1402        ##        second), is this necessary? - if we get rid of this way to
1403        ##        access children, can we stop this redundancy, or is there
1404        ##        another reason for this?
1405        ## - Only necessary when self.check_values is False (==external dataset
1406        ##   tool?: can this be abstracted out as part of being a datasource tool?)
1407        ## - But we still want (ALWAYS) to wrap input datasets (this should be
1408        ##   checked to prevent overhead of creating a new object?)
1409        # Additionally, datasets go in the param dict. We wrap them such that
1410        # if the bare variable name is used it returns the filename (for
1411        # backwards compatibility). We also add any child datasets to the
1412        # param dict encoded as:
1413        #   "_CHILD___{dataset_name}___{child_designation}",
1414        # but this should be considered DEPRECATED, instead use:
1415        #   $dataset.get_child( 'name' ).filename
1416        for name, data in input_datasets.items():
1417            param_dict[name] = DatasetFilenameWrapper( data,
1418                                                       datatypes_registry = self.app.datatypes_registry,
1419                                                       tool = self,
1420                                                       name = name )
1421            if data:
1422                for child in data.children:
1423                    param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
1424        for name, hda in output_datasets.items():
1425            # Write outputs to the working directory (for security purposes)
1426            # if desired.
1427            if self.app.config.outputs_to_working_directory:
1428                try:
1429                    false_path = [ dp.false_path for dp in output_paths if dp.real_path == hda.file_name ][0]
1430                    param_dict[name] = DatasetFilenameWrapper( hda, false_path = false_path )
1431                    open( false_path, 'w' ).close()
1432                except IndexError:
1433                    log.warning( "Unable to determine alternate path for writing job outputs, outputs will be written to their real paths" )
1434                    param_dict[name] = DatasetFilenameWrapper( hda )
1435            else:
1436                param_dict[name] = DatasetFilenameWrapper( hda )
1437            # Provide access to a path to store additional files
1438            # TODO: path munging for cluster/dataset server relocatability
1439            param_dict[name].files_path = os.path.abspath(os.path.join( job_working_directory, "dataset_%s_files" % (hda.dataset.id) ))
1440            for child in hda.children:
1441                param_dict[ "_CHILD___%s___%s" % ( name, child.designation ) ] = DatasetFilenameWrapper( child )
1442        for out_name, output in self.outputs.iteritems():
1443            if out_name not in param_dict and output.filters:
1444                # Assume the reason we lack this output is because a filter
1445                # failed to pass; for tool writing convenience, provide a
1446                # NoneDataset
1447                param_dict[ out_name ] = NoneDataset( datatypes_registry = self.app.datatypes_registry, ext = output.format )
1448        # We add access to app here, this allows access to app.config, etc
1449        param_dict['__app__'] = RawObjectWrapper( self.app )
1450        # More convenient access to app.config.new_file_path; we don't need to
1451        # wrap a string, but this method of generating additional datasets
1452        # should be considered DEPRECATED
1453        # TODO: path munging for cluster/dataset server relocatability
1454        param_dict['__new_file_path__'] = os.path.abspath(self.app.config.new_file_path)
1455        # The following points to location (xxx.loc) files which are pointers
1456        # to locally cached data
1457        param_dict['GALAXY_DATA_INDEX_DIR'] = self.app.config.tool_data_path
1458        # For the upload tool, we need to know the root directory and the
1459        # datatypes conf path, so we can load the datatypes registry
1460        param_dict['GALAXY_ROOT_DIR'] = os.path.abspath( self.app.config.root )
1461        param_dict['GALAXY_DATATYPES_CONF_FILE'] = os.path.abspath( self.app.config.datatypes_config )
1462        # Return the dictionary of parameters
1463        return param_dict
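    # For illustration (tool and parameter names assumed): given the
    # param_dict built above, a Cheetah command template can reference
    #
    #     my_tool.py --in $input1 --ext ${input1.ext} --out $output1
    #
    # where $input1 / $output1 stringify to file paths via
    # DatasetFilenameWrapper and plain parameters via InputValueWrapper.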
1464   
1465    def build_param_file( self, param_dict, directory=None ):
1466        """
1467        Build temporary file for file based parameter transfer if needed
1468        """
1469        if self.command and "$param_file" in self.command:
1470            fd, param_filename = tempfile.mkstemp( dir=directory )
1471            os.close( fd )
1472            f = open( param_filename, "wt" )
1473            for key, value in param_dict.items():
1474                # parameters can be strings or lists of strings, coerce to list
1475                if not isinstance( value, list ):
1476                    value = [ value ]
1477                for elem in value:
1478                    f.write( '%s=%s\n' % (key, elem) )
1479            f.close()
1480            param_dict['param_file'] = param_filename
1481            return param_filename
1482        else:
1483            return None
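    # The generated $param_file is a flat key=value listing, one line per
    # value (list values are flattened). A hypothetical example:
    #
    #     input1=/galaxy/files/000/dataset_12.dat
    #     threshold=5
    #     output1=/galaxy/files/000/dataset_13.dat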
1484           
1485    def build_config_files( self, param_dict, directory=None ):
1486        """
1487        Build temporary config files from the tool's <configfile> templates, if any
1488        """
1489        config_filenames = []
1490        for name, filename, template_text in self.config_files:
1491            # If a particular filename was forced by the config use it
1492            if filename is not None:
1493                if directory is None:
1494                    raise Exception( "Config files with fixed filenames require a working directory" )
1495                config_filename = os.path.join( directory, filename )
1496            else:
1497                fd, config_filename = tempfile.mkstemp( dir=directory )
1498                os.close( fd )
1499            f = open( config_filename, "wt" )
1500            f.write( fill_template( template_text, context=param_dict ) )
1501            f.close()
1502            param_dict[name] = config_filename
1503            config_filenames.append( config_filename )
1504        return config_filenames
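    # Sketch (the tool XML below is an assumed example, not from this module):
    #
    #     <configfiles>
    #         <configfile name="script">threshold: $threshold</configfile>
    #     </configfiles>
    #
    # produces a temp file whose path becomes $script in the command template,
    # with $threshold already substituted from param_dict.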
1505       
1506    def build_command_line( self, param_dict ):
1507        """
1508        Build command line to invoke this tool given a populated param_dict
1509        """
1510        command_line = None
1511        if not self.command:
1512            return
1513        try:               
1514            # Substituting parameters into the command
1515            command_line = fill_template( self.command, context=param_dict )
1516            # Remove newlines from command line
1517            command_line = command_line.replace( "\n", " " ).replace( "\r", " " )
1518        except Exception, e:
1519            # Make the exception message clearer before re-raising
1520            e.args = ( 'Error substituting into command line. Params: %r, Command: %s' % ( param_dict, self.command ), )
1521            raise
1522        return command_line
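    # For example (assumed values): with self.command set to
    # "my_tool.py $input1 $threshold > $output1" and a populated param_dict,
    # fill_template yields something like
    #
    #     my_tool.py /galaxy/files/000/dataset_12.dat 5 > /galaxy/files/000/dataset_13.dat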
1523
1524    def build_dependency_shell_commands( self ):
1525        """
1526        Return a list of commands to be run to populate the current
1527        environment to include this tool's requirements.
1528        """
1529        commands = []
1530        for requirement in self.requirements:
1531            # TODO: currently only supporting requirements of type package,
1532            #       need to implement some mechanism for mapping other types
1533            #       back to packages
1534            log.debug( "Dependency %s", requirement.name )
1535            if requirement.type == 'package':
1536                script_file, base_path, version = self.app.toolbox.dependency_manager.find_dep( requirement.name, requirement.version )
1537                if script_file is None:
1538                    log.warn( "Failed to resolve dependency on '%s', ignoring", requirement.name )
1539                else:
1540                    commands.append( 'PACKAGE_BASE=%s source %s' % ( base_path, script_file ) )
1541        return commands
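    # Each resolved requirement contributes one shell fragment; a hypothetical
    # package "samtools" might yield
    #
    #     PACKAGE_BASE=/deps/samtools/0.1.12 source /deps/samtools/0.1.12/env.sh
    #
    # which callers can prepend to the job's command line.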
1542
1543    def build_redirect_url_params( self, param_dict ):
1544        """
1545        Substitute parameter values into self.redirect_url_params
1546        """
1547        if not self.redirect_url_params:
1548            return
1550        # Substituting parameter values into the url params
1551        redirect_url_params = fill_template( self.redirect_url_params, context=param_dict )
1552        # Remove newlines
1553        redirect_url_params = redirect_url_params.replace( "\n", " " ).replace( "\r", " " )
1554        return redirect_url_params
1555
1556    def parse_redirect_url( self, data, param_dict ):
1557        """
1558        Parse the REDIRECT_URL tool param. Tools that send data to an external
1559        application via a redirect must include the following 3 tool params:
1560       
1561        1) REDIRECT_URL - the url to which the data is being sent
1562       
1563        2) DATA_URL - the url to which the receiving application will send an
1564           http post to retrieve the Galaxy data
1565       
1566        3) GALAXY_URL - the url to which the external application may post
1567           data as a response
1568        """
1569        redirect_url = param_dict.get( 'REDIRECT_URL' )
1570        redirect_url_params = self.build_redirect_url_params( param_dict )
1571        # Add the parameters to the redirect url.  We're splitting the param
1572        # string on '**^**' because the self.parse() method replaced white
1573        # space with that separator.
1574        params = redirect_url_params.split( '**^**' )
1575        rup_dict = {}
1576        for param in params:
            # Split only on the first '=' so values may themselves contain '='
1577            p_list = param.split( '=', 1 )
1578            p_name = p_list[0]
1579            p_val = p_list[1]
1580            rup_dict[ p_name ] = p_val
1581        DATA_URL = param_dict.get( 'DATA_URL', None )
1582        assert DATA_URL is not None, "DATA_URL parameter missing in tool config."
1583        DATA_URL += "/%s/display" % str( data.id )
1584        redirect_url += "?DATA_URL=%s" % DATA_URL
1585        # Add the redirect_url_params to redirect_url
1586        for p_name in rup_dict:
1587            redirect_url += "&%s=%s" % ( p_name, rup_dict[ p_name ] )
1588        # Add the current user email to redirect_url
1589        if data.history.user:
1590            USERNAME = str( data.history.user.email )
1591        else:
1592            USERNAME = 'Anonymous'
1593        redirect_url += "&USERNAME=%s" % USERNAME
1594        return redirect_url
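    # A hypothetical result, assuming redirect_url_params of
    # "tool_id=maf_to_bed**^**status=queued" and a DATA_URL param of
    # "http://galaxy.example.org/datasets":
    #
    #     <REDIRECT_URL>?DATA_URL=http://galaxy.example.org/datasets/42/display&tool_id=maf_to_bed&status=queued&USERNAME=user@example.org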
1595
1596    def call_hook( self, hook_name, *args, **kwargs ):
1597        """
1598        Call the custom code hook function identified by 'hook_name' if any,
1599        and return the results
1600        """
1601        try:
1602            code = self.get_hook( hook_name )
1603            if code:
1604                return code( *args, **kwargs )
1605        except Exception, e:
1606            e.args = ( "Error in '%s' hook '%s', original message: %s" % ( self.name, hook_name, e.args[0] ), )
1607            raise
1608
1609    def exec_before_job( self, app, inp_data, out_data, param_dict={} ):
1610        pass
1611
1612    def exec_after_process( self, app, inp_data, out_data, param_dict, job = None ):
1613        pass
1614
1615    def job_failed( self, job_wrapper, message, exception = False ):
1616        """
1617        Called when a job has failed
1618        """
1619        pass
1620
1621    def collect_associated_files( self, output, job_working_directory ):
1622        """
1623        Find extra files in the job working directory and move them into
1624        the appropriate dataset's files directory
1625        """
1626        for name, hda in output.items():
1627            temp_file_path = os.path.join( job_working_directory, "dataset_%s_files" % ( hda.dataset.id ) )
1628            try:
1629                if len( os.listdir( temp_file_path ) ) > 0:
1630                    store_file_path = os.path.join(
1631                        os.path.join( self.app.config.file_path, *directory_hash_id( hda.dataset.id ) ),
1632                        "dataset_%d_files" % hda.dataset.id )
1633                    shutil.move( temp_file_path, store_file_path )
1634                    # Fix permissions
1635                    for basedir, dirs, files in os.walk( store_file_path ):
1636                        util.umask_fix_perms( basedir, self.app.config.umask, 0777, self.app.config.gid )
1637                        for file in files:
1638                            path = os.path.join( basedir, file )
1639                            # Ignore symlinks
1640                            if os.path.islink( path ):
1641                                continue
1642                            util.umask_fix_perms( path, self.app.config.umask, 0666, self.app.config.gid )
1643            except:
                # No extra-files directory was created for this dataset
                # (os.listdir raised), so there is nothing to move; skip it.
1644                continue
1645   
1646    def collect_child_datasets( self, output):
1647        """
1648        Look for child dataset files, create HDA and attach to parent.
1649        """
1650        children = {}
1651        # Loop through output file names, looking for generated children in
1652        # form of 'child_parentId_designation_visibility_extension'
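        # A hypothetical matching filename for an output dataset with id 42:
        #     child_42_reverse_visible_fasta
        # (designation "reverse", shown in the history, extension "fasta")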
1653        for name, outdata in output.items():
1654            for filename in glob.glob(os.path.join(self.app.config.new_file_path,"child_%i_*" % outdata.id) ):
1655                if name not in children:
1656                    children[name] = {}
1657                fields = os.path.basename(filename).split("_")
1658                fields.pop(0)
1659                parent_id = int(fields.pop(0))
1660                designation = fields.pop(0)
1661                visible = ( fields.pop(0).lower() == "visible" )
1664                ext = fields.pop(0).lower()
1665                child_dataset = self.app.model.HistoryDatasetAssociation( extension=ext,
1666                                                                          parent_id=outdata.id,
1667                                                                          designation=designation,
1668                                                                          visible=visible,
1669                                                                          dbkey=outdata.dbkey,
1670                                                                          create_dataset=True,
1671                                                                          sa_session=self.sa_session )
1672                self.app.security_agent.copy_dataset_permissions( outdata.dataset, child_dataset.dataset )
1673                # Move data from temp location to dataset location
1674                shutil.move( filename, child_dataset.file_name )
1675                self.sa_session.add( child_dataset )
1676                self.sa_session.flush()
1677                child_dataset.set_size()
1678                child_dataset.name = "Secondary Dataset (%s)" % ( designation )
1679                child_dataset.init_meta()
1680                child_dataset.set_meta()
1681                child_dataset.set_peek()
1682                # Associate new dataset with job
1683                job = None
1684                for assoc in outdata.creating_job_associations:
1685                    job = assoc.job
1686                    break   
1687                if job:
1688                    assoc = self.app.model.JobToOutputDatasetAssociation( '__new_child_file_%s|%s__' % ( name, designation ), child_dataset )
1689                    assoc.job = job
1690                    self.sa_session.add( assoc )
1691                    self.sa_session.flush()
1692                child_dataset.state = outdata.state
1693                self.sa_session.add( child_dataset )
1694                self.sa_session.flush()
1695                # Add child to return dict
1696                children[name][designation] = child_dataset
1697                # Need to update all associated output hdas, i.e. history was
1698                # shared with job running
1699                for dataset in outdata.dataset.history_associations:
1700                    if outdata == dataset: continue
1701                    # Create new child dataset
1702                    child_data = child_dataset.copy( parent_id = dataset.id )
1703                    self.sa_session.add( child_data )
1704                    self.sa_session.flush()
1705        return children
1706       
1707    def collect_primary_datasets( self, output):
1708        """
1709        Find any additional datasets generated by a tool and attach (for
1710        cases where number of outputs is not known in advance).
1711        """
1712        primary_datasets = {}
1713        # Loop through output file names, looking for generated primary
1714        # datasets in form of:
1715        #     'primary_associatedWithDatasetID_designation_visibility_extension(_DBKEY)'
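        # A hypothetical matching filename for an output dataset with id 42:
        #     primary_42_reads2_visible_fastqsanger_hg18
        # (designation "reads2", visible, extension "fastqsanger", dbkey "hg18")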
1716        for name, outdata in output.items():
1717            for filename in glob.glob(os.path.join(self.app.config.new_file_path,"primary_%i_*" % outdata.id) ):
1718                if name not in primary_datasets:
1719                    primary_datasets[name] = {}
1720                fields = os.path.basename(filename).split("_")
1721                fields.pop(0)
1722                parent_id = int(fields.pop(0))
1723                designation = fields.pop(0)
1724                visible = ( fields.pop(0).lower() == "visible" )
1727                ext = fields.pop(0).lower()
1728                dbkey = outdata.dbkey
1729                if fields:
1730                    dbkey = fields[ 0 ]
1731                # Create new primary dataset
1732                primary_data = self.app.model.HistoryDatasetAssociation( extension=ext,
1733                                                                         designation=designation,
1734                                                                         visible=visible,
1735                                                                         dbkey=dbkey,
1736                                                                         create_dataset=True,
1737                                                                         sa_session=self.sa_session )
1738                self.app.security_agent.copy_dataset_permissions( outdata.dataset, primary_data.dataset )
1739                self.sa_session.add( primary_data )
1740                self.sa_session.flush()
1741                # Move data from temp location to dataset location
1742                shutil.move( filename, primary_data.file_name )
1743                primary_data.set_size()
1744                primary_data.name = outdata.name
1745                primary_data.info = outdata.info
1746                primary_data.init_meta( copy_from=outdata )
1747                primary_data.dbkey = dbkey
1748                primary_data.set_meta()
1749                primary_data.set_peek()
1750                # Associate new dataset with job
1751                job = None
1752                for assoc in outdata.creating_job_associations:
1753                    job = assoc.job
1754                    break   
1755                if job:
1756                    assoc = self.app.model.JobToOutputDatasetAssociation( '__new_primary_file_%s|%s__' % ( name, designation ), primary_data )
1757                    assoc.job = job
1758                    self.sa_session.add( assoc )
1759                    self.sa_session.flush()
1760                primary_data.state = outdata.state
1761                self.sa_session.add( primary_data )
1762                self.sa_session.flush()
1763                outdata.history.add_dataset( primary_data )
1764                # Add dataset to return dict
1765                primary_datasets[name][designation] = primary_data
1766                # Need to update all associated output hdas, i.e. history was
1767                # shared with job running
1768                for dataset in outdata.dataset.history_associations:
1769                    if outdata == dataset: continue
1770                    new_data = primary_data.copy()
1771                    dataset.history.add_dataset( new_data )
1772                    self.sa_session.add( new_data )
1773                    self.sa_session.flush()
1774        return primary_datasets
1775
1776class DataSourceTool( Tool ):
1777    """
1778    Alternate implementation of Tool for data_source tools -- those that
1779    allow the user to query and extract data from another web site.
1780    """
1781    tool_type = 'data_source'
1782   
1783    def _build_GALAXY_URL_parameter( self ):
1784        return ToolParameter.build( self, ElementTree.XML( '<param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=%s" />' % self.id ) )
1785   
1786    def parse_inputs( self, root ):
1787        Tool.parse_inputs( self, root )
1788        if 'GALAXY_URL' not in self.inputs:
1789            self.inputs[ 'GALAXY_URL' ] = self._build_GALAXY_URL_parameter()
1790   
1791    def exec_before_job( self, app, inp_data, out_data, param_dict={} ):
1792        # TODO: Allow for a generic way for all Tools to have output dataset
1793        #       properties be set to input parameter values as defined in a
1794        #       tool XML
1795        dbkey = param_dict.get( 'dbkey' )
1796        organism = param_dict.get( 'organism' )
1797        table = param_dict.get( 'table' )
1798        description = param_dict.get( 'description' )
1799        info = param_dict.get( 'info' )
1800        if description == 'range':
1801            description = param_dict.get( 'position', '' )
1802            if not description:
1803                description = 'unknown position'
1804        gb_landmark_region = param_dict.get( 'q' )
1805        data_type = param_dict.get( 'data_type' )
1806        items = out_data.items()
1807        for name, data in items:
1808            if organism and table and description:
1809                # This is UCSC
1810                data.name  = '%s on %s: %s (%s)' % ( data.name, organism, table, description )
1811            elif gb_landmark_region:
1812                # This is GBrowse
1813                data.name = '%s on %s' % ( data.name, gb_landmark_region )
1814            data.info = info
1815            data.dbkey = dbkey
1816            if data_type not in app.datatypes_registry.datatypes_by_extension:
1817                # Setting data_type to tabular will force the data to be sniffed in exec_after_process()
1818                data_type = 'tabular'
1819            data.change_datatype( data_type )
1820            # Store external data source's request parameters temporarily in
1821            # output file. In case the config setting for
1822            # "outputs_to_working_directory" is True, we must write to the
1823            # DatasetFilenameWrapper object in the param_dict since its
1824            # "false_path" attribute is the temporary path to the output dataset
1825            # ( until the job is run ).  However, even if the
1826            # "outputs_to_working_directory" setting is False, we can still
1827            # open the file the same way for temporarily storing the request
1828            # parameters.
1829            ## TODO: Input parameters should be jsonified and written into a
1830            ##       <configfile> and passed to data_source.py, instead of
1831            ##       writing tab separated key, value pairs to the output file
1832            out = open( str( param_dict.get( name ) ), 'w' )
1833            for key, value in param_dict.items():
1834                print >> out, '%s\t%s' % ( key, value )
1835            out.close()
1836
1837    def exec_after_process( self, app, inp_data, out_data, param_dict, job = None ):
        # Local import: the module-level imports do not bind `datatypes`,
        # which the interval/Bed checks below require
        from galaxy import datatypes
1838        name, data = out_data.items()[0]
1839        data.set_size()
1840        #TODO: these should already be set before the tool runs:
1841        if data.state == data.states.OK:
1842            data.name = param_dict.get( 'name', data.name )
1843            data.info = param_dict.get( 'info', data.name )
1844            data.dbkey = param_dict.get( 'dbkey', data.dbkey )
1845            data.extension = param_dict.get( 'data_type', data.extension )
1846        #TODO: these should be possible as part of data_source.py and external set_meta, see the upload tool:
1847        if data.extension in [ 'txt', 'tabular' ]:
1848            data_type = sniff.guess_ext( data.file_name, sniff_order=app.datatypes_registry.sniff_order, is_multi_byte=self.is_multi_byte )
1849            if data.extension != data_type:
1850                data.change_datatype( data_type )
1851        elif not isinstance( data.datatype, datatypes.interval.Bed ) and isinstance( data.datatype, datatypes.interval.Interval ):
1852            if data.missing_meta():
1853                data.change_datatype( 'tabular' )
1854        data.set_peek()
1855        self.sa_session.add( data )
1856        self.sa_session.flush()
1857
1858class AsyncDataSourceTool( DataSourceTool ):
1859    tool_type = 'data_source_async'
1860   
1861    def _build_GALAXY_URL_parameter( self ):
1862        return ToolParameter.build( self, ElementTree.XML( '<param name="GALAXY_URL" type="baseurl" value="/async/%s" />' % self.id ) )
1863
1864class DataDestinationTool( Tool ):
1865    tool_type = 'data_destination'
1866
1867class SetMetadataTool( Tool ):
1868    """
1869    Tool implementation for special tool that sets metadata on an existing
1870    dataset.
1871    """
1872    tool_type = 'set_metadata'
1873    def exec_after_process( self, app, inp_data, out_data, param_dict, job = None ):
        # Local import: binds `galaxy` so the metadata wrapper below resolves
        import galaxy.datatypes.metadata
1874        for name, dataset in inp_data.iteritems():
1875            external_metadata = galaxy.datatypes.metadata.JobExternalOutputMetadataWrapper( job )
1876            if external_metadata.external_metadata_set_successfully( dataset, app.model.context ):
1877                dataset.metadata.from_JSON_dict( external_metadata.get_output_filenames_by_dataset( dataset, app.model.context ).filename_out )   
1878            else:
1879                dataset._state = model.Dataset.states.FAILED_METADATA
1880                self.sa_session.add( dataset )
1881                self.sa_session.flush()
1882                return
1883            # If setting external metadata has failed, how can we inform the
1884            # user? For now, we'll leave the default metadata and set the state
1885            # back to its original.
1886            dataset.datatype.after_setting_metadata( dataset )
1887            if job and job.tool_id == '1.0.0':
1888                dataset.state = param_dict.get( '__ORIGINAL_DATASET_STATE__' )
1889            else:
1890                # Revert dataset.state to fall back to dataset.dataset.state
1891                dataset._state = None
1892            # Need to reset the peek, which may rely on metadata
1893            dataset.set_peek()
1894            self.sa_session.add( dataset )
1895            self.sa_session.flush()
1896   
1897    def job_failed( self, job_wrapper, message, exception = False ):
1898        job = job_wrapper.sa_session.query( model.Job ).get( job_wrapper.job_id )
1899        if job:
1900            inp_data = {}
1901            for dataset_assoc in job.input_datasets:
1902                inp_data[dataset_assoc.name] = dataset_assoc.dataset
1903            return self.exec_after_process( job_wrapper.app, inp_data, {}, job_wrapper.get_param_dict(), job = job )
1904           
1905class ExportHistoryTool( Tool ):
1906    tool_type = 'export_history'
1907
1908# Populate tool_type to ToolClass mappings
1909tool_types = {}
1910for tool_class in [ Tool, DataDestinationTool, SetMetadataTool, DataSourceTool, AsyncDataSourceTool ]:
1911    tool_types[ tool_class.tool_type ] = tool_class
1912
1913# ---- Utility classes to be factored out -----------------------------------
1914       
1915class BadValue( object ):
1916    def __init__( self, value ):
1917        self.value = value
1918
1919class RawObjectWrapper( object ):
1920    """
1921    Wraps an object so that __str__ returns module_name:class_name.
1922    """
1923    def __init__( self, obj ):
1924        self.obj = obj
1925    def __str__( self ):
1926        return "%s:%s" % (self.obj.__module__, self.obj.__class__.__name__)
1927    def __getattr__( self, key ):
1928        return getattr( self.obj, key )
1929
1930class InputValueWrapper( object ):
1931    """
1932    Wraps an input so that __str__ gives the "param_dict" representation.
1933    """
1934    def __init__( self, input, value, other_values={} ):
1935        self.input = input
1936        self.value = value
1937        self._other_values = other_values
1938    def __str__( self ):
1939        return self.input.to_param_dict_string( self.value, self._other_values )
1940    def __getattr__( self, key ):
1941        return getattr( self.value, key )
1942
1943class SelectToolParameterWrapper( object ):
1944    """
1945    Wraps a SelectToolParameter so that __str__ returns the selected value, but all other
1946    attributes are accessible.
1947    """
1948    def __init__( self, input, value, app, other_values={} ):
1949        self.input = input
1950        self.value = value
1951        self.input.value_label = input.value_to_display_text( value, app )
1952        self._other_values = other_values
1953    def __str__( self ):
1954        return self.input.to_param_dict_string( self.value, other_values = self._other_values )
1955    def __getattr__( self, key ):
1956        return getattr( self.input, key )
1957
1958class DatasetFilenameWrapper( object ):
1959    """
1960    Wraps a dataset so that __str__ returns the filename, but all other
1961    attributes are accessible.
1962    """
1963   
1964    class MetadataWrapper:
1965        """
1966        Wraps a Metadata Collection to return MetadataParameters wrapped
1967        according to the metadata spec. Methods implemented to match behavior
1968        of a Metadata Collection.
1969        """
1970        def __init__( self, metadata ):
1971            self.metadata = metadata
1972        def __getattr__( self, name ):
1973            rval = self.metadata.get( name, None )
1974            if name in self.metadata.spec:
1975                if rval is None:
1976                    rval = self.metadata.spec[name].no_value
1977                rval = self.metadata.spec[name].param.to_string( rval )
1978                # Store this value, so we don't need to recalculate if needed
1979                # again
1980                setattr( self, name, rval )
1981            return rval
1982        def __nonzero__( self ):
1983            return self.metadata.__nonzero__()
1984        def __iter__( self ):
1985            return self.metadata.__iter__()
1986        def get( self, key, default=None ):
1987            try:
1988                return getattr( self, key )
1989            except:
1990                return default
1991        def items( self ):
1992            return iter( [ ( k, self.get( k ) ) for k, v in self.metadata.items() ] )
1993   
1994    def __init__( self, dataset, datatypes_registry = None, tool = None, name = None, false_path = None ):
1995        if not dataset:
1996            try:
1997                # TODO: allow this to work when working with grouping
1998                ext = tool.inputs[name].extensions[0]
1999            except:
2000                ext = 'data'
2001            self.dataset = NoneDataset( datatypes_registry = datatypes_registry, ext = ext )
2002        else:
2003            self.dataset = dataset
2004            self.metadata = self.MetadataWrapper( dataset.metadata )
2005        self.false_path = false_path
2006
2007    def __str__( self ):
2008        if self.false_path is not None:
2009            return self.false_path
2010        else:
2011            return self.dataset.file_name
2012
2013    def __getattr__( self, key ):
2014        if self.false_path is not None and key == 'file_name':
2015            return self.false_path
2016        else:
2017            return getattr( self.dataset, key )
2018       
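# Illustrative usage (a sketch; `hda` is an assumed HistoryDatasetAssociation):
#
#     wrapper = DatasetFilenameWrapper( hda, false_path='/jobdir/output_0.dat' )
#     str( wrapper )          # -> '/jobdir/output_0.dat' (false_path wins)
#     wrapper.metadata.dbkey  # -> metadata value rendered via its param spec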
2019def json_fix( val ):
2020    if isinstance( val, list ):
2021        return [ json_fix( v ) for v in val ]
2022    elif isinstance( val, dict ):
2023        return dict( [ ( json_fix( k ), json_fix( v ) ) for ( k, v ) in val.iteritems() ] )
2024    elif isinstance( val, unicode ):
2025        return val.encode( "utf8" )
2026    else:
2027        return val
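# Example (sketch): simplejson returns unicode strings under Python 2, so
#     json_fix( simplejson.loads( '{"a": ["b"]}' ) )
# yields { 'a': [ 'b' ] } with plain (utf8-encoded) str keys and values.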
2028   
2029def get_incoming_value( incoming, key, default ):
2030    if "__" + key + "__is_composite" in incoming:
2031        composite_keys = incoming["__" + key + "__keys"].split()
2032        value = dict()
2033        for composite_key in composite_keys:
2034            value[composite_key] = incoming[key + "_" + composite_key]
2035        return value
2036    else:
2037        return incoming.get( key, default )
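# Sketch of the composite-parameter convention this helper decodes (the field
# names are hypothetical): a form posting
#
#     __foo__is_composite = true
#     __foo__keys         = bar baz
#     foo_bar             = 1
#     foo_baz             = 2
#
# makes get_incoming_value( incoming, 'foo', None ) return {'bar': '1', 'baz': '2'}.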
2038
2039class InterruptedUpload( Exception ):
2040    pass