1 | import sys, logging, tarfile |
---|
2 | from galaxy.util import parse_xml |
---|
3 | from galaxy.util.bunch import Bunch |
---|
4 | |
---|
5 | log = logging.getLogger( __name__ ) |
---|
6 | |
---|
7 | if sys.version_info[:2] == ( 2, 4 ): |
---|
8 | from galaxy import eggs |
---|
9 | eggs.require( 'ElementTree' ) |
---|
10 | from elementtree import ElementTree |
---|
11 | else: |
---|
12 | from xml.etree import ElementTree |
---|
13 | |
---|
class DatatypeVerificationError( Exception ):
    """Raised by the datatype verify() methods when an uploaded archive is rejected."""
---|
16 | |
---|
class Registry( object ):
    """
    Maps upload type extensions to datatype instances.

    The mapping is built from the <datatype> elements found inside the
    <registration> element of the datatypes config file.
    """

    def __init__( self, root_dir=None, config=None ):
        """
        Create the registry, loading datatypes from `config` when both
        `root_dir` and `config` are given.  Without them the registry starts
        out empty.  A datatype that fails to load is logged and skipped so
        that one bad entry does not prevent the others from loading.
        """
        self.datatypes_by_extension = {}
        if root_dir and config:
            # Parse datatypes_conf.xml
            tree = parse_xml( config )
            root = tree.getroot()
            # Load datatypes from config
            log.debug( 'Loading datatypes from %s' % config )
            registration = root.find( 'registration' )
            if registration is None:
                # find() returns None when the element is absent; report it
                # instead of failing with an AttributeError on None.
                log.warning( 'No <registration> element found in %s, no datatypes were loaded' % config )
                return
            for elem in registration.findall( 'datatype' ):
                self._load_datatype( elem )

    def _load_datatype( self, elem ):
        """Register the datatype (and optional model class) described by one <datatype> element."""
        extension = elem.get( 'extension', None )
        try:
            dtype = elem.get( 'type', None )
            model_object = elem.get( 'model', None )
            if extension and dtype:
                # The type attribute has the form "package.module:ClassName"
                fields = dtype.split( ':' )
                datatype_class = self._import_attribute( fields[0], fields[1] )
                self.datatypes_by_extension[ extension ] = datatype_class()
                log.debug( 'Loaded datatype: %s' % dtype )
            if model_object:
                model_module, model_class = model_object.split( ':' )
                self.datatypes_by_extension[ extension ].model_object = self._import_attribute( model_module, model_class )
                log.debug( 'Added model class: %s to datatype: %s' % ( model_class, dtype ) )
        except Exception:
            # sys.exc_info() is used rather than "except Exception, e" so the
            # same source parses on every Python version this file may run on.
            e = sys.exc_info()[1]
            log.warning( 'Error loading datatype "%s", problem: %s' % ( extension, str( e ) ) )

    @staticmethod
    def _import_attribute( module_name, attribute_name ):
        """Import the dotted `module_name` and return its attribute named `attribute_name`."""
        # __import__ with a dotted name imports every package along the path
        # but returns the top-level package, so walk down to the leaf module.
        # Importing the full path (not just the first component) guarantees
        # the submodules are loaded before getattr is used to reach them.
        module = __import__( module_name )
        for component in module_name.split( '.' )[1:]:
            module = getattr( module, component )
        return getattr( module, attribute_name )

    def get_datatype_by_extension( self, ext ):
        """Return the datatype instance registered for `ext`, or None if there is none."""
        return self.datatypes_by_extension.get( ext, None )

    def get_datatype_labels( self ):
        """Return a list of ( extension, label ) tuples, one per registered datatype."""
        return [ ( ext, datatype.label ) for ext, datatype in self.datatypes_by_extension.items() ]
---|
59 | |
---|
class Tool( object ):
    """
    Datatype for a single tool archive: a tar file containing one or more
    tool config (xml) files.
    """

    def __init__( self, model_object=None ):
        self.model_object = model_object
        self.label = 'Tool'

    def verify( self, f, xml_files=None, tool_tags=None ):
        """
        Verify the tool config files contained in the tar archive `f.name`.

        xml_files and tool_tags will only be received if we're called from
        the ToolSuite.verify() method.  `xml_files` is the list of archive
        member names to check and `tool_tags` maps each tool id to a
        ( name, version ) tuple taken from the suite config.

        Returns a Bunch describing the first tool config found when called
        without `tool_tags`.  Returns None when verifying the tools of a
        suite, or when none of the xml files is a tool config.

        Raises DatatypeVerificationError if the archive is unreadable or a
        tool config is invalid.
        """
        try:
            tar = tarfile.open( f.name )
        except tarfile.ReadError:
            raise DatatypeVerificationError( 'The archive is not a readable tar file.' )
        # Separate try/finally so the archive is closed on every exit path.
        try:
            return self._verify_archive( tar, xml_files, tool_tags )
        finally:
            tar.close()

    def _verify_archive( self, tar, xml_files, tool_tags ):
        """Check each xml member of the open archive `tar`; see verify() for the contract."""
        if not xml_files:
            names = tar.getnames()
            # Make sure we're not uploading a tool suite
            if [ name for name in names if name.lower().find( 'suite_config.xml' ) >= 0 ]:
                raise DatatypeVerificationError( 'The archive includes a suite_config.xml file, so set the upload type to "Tool Suite".' )
            xml_files = [ name for name in names if name.lower().endswith( '.xml' ) ]
            if not xml_files:
                raise DatatypeVerificationError( 'The archive does not contain any xml config files.' )
        for xml_file in xml_files:
            try:
                tree = ElementTree.parse( tar.extractfile( xml_file ) )
                root = tree.getroot()
            except Exception:
                # An unparsable xml file is logged and skipped, not treated as fatal.
                log.exception( 'fail:' )
                continue
            if root.tag != 'tool':
                # TODO: should we verify files that are not tool configs?
                log.debug( "The file named (%s) is not a tool config, so skipping verification." % str( xml_file ) )
                continue
            for required in ( 'id', 'name', 'version' ):
                if required not in root.keys():
                    raise DatatypeVerificationError( "Tool xml file (%s) does not include the required '%s' attribute in the <tool> tag" % ( str( xml_file ), required ) )
            if tool_tags:
                self._check_against_suite( root, tool_tags )
            else:
                # We are not verifying a tool suite, so we'll create a bunch for returning to the caller.
                return self._build_tool_bunch( root )

    def _check_against_suite( self, root, tool_tags ):
        """Ensure the <tool> element `root` matches its entry in the suite's tool_tags dictionary."""
        # We are verifying the tools inside a tool suite, so the current tag should have been found in the suite_config.xml
        # file parsed in the ToolSuite verify() method.  The tool_tags dictionary should include a key matching the current
        # tool Id, and a tuple value matching the tool name and version.
        tool_id = root.attrib[ 'id' ]
        if tool_id not in tool_tags:
            raise DatatypeVerificationError( 'Tool Id (%s) is not included in the suite_config.xml file.' % \
                ( str( tool_id ) ) )
        suite_name, suite_version = tool_tags[ tool_id ][ 0 ], tool_tags[ tool_id ][ 1 ]
        if root.attrib[ 'name' ] != suite_name:
            raise DatatypeVerificationError( 'Tool name (%s) differs between suite_config.xml and the tool config file for tool Id (%s).' % \
                ( str( root.attrib[ 'name' ] ), str( tool_id ) ) )
        if root.attrib[ 'version' ] != suite_version:
            raise DatatypeVerificationError( 'Tool version (%s) differs between suite_config.xml and the tool config file for tool Id (%s).' % \
                ( str( root.attrib[ 'version' ] ), str( tool_id ) ) )

    def _build_tool_bunch( self, root ):
        """Return a Bunch holding the id, name, version, description and message of the <tool> element `root`."""
        # The caller has already checked that id, name and version are present.
        tool_bunch = Bunch()
        tool_bunch.id = root.attrib[ 'id' ]
        tool_bunch.name = root.attrib[ 'name' ]
        tool_bunch.version = root.attrib[ 'version' ]
        tool_bunch.description = ''
        desc_tag = root.find( 'description' )
        if desc_tag is not None:
            description = desc_tag.text
            if description:
                tool_bunch.description = description.strip()
        tool_bunch.message = 'Tool: %s %s, Version: %s, Id: %s' % \
            ( str( tool_bunch.name ), str( tool_bunch.description ), str( tool_bunch.version ), str( tool_bunch.id ) )
        return tool_bunch

    def create_model_object( self, datatype_bunch ):
        """
        Instantiate the configured model class and populate it from
        `datatype_bunch` by calling its create_from_datatype() method.

        Raises Exception if no model class is configured or if
        `datatype_bunch` is None.
        """
        if self.model_object is None:
            raise Exception( 'No model object configured for %s, check the datatype configuration file' % self.__class__.__name__ )
        if datatype_bunch is None:
            # TODO: do it automatically
            raise Exception( 'Unable to create %s model object without passing in data' % self.__class__.__name__ )
        o = self.model_object()
        o.create_from_datatype( datatype_bunch )
        return o
---|
135 | |
---|
class ToolSuite( Tool ):
    """
    Datatype for a tool suite archive: a tar file containing a
    suite_config.xml file plus the config files of the tools it lists.
    """

    def __init__( self, model_object=None ):
        self.model_object = model_object
        self.label = 'Tool Suite'

    def verify( self, f ):
        """
        Verify the suite config in the tar archive `f.name`, then verify each
        tool config in the archive against it using Tool.verify().

        Returns a Bunch holding the suite's id, name, version, description
        and message.  Raises DatatypeVerificationError if the archive is
        unreadable, the suite config is missing or invalid, or a tool config
        fails verification.

        A sample tool suite config:
        <suite id="onto_toolkit" name="ONTO Toolkit" version="1.0">
            <description>ONTO-Toolkit is a collection of Galaxy tools which support the manipulation of bio-ontologies.</description>
            <tool id="get_ancestor_terms" name="Get the ancestor terms of a given OBO term" version="1.0.0">
                <description>Collects the ancestor terms from a given term in the given OBO ontology</description>
            </tool>
            <tool id="get_child_terms" name="Get the child terms of a given OBO term" version="1.0.0">
                <description>Collects the child terms from a given term in the given OBO ontology</description>
            </tool>
        </suite>
        """
        try:
            tar = tarfile.open( f.name )
        except tarfile.ReadError:
            raise DatatypeVerificationError( 'The archive is not a readable tar file.' )
        # Separate try/finally so the archive is closed on every exit path.
        # Tool.verify() below opens the archive again from f.name.
        try:
            suite_bunch, tool_tags, xml_files = self._verify_suite_config( tar )
        finally:
            tar.close()
        # Verify all included tool config files
        # NOTE(review): when the suite config lists no <tool> elements,
        # tool_tags is empty and Tool.verify() falls back to its single-tool
        # mode, so the tools are not compared to the suite - confirm intended.
        Tool.verify( self, f, xml_files=xml_files, tool_tags=tool_tags )
        return suite_bunch

    def _verify_suite_config( self, tar ):
        """Parse the suite config in `tar` and return ( suite_bunch, tool_tags, xml_files )."""
        names = tar.getnames()
        candidates = [ name for name in names if name.lower().find( 'suite_config.xml' ) >= 0 ]
        if not candidates:
            raise DatatypeVerificationError( 'The archive does not contain the required suite_config.xml config file.  If you are uploading a single tool archive, set the upload type to "Tool".' )
        suite_config = candidates[ 0 ]
        # Parse and verify suite_config
        root = None
        try:
            root = ElementTree.parse( tar.extractfile( suite_config ) ).getroot()
        except Exception:
            # The parse failure is logged here and reported to the user below.
            log.exception( 'fail:' )
        if root is None or root.tag != 'suite':
            raise DatatypeVerificationError( "The file named %s is not a valid tool suite config." % str( suite_config ) )
        suite_bunch = Bunch()
        try:
            suite_bunch.id = root.attrib[ 'id' ]
            suite_bunch.name = root.attrib[ 'name' ]
            suite_bunch.version = root.attrib[ 'version' ]
        except KeyError:
            # sys.exc_info() is used rather than "except KeyError, e" so the
            # same source parses on every Python version this file may run on.
            e = sys.exc_info()[1]
            raise DatatypeVerificationError( 'The file named %s does not conform to the specification. Missing required <suite> tag attribute: %s' % ( str( suite_config ), str( e ) ) )
        suite_bunch.description = ''
        desc_tag = root.find( 'description' )
        if desc_tag is not None:
            description = desc_tag.text
            if description:
                suite_bunch.description = description.strip()
        suite_bunch.message = 'Tool suite: %s %s, Version: %s, Id: %s' % \
            ( str( suite_bunch.name ), str( suite_bunch.description ), str( suite_bunch.version ), str( suite_bunch.id ) )
        # Create a dictionary of the tools in the suite where the keys are tool_ids and the
        # values are tuples of tool name and version
        tool_tags = {}
        for elem in root.findall( 'tool' ):
            try:
                tool_tags[ elem.attrib[ 'id' ] ] = ( elem.attrib[ 'name' ], elem.attrib[ 'version' ] )
            except KeyError:
                # Report an incomplete <tool> tag as a verification failure
                # rather than letting the raw KeyError escape to the caller.
                e = sys.exc_info()[1]
                raise DatatypeVerificationError( 'A <tool> tag in the file named %s is missing the required attribute: %s' % ( str( suite_config ), str( e ) ) )
        # Every other xml file is a candidate tool config.  The suite config
        # is excluded by its actual member name as well, because it is located
        # by substring match and so may live in a subdirectory of the archive.
        xml_files = [ name for name in names
                      if name.lower().endswith( '.xml' )
                      and name != suite_config
                      and name.lower() != 'suite_config.xml' ]
        if not xml_files:
            raise DatatypeVerificationError( 'The archive does not contain any tool config (xml) files.' )
        return suite_bunch, tool_tags, xml_files
---|