root/galaxy-central/lib/galaxy/tools/data/__init__.py

リビジョン 2, 5.1 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1"""
2Manage tool data tables, which store (at the application level) data that is
3used by tools, for example in the generation of dynamic options. Tables are
4loaded and stored by names which tools use to refer to them. This allows
5users to configure data tables for a local Galaxy instance without needing
6to modify the tool configurations.
7"""
8
9import logging, sys, os.path
10from galaxy import util
11
12log = logging.getLogger( __name__ )
13
class ToolDataTableManager( object ):
    """
    Manages a collection of tool data tables, keyed by table name.

    Supports ``manager[ name ]`` lookup and ``name in manager`` membership
    tests, both of which are answered from the `data_tables` dictionary.
    """

    def __init__( self, config_filename=None ):
        """
        Create an empty manager and, when `config_filename` is given (truthy),
        load the tables declared in that file.
        """
        self.data_tables = {}
        if config_filename:
            self.add_from_config_file( config_filename )

    def __getitem__( self, key ):
        """
        Return the table registered under `key`; raises KeyError if there
        is none.
        """
        return self.data_tables[ key ]

    def __contains__( self, key ):
        """
        Return True if a table is registered under `key`.
        """
        return key in self.data_tables

    def add_from_config_file( self, config_filename ):
        """
        Load every 'table' element found under the root of the XML file
        `config_filename` (parsed with `util.parse_xml`).

        The 'type' attribute of each element (default 'tabular') selects the
        table class from `tool_data_table_types`; an unknown type fails the
        assertion below. A table whose name is already registered replaces
        the earlier entry.
        """
        tree = util.parse_xml( config_filename )
        root = tree.getroot()
        for table_elem in root.findall( 'table' ):
            # Named `table_type` so the builtin `type` is not shadowed
            table_type = table_elem.get( 'type', 'tabular' )
            assert table_type in tool_data_table_types, "Unknown data table type '%s'" % table_type
            table = tool_data_table_types[ table_type ]( table_elem )
            self.data_tables[ table.name ] = table
            log.debug( "Loaded tool data table '%s'", table.name )
39   
class ToolDataTable( object ):
    """
    Base class for tool data tables; it only records the table's name.
    """

    def __init__( self, config_element ):
        """
        Store the value of the 'name' attribute of `config_element` as
        `self.name`.
        """
        table_name = config_element.get( 'name' )
        self.name = table_name
43   
class TabularToolDataTable( ToolDataTable ):
    """
    Data stored in a tabular / separated value format on disk, allows multiple
    files to be merged but all must have the same column definitions.

    <table type="tabular" name="test">
        <column name='...' index = '...' />
        <file path="..." />
        <file path="..." />
    </table>

    Columns may alternatively be declared with a single 'columns' element
    whose text is a comma separated list of column names in order. The
    optional 'separator' (default tab) and 'comment_char' (default '#')
    attributes of the table element control how the files are parsed.
    """

    type_key = 'tabular'

    def __init__( self, config_element ):
        """
        Record the table name, then configure and load the table from
        `config_element`.
        """
        super( TabularToolDataTable, self ).__init__( config_element )
        self.configure_and_load( config_element )

    def configure_and_load( self, config_element ):
        """
        Configure and load table from an XML element.

        Reads the 'separator' and 'comment_char' attributes, parses the
        column definitions, then parses the file named by the 'path'
        attribute of every 'file' child. A path that does not exist is
        logged as a warning and skipped. The rows of all files, in order,
        are stored in `self.data`.
        """
        self.separator = config_element.get( 'separator', '\t' )
        self.comment_char = config_element.get( 'comment_char', '#' )
        # Configure columns
        self.parse_column_spec( config_element )
        # Read every file
        all_rows = []
        for file_element in config_element.findall( 'file' ):
            filename = file_element.get( 'path' )
            if not os.path.exists( filename ):
                log.warning( "Cannot find index file '%s' for tool data table '%s'", filename, self.name )
                continue
            reader = open( filename )
            # Close the handle even if parsing fails; try/finally is used
            # rather than `with` to stay compatible with old interpreters
            try:
                all_rows.extend( self.parse_file_fields( reader ) )
            finally:
                reader.close()
        self.data = all_rows

    def get_fields( self ):
        """
        Return the loaded rows, each a list of field strings.
        """
        return self.data

    def parse_column_spec( self, config_element ):
        """
        Parse column definitions, which can either be a set of 'column' elements
        with a name and index (as in dynamic options config), or a shorthand
        comma separated list of names in order as the text of a 'columns'
        element.

        Sets `self.columns` (column name -> field index) and
        `self.largest_index` (the highest field index any column refers to).

        A column named 'value' is required. If no 'name' column is defined
        it defaults to the same index as 'value'.
        """
        self.columns = {}
        # Must be set before the 'column' branch compares against it
        self.largest_index = 0
        columns_elem = config_element.find( 'columns' )
        if columns_elem is not None:
            column_names = util.xml_text( columns_elem )
            column_names = [ n.strip() for n in column_names.split( ',' ) ]
            for index, name in enumerate( column_names ):
                self.columns[ name ] = index
                self.largest_index = index
        else:
            for column_elem in config_element.findall( 'column' ):
                name = column_elem.get( 'name', None )
                assert name is not None, "Required 'name' attribute missing from column def"
                index = column_elem.get( 'index', None )
                assert index is not None, "Required 'index' attribute missing from column def"
                index = int( index )
                self.columns[ name ] = index
                if index > self.largest_index:
                    self.largest_index = index
        assert 'value' in self.columns, "Required 'value' column missing from column def"
        if 'name' not in self.columns:
            self.columns[ 'name' ] = self.columns[ 'value' ]

    def parse_file_fields( self, reader ):
        """
        Parse separated lines from `reader` (any iterable of lines) and
        return a list of rows, each a list of field strings.

        Lines whose first non-blank text starts with `self.comment_char` and
        empty lines are skipped, as are rows with too few fields to contain
        index `self.largest_index`.

        TODO: Allow named access to fields using the column names.
        """
        rval = []
        for line in reader:
            if line.lstrip().startswith( self.comment_char ):
                continue
            line = line.rstrip( "\n\r" )
            if not line:
                continue
            fields = line.split( self.separator )
            # Keep only rows that have a field at every configured index
            if self.largest_index < len( fields ):
                rval.append( fields )
        return rval
129
# Registry mapping each type_key to the tool data table class that handles it
tool_data_table_types = dict( ( table_class.type_key, table_class ) for table_class in ( TabularToolDataTable, ) )
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。