root/galaxy-central/lib/galaxy/util/__init__.py

リビジョン 2, 17.4 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1"""
2Utility functions used systemwide.
3
4"""
5import logging
6import threading, random, string, re, binascii, pickle, time, datetime, math, re, os, sys, tempfile, stat, grp
7
8# Older py compatibility
9try:
10    set()
11except:
12    from sets import Set as set
13
14try:
15    from hashlib import md5
16except ImportError:
17    from md5 import new as md5
18
19import pkg_resources
20
21pkg_resources.require( 'docutils' )
22import docutils.core
23from galaxy.util.docutils_ext.htmlfrag import Writer as HTMLFragWriter
24
25pkg_resources.require( 'elementtree' )
26from elementtree import ElementTree, ElementInclude
27
28pkg_resources.require( "wchartype" )
29import wchartype
30
# Logger for this module, named after the module itself.
log = logging.getLogger( __name__ )
# Re-entrant lock used by the synchronized() decorator below.
_lock = threading.RLock()

# Leading bytes that identify gzip and bzip2 compressed data.
gzip_magic = '\037\213'
bz2_magic = 'BZh'
36
def is_multi_byte( chars ):
    """
    Report whether C{chars} contains a character that wchartype classifies
    as Asian / full width / kana / hangul.

    Iteration stops at the first character that cannot be converted with
    C{unicode()}; in that case the data is assumed to be binary and False
    is returned.
    """
    # Checked in this order for every character; the first hit wins.
    detectors = ( wchartype.is_asian,
                  wchartype.is_full_width,
                  wchartype.is_kanji,
                  wchartype.is_hiragana,
                  wchartype.is_katakana,
                  wchartype.is_half_katakana,
                  wchartype.is_hangul,
                  wchartype.is_full_digit,
                  wchartype.is_full_letter )
    for raw in chars:
        try:
            candidate = unicode( raw )
        except UnicodeDecodeError:
            # Probably binary
            return False
        for detect in detectors:
            if detect( candidate ):
                return True
    return False
55
def synchronized(func):
    """
    Decorator that funnels every call to C{func} through the module-wide
    re-entrant lock, so only one thread executes it at a time.
    """
    def caller(*params, **kparams):
        # Blocking acquire: wait until the lock is free.
        _lock.acquire(True)
        try:
            result = func(*params, **kparams)
        finally:
            # Released whether func returned or raised.
            _lock.release()
        return result
    return caller
65
def file_iter(fname, sep=None):
    """
    Generator over the lines of the file C{fname}; each line is yielded as
    the list produced by C{line.split(sep)}.

    Lines whose first character is C{#} are skipped.  Blank lines are NOT
    skipped: they still contain their newline, so they are yielded as an
    empty list (C{sep=None}) or as C{['\\n']} (explicit C{sep}).

    The file is closed when the generator is exhausted, closed, or garbage
    collected.

    >>> lines = [ line for line in file_iter(__file__) ]
    >>> len(lines) !=  0
    True
    """
    handle = open(fname)
    try:
        for line in handle:
            if line and line[0] != '#':
                yield line.split(sep)
    finally:
        # Runs on exhaustion and on generator.close(), so the descriptor
        # is never left to the garbage collector's schedule.
        handle.close()
79
def file_reader(fp, chunk_size=65536):
    """
    Generator that yields the contents of the open file object C{fp} in
    pieces of at most C{chunk_size} (default 64k), stopping at the first
    empty read.

    C{fp} is always closed when the generator finishes: on normal
    exhaustion, when C{fp.read} raises, and when the consumer abandons the
    generator early (C{close()} / garbage collection).
    """
    try:
        while 1:
            data = fp.read(chunk_size)
            if not data:
                break
            yield data
    finally:
        fp.close()
88
def unique_id(KEY_SIZE=128):
    """
    Generates an unique id: the MD5 hex digest (32 characters) of the
    decimal representation of C{KEY_SIZE} random bits.

    NOTE(review): the bits come from the C{random} module, which is not a
    cryptographic source; confirm that no caller relies on these ids being
    unguessable.

    >>> ids = [ unique_id() for i in range(1000) ]
    >>> len(set(ids))
    1000
    """
    seed = str( random.getrandbits( KEY_SIZE ) )
    # The seed is pure ASCII digits; encoding it explicitly hands md5 a
    # byte string on every Python version.
    return md5( seed.encode( 'ascii' ) ).hexdigest()
99
def parse_xml(fname):
    """
    Parse C{fname} into an ElementTree, run ElementInclude over its root
    element, and return the tree.
    """
    document = ElementTree.parse(fname)
    ElementInclude.include(document.getroot())
    return document
106
def xml_to_string(elem):
    """Serialize the xml element C{elem} with ElementTree.tostring and return the result."""
    return ElementTree.tostring(elem)
111
# Characters that sanitize_text() lets through unchanged.  ascii_letters is
# used rather than string.letters because the latter is locale-dependent,
# which would make the whitelist vary with the process locale.
valid_chars  = set(string.ascii_letters + string.digits + " -=_.()/+*^,:?!")

# Characters that are allowed but need to be escaped; sanitize_text() emits
# the right-hand token and restore_text() turns it back into the character.
mapped_chars = { '>' : '__gt__',
                 '<' : '__lt__',
                 "'" : '__sq__',
                 '"' : '__dq__',
                 '[' : '__ob__',
                 ']' : '__cb__',
                 '{' : '__oc__',
                 '}' : '__cc__',
                 '@' : '__at__',
                 '\n' : '__cn__',
                 '\r' : '__cr__',
                 '\t' : '__tc__'
                 }
129
def restore_text(text):
    """
    Undo the escaping applied by sanitize_text: every escape token listed
    in C{mapped_chars} is replaced by the character it stands for.
    """
    restored = text
    for plain_char, token in mapped_chars.items():
        restored = restored.replace(token, plain_char)
    return restored
135
def sanitize_text(text):
    """
    Restricts the characters that are allowed in a text.

    Characters in C{valid_chars} are kept, characters in C{mapped_chars}
    are replaced by their escape token, everything else becomes C{X}.
    """
    def _clean(char):
        if char in valid_chars:
            return char
        if char in mapped_chars:
            return mapped_chars[char]
        return 'X' # makes debugging easier
    return ''.join([ _clean(char) for char in text ])
147
def sanitize_param(value):
    """
    Clean incoming parameters (strings or lists).

    A string is passed through sanitize_text; a list is returned as a new
    list with sanitize_text applied to every element.

    Raises C{Exception} for any other type.  The offending value itself is
    deliberately not printed or included in the message, only its type.
    """
    if isinstance( value, basestring ):
        return sanitize_text( value )
    if isinstance( value, list ):
        return [ sanitize_text( item ) for item in value ]
    raise Exception( 'Unknown parameter type (%s)' % ( type( value ), ) )
157
class Params:
    """
    Stores and 'sanitizes' parameters. Alphanumeric characters and the
    non-alphanumeric ones that are deemed safe are let to pass through (see L{valid_chars}).
    Some non-safe characters are escaped to safe forms for example C{>} becomes C{__gt__}
    (see L{mapped_chars}). All other characters are replaced with C{X}.

    Operates on string or list values only (HTTP parameters).

    >>> values = { 'status':'on', 'symbols':[  'alpha', '<>', '$rm&#!' ]  }
    >>> par = Params(values)
    >>> par.status
    'on'
    >>> par.value == None      # missing attributes return None
    True
    >>> par.get('price', 0)
    0
    >>> par.symbols            # replaces unknown symbols with X
    ['alpha', '__lt____gt__', 'XrmXX!']
    >>> par.flatten()          # flattening to a list
    [('status', 'on'), ('symbols', 'alpha'), ('symbols', '__lt____gt__'), ('symbols', 'XrmXX!')]
    """

    # is NEVER_SANITIZE required now that sanitizing for tool parameters can be controlled on a per parameter basis and occurs via InputValueWrappers?
    NEVER_SANITIZE = ['file_data', 'url_paste', 'URL', 'filesystem_paths']

    def __init__( self, params, sanitize=True ):
        """
        Copy the C{params} mapping into this object's attributes.

        With C{sanitize} true (the default) every value goes through
        sanitize_param, except for keys named in NEVER_SANITIZE or grouped
        keys ending in C{|<name>} for such a name.  Anything relying on
        NEVER_SANITIZE should be changed to not require this and
        NEVER_SANITIZE should be removed.
        """
        if not sanitize:
            self.__dict__.update( params )
            return
        # Suffixes that mark a grouped parameter as exempt, built once
        # instead of once per key.
        exempt_suffixes = tuple( [ "|%s" % name for name in self.NEVER_SANITIZE ] )
        for key, value in params.items():
            if key in self.NEVER_SANITIZE or key.endswith( exempt_suffixes ):
                self.__dict__[ key ] = value
            else:
                self.__dict__[ key ] = sanitize_param( value )

    def flatten(self):
        """
        Creates a list of (key, value) tuples; a key whose value is a list
        contributes one tuple per list element.
        """
        flat = []
        for key, value in self.__dict__.items():
            # isinstance (not an exact type match) so list subclasses are
            # expanded too, matching the check done by sanitize_param.
            if isinstance( value, list ):
                for item in value:
                    flat.append( (key, item) )
            else:
                flat.append( (key, value) )
        return flat

    def __getattr__(self, name):
        """This is here to ensure that we get None for non existing parameters"""
        return None

    def get(self, key, default):
        """Return the stored value for C{key}, or C{default} when it is absent."""
        return self.__dict__.get(key, default)

    def __str__(self):
        return '%s' % self.__dict__

    def __len__(self):
        return len(self.__dict__)

    def __iter__(self):
        return iter(self.__dict__)

    def update(self, values):
        """Merge C{values} into the stored parameters WITHOUT sanitizing them."""
        self.__dict__.update(values)
225
def rst_to_html( s ):
    """
    Convert a blob of reStructuredText to HTML using the HTML fragment
    writer.  Non-blank docutils warnings are forwarded to the "docutils"
    logger instead of being written to a real stream.
    """
    docutils_log = logging.getLogger( "docutils" )

    class FakeStream( object ):
        """Minimal file-like object handed to docutils as its warning stream."""
        def write( self, message ):
            # Skip empty and whitespace-only writes.
            if len( message ) > 0 and not message.isspace():
                docutils_log.warn( message )

    fragment_writer = HTMLFragWriter()
    overrides = dict( warning_stream=FakeStream() )
    return docutils.core.publish_string( s, writer=fragment_writer, settings_overrides=overrides )
234
def xml_text(root, name=None):
    """
    Returns the text inside an element, with line breaks removed and
    surrounding whitespace stripped.

    Without C{name} the text of C{root} itself is used.  With C{name}, a
    non-empty attribute of that name on C{root} is returned as-is;
    otherwise the first child element called C{name} supplies the text.
    An empty string is returned when nothing is found.
    """
    if name is None:
        target = root
    else:
        # Try attribute first
        attribute = root.get(name)
        if attribute:
            return attribute
        # Then try as element
        target = root.find(name)
    if target is None or not target.text:
        # No luck, return empty string
        return ''
    return ''.join(target.text.splitlines()).strip()
251   
def string_as_bool( string ):
    """
    Return True when C{str(string)}, lower-cased, is 'true', 'yes' or 'on';
    False for anything else.
    """
    return str( string ).lower() in ( 'true', 'yes', 'on' )
257
def listify( item ):
    """
    Normalize C{item} to a list.

    Any false value (None, '', 0, empty list) gives an empty list; a list
    is returned unchanged; a string containing commas is split on them;
    anything else is wrapped in a single-element list.
    """
    if not item:
        return []
    if isinstance( item, list ):
        return item
    if isinstance( item, basestring ) and item.count( ',' ):
        return item.split( ',' )
    return [ item ]
271
def commaify(amount):
    """
    Insert thousands separators into the leading integer part of the
    numeric string C{amount}, e.g. C{'1234567'} becomes C{'1,234,567'}.
    An optional leading minus sign and any trailing text (such as a
    fractional part) are preserved.
    """
    # Each pass peels the last three digits off the leading run of digits;
    # the loop ends once a pass changes nothing.
    leading_group = re.compile( r"^(-?\d+)(\d{3})" )
    while True:
        grouped = leading_group.sub( r'\g<1>,\g<2>', amount )
        if grouped == amount:
            return grouped
        amount = grouped
279 
def object_to_string( obj ):
    """Pickle C{obj} with protocol 2 and return the pickle hex-encoded."""
    pickled = pickle.dumps( obj, 2 )
    return binascii.hexlify( pickled )
282   
def string_to_object( s ):
    """
    Decode the hex string C{s} and unpickle the result (the inverse of
    object_to_string).

    NOTE(review): unpickling can execute arbitrary code for a crafted
    input; confirm that C{s} never originates from an untrusted source.
    """
    pickled = binascii.unhexlify( s )
    return pickle.loads( pickled )
285       
def get_ucsc_by_build(build):
    """Return (name, url) tuples for the entries of C{ucsc_build_sites} whose 'builds' contain C{build}."""
    return [ ( site['name'], site['url'] )
             for site in ucsc_build_sites
             if build in site['builds'] ]
def get_gbrowse_sites_by_build(build):
    """Return (name, url) tuples for the entries of C{gbrowse_build_sites} whose 'builds' contain C{build}."""
    return [ ( site['name'], site['url'] )
             for site in gbrowse_build_sites
             if build in site['builds'] ]
def get_genetrack_sites():
    """Return a (name, url) tuple for every entry of C{genetrack_sites}."""
    return [ ( site['name'], site['url'] ) for site in genetrack_sites ]
303
def read_dbnames(filename):
    """
    Read build names from the tab-separated file C{filename}.

    Each usable line is C{<build id><TAB><display name>}; lines starting
    with C{#} and lines with fewer than two fields are ignored.  The result
    is a list (subclass carrying C{default_value} / C{default_name}) of
    C{(build id, display name)} tuples ordered as follows:

      1. the unspecified build C{?}, if the file lists it;
      2. builds whose id is not an integer, grouped by the id with its
         trailing digits removed; groups are ordered by the display name of
         the first build seen for the group, and within a group the highest
         trailing number comes first;
      3. a separator entry, when both other kinds of entries are present;
      4. builds whose id is an integer ("manual" builds, i.e. microbes),
         ordered by display name.

    Any error while reading is reported on stdout and whatever was
    collected so far is kept; if nothing was collected, the single default
    entry is returned.
    """
    class DBNames( list ):
        default_value = "?"
        default_name = "unspecified (?)"
    db_names = DBNames()
    # Trailing revision number of a build id, e.g. the "18" of "hg18".
    trailing_digits = re.compile( r'\d+$' )
    try:
        ucsc_builds = {}
        man_builds = [] #assume these are integers
        name_to_db_base = {}
        builds_file = open( filename )
        try:
            for line in builds_file:
                try:
                    if line[0:1] == "#":
                        continue
                    fields = line.replace( "\r", "" ).replace( "\n", "" ).split( "\t" )
                    # Unpack both fields up front so that a short line is
                    # rejected before it can leave a half-registered group.
                    build, name = fields[0], fields[1]
                    #Special case of unspecified build is at top of list
                    if build == "?":
                        db_names.insert( 0, ( build, name ) )
                        continue
                    try: #manual build (i.e. microbes)
                        int( build )
                    except ValueError:
                        pass
                    else:
                        man_builds.append( ( name, build ) )
                        continue
                    #UCSC build
                    db_base = build.rstrip( '0123456789' )
                    if db_base not in ucsc_builds:
                        ucsc_builds[db_base] = []
                        name_to_db_base[name] = db_base
                    #we want to sort within a species numerically by revision number
                    revision = trailing_digits.search( build )
                    if revision:
                        build_rev = int( revision.group() )
                    else:
                        build_rev = 0
                    ucsc_builds[db_base].append( ( build_rev, build, name ) )
                except Exception:
                    # Best effort: a malformed line is skipped, not fatal.
                    continue
        finally:
            builds_file.close()
        for first_name in sorted( name_to_db_base ):
            revisions = sorted( ucsc_builds[ name_to_db_base[first_name] ], reverse=True )
            db_names.extend( [ ( rev_build, rev_name ) for build_rev, rev_build, rev_name in revisions ] )
        if len( db_names ) > 1 and len( man_builds ) > 0:
            db_names.append( ( db_names.default_value, '----- Additional Species Are Below -----' ) )
        man_builds.sort()
        db_names.extend( [ ( man_build, man_name ) for man_name, man_build in man_builds ] )
    except Exception:
        # sys.exc_info() keeps this working on every Python version the
        # module supports.
        sys.stdout.write( "ERROR: Unable to read builds file: %s\n" % ( sys.exc_info()[1], ) )
    if len( db_names ) < 1:
        db_names = DBNames( [ ( db_names.default_value, db_names.default_name ) ] )
    return db_names
353
def read_build_sites( filename, check_builds=True ):
    """
    Read site definitions from the tab-separated file C{filename}; this
    file should probably be merged with the one read by read_dbnames.

    Each usable line is C{<site name><TAB><url>[<TAB><comma separated builds>]}.
    Returns a list of dicts with keys C{'name'} and C{'url'}, plus
    C{'builds'} (a list of strings) when C{check_builds} is true.  Lines
    starting with C{#} and lines lacking a required field are skipped.

    If the file cannot be read, an error is reported on stdout and the
    sites collected so far (possibly none) are returned.
    """
    build_sites = []
    try:
        sites_file = open( filename )
        try:
            for line in sites_file:
                if line[0:1] == "#":
                    continue
                fields = line.replace( "\r", "" ).replace( "\n", "" ).split( "\t" )
                try:
                    site_dict = { 'name': fields[0], 'url': fields[1] }
                    if check_builds:
                        site_dict['builds'] = fields[2].split( "," )
                except IndexError:
                    # Too few columns on this line: ignore it.
                    continue
                build_sites.append( site_dict )
        finally:
            sites_file.close()
    except Exception:
        sys.stdout.write( "ERROR: Unable to read builds for site file %s\n" % filename )
    return build_sites
374
def relpath( path, start = None ):
    """
    Return a relative version of C{path}, taken from C{start} (default:
    the current directory).

    Python 2.6 and later delegate to os.path.relpath.  Older interpreters
    use a copy of posixpath.relpath() from Python 2.6.1, which will likely
    not function properly on non-posix systems, i.e. windows.

    Raises C{ValueError} when C{path} is empty.
    """
    #version 2.6+ has it built in, we'll use the 'official' copy
    if sys.version_info[:2] >= ( 2, 6 ):
        if start is None:
            return os.path.relpath( path )
        return os.path.relpath( path, start )

    if start is None:
        start = os.curdir
    if not path:
        raise ValueError( "no path specified" )

    start_parts = os.path.abspath( start ).split( os.sep )
    path_parts = os.path.abspath( path ).split( os.sep )

    # Number of leading components shared by both paths.
    shared = len( os.path.commonprefix( [ start_parts, path_parts ] ) )

    # Climb out of what is left of start, then descend into what is left
    # of path.
    climb = [ os.pardir ] * ( len( start_parts ) - shared )
    relative_parts = climb + path_parts[ shared: ]
    if not relative_parts:
        return os.curdir
    return os.path.join( *relative_parts )
409
def stringify_dictionary_keys( in_dict ):
    """
    Return a new dictionary holding the values of C{in_dict} under
    C{str(key)}.

    Only the top level is converted (no recursion).  Needed because
    unicode keys are not valid for expansion into keyword arguments on
    method calls.
    """
    return dict( [ ( str( key ), value ) for key, value in in_dict.items() ] )
418
def recursively_stringify_dictionary_keys( d ):
    """
    Return a copy of C{d} in which every dictionary key, at any depth of
    nested dicts and lists, has been replaced by C{key.encode('utf-8')}.
    Values that are neither dict nor list are returned unchanged.
    """
    if isinstance( d, dict ):
        converted = {}
        for key, value in d.items():
            encoded_key = key.encode( 'utf-8' )
            converted[ encoded_key ] = recursively_stringify_dictionary_keys( value )
        return converted
    if isinstance( d, list ):
        return [ recursively_stringify_dictionary_keys( element ) for element in d ]
    return d
426
def mkstemp_ln( src, prefix='mkstemp_ln_' ):
    """
    From tempfile._mkstemp_inner, generate a hard link in the same dir with a
    random name.  Created so we can persist the underlying file of a
    NamedTemporaryFile upon its closure.

    Returns the absolute path of the new link, named C{prefix} followed by
    a random suffix and placed in the directory of C{src}.

    Raises C{OSError} when linking fails for any reason other than the
    candidate name already existing, and C{IOError} (errno EEXIST) when
    C{tempfile.TMP_MAX} candidate names were all taken.
    """
    # Imported here because the module-level imports provide neither name.
    import errno
    import itertools

    target_dir = os.path.dirname( src )
    candidates = tempfile._get_candidate_names()
    for name in itertools.islice( candidates, tempfile.TMP_MAX ):
        link_path = os.path.join( target_dir, prefix + name )
        try:
            os.link( src, link_path )
        except OSError:
            if sys.exc_info()[1].errno == errno.EEXIST:
                continue # name already taken, try the next candidate
            raise
        return os.path.abspath( link_path )
    raise IOError( errno.EEXIST, "No usable temporary file name found" )
446
def umask_fix_perms( path, umask, unmasked_perms, gid=None ):
    """
    umask-friendly permissions fixing

    Sets the mode of C{path} to C{unmasked_perms} with the C{umask} bits
    cleared and, when C{gid} is given, changes its group to C{gid}.  Each
    step is skipped when the file already has the wanted value.  Failures
    are logged and never raised.
    """
    wanted_mode = unmasked_perms & ~umask
    try:
        info = os.stat( path )
    except OSError:
        log.exception( 'Unable to set permissions or group on %s' % path )
        return
    # fix modes
    current_mode = stat.S_IMODE( info.st_mode )
    if current_mode != wanted_mode:
        try:
            os.chmod( path, wanted_mode )
        except Exception:
            error = sys.exc_info()[1]
            details = ( oct( umask ), path, oct( wanted_mode ), oct( current_mode ), error )
            log.warning( 'Unable to honor umask (%s) for %s, tried to set: %s but mode remains %s, error was: %s' % details )
    # fix group
    if gid is None or info.st_gid == gid:
        return
    try:
        os.chown( path, -1, gid )
    except Exception:
        error = sys.exc_info()[1]
        # Prefer group database entries in the message; fall back to the
        # bare numeric ids when the lookup fails.
        try:
            desired_group = grp.getgrgid( gid )
            current_group = grp.getgrgid( info.st_gid )
        except:
            desired_group = gid
            current_group = info.st_gid
        details = ( desired_group, path, current_group, error )
        log.warning( 'Unable to honor primary group (%s) for %s, group remains %s, error was: %s' % details )
482
# Three directory levels above this package's directory.
galaxy_root_path = os.path.join( __path__[0], "..", "..", ".." )
_shared_data_dir = os.path.join( galaxy_root_path, "tool-data", "shared" )

# The dbnames list is used in edit attributes and the upload tool
dbnames = read_dbnames( os.path.join( _shared_data_dir, "ucsc", "builds.txt" ) )
# Site tables consulted by the get_*_sites* helpers above.
ucsc_build_sites = read_build_sites( os.path.join( _shared_data_dir, "ucsc", "ucsc_build_sites.txt" ) )
gbrowse_build_sites = read_build_sites( os.path.join( _shared_data_dir, "gbrowse", "gbrowse_build_sites.txt" ) )
genetrack_sites = read_build_sites( os.path.join( _shared_data_dir, "genetrack", "genetrack_sites.txt" ), check_builds=False )

# Running the module directly executes its doctests.
if __name__ == '__main__':
    import doctest
    import sys
    doctest.testmod( sys.modules[__name__], verbose=False )
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。