| 1 | """ |
|---|
| 2 | Utility functions. |
|---|
| 3 | |
|---|
| 4 | |
|---|
| 5 | """ |
|---|
| 6 | import logger, conf |
|---|
| 7 | import os, sys, random, hashlib, re, string, csv, gc |
|---|
| 8 | import tempfile, os, random, glob, time |
|---|
| 9 | |
|---|
| 10 | # dealing with roman numerals for chromosomes |
|---|
| 11 | from genetrack import roman |
|---|
| 12 | |
|---|
def path_join(*args):
    """
    Joins the given path components and returns the result
    as an absolute path.
    """
    joined = os.path.join(*args)
    return os.path.abspath(joined)
|---|
| 16 | |
|---|
def chromosome_remap(text):
    """
    Attempts to produce the standardized chromosome name from
    multiple possible inputs::

        chr5, chr05, chrV, chrom5, chrom05, chromV -> chr5

    Only the leading ``chrom`` or ``chr`` prefix is removed; the rest of
    the name is converted to an integer if possible, otherwise it is
    handed to ``roman.fromRoman``. When neither conversion succeeds the
    remainder is kept as it is.

    Names that do not start with ``chr`` are returned unchanged.

    >>> names = 'chr1 chr06 chrIX chrom02 chrom12 chromV'.split()
    >>> [chromosome_remap(name) for name in names]
    ['chr1', 'chr6', 'chr9', 'chr2', 'chr12', 'chr5']
    >>> chromosome_remap('scaffold7')
    'scaffold7'
    """
    if not text.startswith('chr'):
        return text

    # strip the prefix only; 'chrom' is checked first because it also
    # starts with 'chr'. A plain str.replace would remove matches from
    # the middle of the name as well.
    prefix = 'chrom' if text.startswith('chrom') else 'chr'
    label = text[len(prefix):]

    try:
        # cast to integer, this also drops leading zeros
        label = int(label)
    except ValueError:
        try:
            # cast from roman numerals
            label = roman.fromRoman(label)
        except Exception:
            # deliberate best effort: the exception type raised by the
            # roman module is not visible from this file, so any failure
            # means "not a roman numeral" and the label is kept unchanged
            pass

    return 'chr%s' % label
|---|
| 47 | |
|---|
class Params(object):
    """
    Attribute container: keyword arguments passed to the constructor
    become attributes of the instance.

    >>> p = Params(a=1, b=2, c=None, d=None)
    >>> p.a, p.b
    (1, 2)
    >>> p.c, p.d
    (None, None)
    """
    def __init__(self, **kwds):
        # every keyword becomes an instance attribute
        vars(self).update(kwds)

    def update(self, other):
        "Overwrites or adds attributes using the items of other"
        vars(self).update(other)

    def defaults(self, other):
        "Sets default values for non-existing attributes"
        attrs = vars(self)
        for key, value in dict(other).items():
            # attributes that already exist keep their current value
            attrs.setdefault(key, value)

    def dict(self):
        "Returns the attribute dictionary itself (not a copy)"
        return vars(self)

    def get(self, key, default=None):
        "Returns the attribute named key or default when it is missing"
        return vars(self).get(key, default)

    def __repr__(self):
        return repr(vars(self))
|---|
| 77 | |
|---|
def uuid(KEY_SIZE=128):
    """
    Generates a unique id.

    Draws KEY_SIZE random bits from the ``random`` module and returns
    the MD5 hex digest (32 hexadecimal characters) of their decimal
    representation. The ``random`` module is not a cryptographic source,
    so the result must not be used as a security token.
    """
    token = str(random.getrandbits(KEY_SIZE))
    # hashlib digests bytes; passing an unencoded str raises a
    # TypeError on Python 3. The token holds only ASCII digits.
    return hashlib.md5(token.encode('ascii')).hexdigest()
|---|
| 82 | |
|---|
def nice_bytes(value):
    """
    Formats a size given in bytes as a human readable string,
    the scaled value is truncated to a whole number.

    >>> nice_bytes(100), nice_bytes(10**4), nice_bytes(10**8), nice_bytes(10**10)
    ('100 bytes', '9 Kbytes', '95 Mbytes', '9 Gbyte')
    """
    kilo, mega, giga = 1024, 1048576, 1073741824

    if value < kilo:
        return "%s bytes" % value
    if value < mega:
        return "%s Kbytes" % int(value / kilo)
    if value < giga:
        return "%s Mbytes" % int(value / mega)
    return "%s Gbyte" % int(value / giga)
|---|
| 94 | |
|---|
def nice_sort(data):
    """
    Sorts the given list of strings in place in the way that humans
    expect: runs of digits are compared as numbers. Returns None.
    Adapted from a posting by Ned Batchelder: http://nedbatchelder.com/blog/200712.html#e20071211T054956

    >>> data = [ 'chr1', 'chr2', 'chr10', 'chr100' ]
    >>> data.sort()
    >>> data
    ['chr1', 'chr10', 'chr100', 'chr2']
    >>> nice_sort(data)
    >>> data
    ['chr1', 'chr2', 'chr10', 'chr100']
    """
    def alphanum(key):
        # splitting on a capturing group alternates text and digit runs:
        # even positions hold text (possibly empty), odd positions hold
        # the captured ASCII digits. Converting only the odd positions
        # keeps every key in the same str/int layout, so keys compare
        # cleanly against each other.
        parts = re.split('([0-9]+)', key)
        # a real list is required: lazy map objects cannot be ordered
        return [int(part) if index % 2 else part
                for index, part in enumerate(parts)]

    data.sort(key=alphanum)
|---|
| 117 | |
|---|
def commify(n):
    """
    Returns the string form of a number with commas inserted
    between groups of three digits

    >>> commify(10000)
    '10,000'
    """
    pattern = re.compile(r'^([-+]?\d+)(\d{3})')
    text = str(n)
    replaced = 1
    # each pass splits the last three digits off the leading digit run,
    # the loop ends once a pass performs no substitution
    while replaced:
        text, replaced = pattern.subn(r'\1,\2', text)
    return text
|---|
| 131 | |
|---|
class Timer(object):
    """
    Measures elapsed wall clock time and formats durations
    given in seconds for display.

    >>> timer = Timer()
    >>> timer.format(30)
    '30.00 seconds'
    >>> timer.format(320)
    '5.3 minutes'
    >>> timer.format(3200)
    '53.3 minutes'
    >>> timer.format(30500)
    '8.5 hours'
    """
    def __init__(self):
        # the timer is running as soon as it is created
        self.start()

    def start(self):
        "Sets the reference point to the current time"
        self.start_time = time.time()

    def stop(self):
        "Returns the seconds elapsed since the last start, then restarts the timer"
        elapsed = time.time() - self.start_time
        self.start()
        return elapsed

    def report(self):
        "Returns the formatted elapsed time, this also restarts the timer"
        return self.format(self.stop())

    def format(self, value):
        "Formats a duration in seconds as seconds, minutes or hours"
        minute = 60.0
        hour = 60 * minute
        if value < 60:
            return '%4.2f seconds' % value
        if value < hour:
            return '%3.1f minutes' % (value / minute)
        return '%3.1f hours' % (value / hour)
|---|
| 170 | |
|---|
def gc_off(func):
    """
    A decorator that turns off the garbage collector
    during the lifetime of the wrapped function.

    The collector is switched back on after the call (also when the
    call raises) only if it was enabled when the call started. This way
    nested decorated calls, or callers that disabled the collector
    themselves, do not get it re-enabled behind their back.

    >>> @gc_off
    ... def foo():
    ...     pass
    >>> foo()
    """
    # imported here so the decorator does not depend on the
    # module level import list
    import functools

    # wraps keeps the name and docstring of the decorated function
    @functools.wraps(func)
    def newfunc(*args, **kargs):
        was_enabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kargs)
        finally:
            # restore the previous state rather than forcing it on
            if was_enabled:
                gc.enable()
    return newfunc
|---|
| 189 | |
|---|
def make_tempfile(fname=None, dir='', prefix='temp-', suffix='.png'):
    """
    Returns a (filename, filepath) tuple for a temporary file.

    If the fname parameter is not specified a uniquely named empty file
    is created with tempfile.mkstemp, inside dir when that is given and
    in the default temporary directory otherwise. The file is left on
    disk, removing it is up to the caller.

    If fname is specified no file is created: the filename is built as
    prefix + fname + suffix and the filepath joins it onto dir.

    >>> make_tempfile(fname=1, prefix='img')[0]
    'img1.png'
    >>> len(make_tempfile())
    2
    """
    if fname is None:
        # an empty dir means "use the default temporary directory".
        # The text flag of mkstemp is a boolean, not a file mode, and
        # the descriptor is closed right away, so it is not passed.
        fd, fpath = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir or None)
        os.close(fd)
        fname = os.path.basename(fpath)
    else:
        fname = '%s%s%s' % (prefix, fname, suffix)
        fpath = os.path.join(dir, fname)

    return fname, fpath
|---|
| 213 | |
|---|
def test(verbose=0):
    """
    Performs module level testing by running the doctests
    through doctest.testmod; verbose is passed on unchanged.
    """
    from doctest import testmod
    testmod(verbose=verbose)
|---|
| 218 | |
|---|
# run the doctests of this module when it is executed as a script
if __name__ == "__main__":
    test()
|---|