1 | """ |
---|
2 | Utility functions. |
---|
3 | |
---|
4 | |
---|
5 | """ |
---|
6 | import logger, conf |
---|
7 | import os, sys, random, hashlib, re, string, csv, gc |
---|
8 | import tempfile, os, random, glob, time |
---|
9 | |
---|
10 | # dealing with roman numerals for chromosomes |
---|
11 | from genetrack import roman |
---|
12 | |
---|
def path_join(*args):
    """
    Joins the given path components and returns the result
    as an absolute, normalized path.
    """
    joined = os.path.join(*args)
    return os.path.abspath(joined)
---|
16 | |
---|
def chromosome_remap(text):
    """
    Attempts to produce the standardized chromosome from
    multiple possible inputs::

        chr5, chr05, chrV, chrom5, chrom05, chromV -> chr5

    Names that do not start with ``chr`` are returned unchanged. Only the
    leading ``chrom`` or ``chr`` prefix is removed; the remainder is read
    as a decimal number first and as a roman numeral (``roman.fromRoman``)
    second. When neither conversion succeeds the remainder is kept as is
    behind a ``chr`` prefix.

    >>> names = 'chr1 chr06 chrIX chrom02 chrom12 chromV'.split()
    >>> [chromosome_remap(name) for name in names]
    ['chr1', 'chr6', 'chr9', 'chr2', 'chr12', 'chr5']
    >>> chromosome_remap('scaffold7')
    'scaffold7'
    """
    if not text.startswith('chr'):
        return text

    # strip the prefix only; a later 'chr' inside the name must survive
    prefix = 'chrom' if text.startswith('chrom') else 'chr'
    label = text[len(prefix):]

    try:
        # plain decimal numbering, this also drops leading zeros
        label = int(label)
    except ValueError:
        try:
            # fall back to roman numerals
            label = roman.fromRoman(label)
        except Exception:
            # deliberate best effort: anything that is not a roman numeral
            # either keeps its original label
            pass

    return 'chr%s' % label
---|
47 | |
---|
class Params(object):
    """
    A simple attribute bag, every keyword argument passed to
    the constructor becomes an attribute of the instance.

    >>> p = Params(a=1, b=2, c=None, d=None)
    >>> p.a, p.b
    (1, 2)
    >>> p.c, p.d
    (None, None)
    """
    def __init__(self, **kwds):
        vars(self).update(kwds)

    def update(self, other):
        "Overwrites attributes with the values found in other"
        vars(self).update(other)

    def defaults(self, other):
        "Sets default values for non-existing attributes"
        incoming = {}
        incoming.update(other)
        attrs = vars(self)
        for name, value in incoming.items():
            # attributes that already exist always win over the defaults
            if name not in attrs:
                attrs[name] = value

    def dict(self):
        "Returns the live attribute dictionary (not a copy)"
        return vars(self)

    def get(self, key, default=None):
        "Returns the attribute named key or default if it is missing"
        return vars(self).get(key, default)

    def __repr__(self):
        return repr(vars(self))
---|
77 | |
---|
def uuid(KEY_SIZE=128):
    """
    Generates a unique id.

    Draws KEY_SIZE random bits and returns the MD5 hex digest
    (32 hexadecimal characters) of their decimal representation.

    NOTE(review): the bits come from the ``random`` module, which is not
    a cryptographically secure source, so these ids should not double
    as secrets.
    """
    seed = str(random.getrandbits(KEY_SIZE))
    # hashlib digests operate on bytes; the decimal digits are plain ASCII
    return hashlib.md5(seed.encode('ascii')).hexdigest()
---|
82 | |
---|
def nice_bytes(value):
    """
    Formats a size given in bytes as a human readable string,
    whole units only (the fractional part is truncated)

    >>> nice_bytes(100), nice_bytes(10**4), nice_bytes(10**8), nice_bytes(10**10)
    ('100 bytes', '9 Kbytes', '95 Mbytes', '9 Gbyte')
    """
    kilo, mega, giga = 1024, 1048576, 1073741824

    # below one kilobyte the value is shown unchanged
    if value < kilo:
        return "%s bytes" % value

    # (exclusive upper bound, divisor, template) tried from small to large
    scales = (
        (mega, kilo, "%s Kbytes"),
        (giga, mega, "%s Mbytes"),
    )
    for limit, unit, template in scales:
        if value < limit:
            return template % int(value / unit)

    return "%s Gbyte" % int(value / giga)
---|
94 | |
---|
def nice_sort(data):
    """
    Sort the given list data in the way that humans expect.
    Adapted from a posting by Ned Batchelder: http://nedbatchelder.com/blog/200712.html#e20071211T054956

    The list is sorted in place and nothing is returned. Runs of the
    digits 0-9 inside each string are compared as numbers, everything
    else is compared as text.

    >>> data = [ 'chr1', 'chr2', 'chr10', 'chr100' ]
    >>> data.sort()
    >>> data
    ['chr1', 'chr10', 'chr100', 'chr2']
    >>> nice_sort(data)
    >>> data
    ['chr1', 'chr2', 'chr10', 'chr100']
    """
    def alphanum(key):
        # splitting on a capturing group alternates text, digits, text, ...
        # so the odd positions, and only those, hold the digit runs; the
        # keys then always compare text with text and number with number
        parts = re.split('([0-9]+)', key)
        return [int(part) if index % 2 else part
                for index, part in enumerate(parts)]

    # a real list is required as key, lazy map objects cannot be ordered
    data.sort(key=alphanum)
---|
117 | |
---|
def commify(n):
    """
    Formats numbers with commas as thousands separators

    >>> commify(10000)
    '10,000'
    """
    grouper = re.compile(r'^([-+]?\d+)(\d{3})')

    # each pass splits off the last three digits of the leading digit run,
    # repeat until a pass changes nothing
    text, changed = grouper.subn(r'\1,\2', str(n))
    while changed:
        text, changed = grouper.subn(r'\1,\2', text)
    return text
---|
131 | |
---|
class Timer(object):
    """
    Measures and formats elapsed times. The clock starts when the
    instance is created and restarts after every stop() or report().

    >>> timer = Timer()
    >>> timer.format(30)
    '30.00 seconds'
    >>> timer.format(320)
    '5.3 minutes'
    >>> timer.format(3200)
    '53.3 minutes'
    >>> timer.format(30500)
    '8.5 hours'
    """
    def __init__(self):
        self.start()

    def start(self):
        "Sets the reference point to the current time"
        self.start_time = time.time()

    def format(self, value):
        "Formats a duration given in seconds using the best fitting unit"
        minute = 60.0
        hour = minute * 60
        if value < 60:
            return '%4.2f seconds' % value
        if value < hour:
            return '%3.1f minutes' % (value / minute)
        return '%3.1f hours' % (value / hour)

    def report(self):
        "Stops the timer and returns the elapsed time as formatted text"
        return self.format(self.stop())

    def stop(self):
        "Returns the seconds passed since the last start, then restarts"
        now = time.time()
        elapsed = now - self.start_time
        self.start()
        return elapsed
---|
170 | |
---|
def gc_off(func):
    """
    A decorator that turns off the garbage collector
    during the lifetime of the wrapped function.

    The collector is switched back on afterwards only if it was enabled
    when the call started, so a caller that disabled it on purpose keeps
    it disabled. The state is restored even if the wrapped function
    raises. The wrapper keeps the name and docstring of the function.

    >>> @gc_off
    ... def foo():
    ...     pass
    >>> foo()
    """
    # local import, the module does not import functools at the top
    import functools

    @functools.wraps(func)
    def newfunc(*args, **kargs):
        was_enabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kargs)
        finally:
            # restore the previous state rather than forcing the collector on
            if was_enabled:
                gc.enable()
    return newfunc
---|
189 | |
---|
def make_tempfile(fname=None, dir='', prefix='temp-', suffix='.png'):
    """
    Returns a (filename, filepath) tuple for a temporary file.

    If the fname parameter is None a uniquely named, empty file is
    created with tempfile.mkstemp, inside dir when that is given and in
    the default temporary directory otherwise. Its descriptor is closed
    right away and the caller is responsible for removing the file.

    If fname is given no file is created: the name is built as
    prefix + fname + suffix and joined onto dir.

    >>> make_tempfile(fname=1, prefix='img')[0]
    'img1.png'
    >>> len(make_tempfile())
    2
    """
    if fname is None:
        # mkstemp reads dir=None as "use the default temporary directory"
        fd, fpath = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir or None)
        # only the path is handed back, release the descriptor immediately
        os.close(fd)
        fname = os.path.basename(fpath)
    else:
        fname = '%s%s%s' % (prefix, fname, suffix)
        fpath = os.path.join(dir, fname)

    return fname, fpath
---|
213 | |
---|
def test(verbose=0):
    "Performs module level testing by running doctest.testmod"
    from doctest import testmod
    testmod(verbose=verbose)


# run the doctests when the file is executed as a script
if __name__ == "__main__":
    test()
---|