1 | """ |
---|
2 | Tabular datatype |
---|
3 | |
---|
4 | """ |
---|
5 | import pkg_resources |
---|
6 | pkg_resources.require( "bx-python" ) |
---|
7 | |
---|
8 | import logging |
---|
9 | import data |
---|
10 | from galaxy import util |
---|
11 | from cgi import escape |
---|
12 | from galaxy.datatypes import metadata |
---|
13 | from galaxy.datatypes.metadata import MetadataElement |
---|
14 | import galaxy_utils.sequence.vcf |
---|
15 | from sniff import * |
---|
16 | |
---|
17 | log = logging.getLogger(__name__) |
---|
18 | |
---|
class Tabular( data.Text ):
    """Tab delimited data"""

    # Metadata elements describing the tabular structure
    MetadataElement( name="comment_lines", default=0, desc="Number of comment lines", readonly=False, optional=True, no_value=0 )
    MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=False, no_value=0 )
    MetadataElement( name="column_types", default=[], desc="Column types", param=metadata.ColumnTypesParameter, readonly=True, visible=False, no_value=[] )

    def init_meta( self, dataset, copy_from=None ):
        data.Text.init_meta( self, dataset, copy_from=copy_from )

    def set_meta( self, dataset, overwrite=True, skip=None, max_data_lines=None, **kwd ):
        """
        Tries to determine the number of columns as well as those columns
        that contain numerical values in the dataset. A skip parameter is
        used because various tabular data types reuse this function, and
        their data type classes are responsible to determine how many invalid
        comment lines should be skipped. Using None for skip will cause skip
        to be zero, but the first line will be processed as a header. A
        max_data_lines parameter is used because various tabular data types
        reuse this function, and their data type classes are responsible to
        determine how many data lines should be processed to ensure that the
        non-optional metadata parameters are properly set; if used, optional
        metadata parameters will be set to None, unless the entire file has
        already been read. Using None (default) for max_data_lines will
        process all data lines.

        Items of interest:
        1. We treat 'overwrite' as always True (we always want to set tabular metadata when called).
        2. If a tabular file has no data, it will have one column of type 'str'.
        3. We used to check only the first 100 lines when setting metadata and this class's
           set_peek() method read the entire file to determine the number of lines in the file.
           Since metadata can now be processed on cluster nodes, we've merged the line count portion
           of the set_peek() processing here, and we now check the entire contents of the file.
        """
        # Store original skip value to check with later
        requested_skip = skip
        if skip is None:
            skip = 0
        column_type_set_order = [ 'int', 'float', 'list', 'str' ]  # Order to set column types in
        default_column_type = column_type_set_order[-1]  # Default column type is lowest in list
        column_type_compare_order = list( column_type_set_order )  # Order to compare column types
        column_type_compare_order.reverse()
        def type_overrules_type( column_type1, column_type2 ):
            # True when column_type1 should replace column_type2 as the guess for a column
            if column_type1 is None or column_type1 == column_type2:
                return False
            if column_type2 is None:
                return True
            for column_type in column_type_compare_order:
                if column_type1 == column_type:
                    return True
                if column_type2 == column_type:
                    return False
            # Neither column type was found in our ordered list; this cannot happen.
            # (Was previously `raise "..."` — string exceptions are invalid.)
            raise ValueError( "Tried to compare unknown column types: %s, %s" % ( column_type1, column_type2 ) )
        def is_int( column_text ):
            try:
                int( column_text )
                return True
            except ValueError:
                return False
        def is_float( column_text ):
            try:
                float( column_text )
                return True
            except ValueError:
                if column_text.strip().lower() == 'na':
                    return True  # na is special cased to be a float
                return False
        def is_list( column_text ):
            return "," in column_text
        def is_str( column_text ):
            # Anything, except an empty string, is True
            if column_text == "":
                return False
            return True
        is_column_type = {}  # Dict mapping column type string to its checking function
        for column_type in column_type_set_order:
            is_column_type[column_type] = locals()[ "is_%s" % ( column_type ) ]
        def guess_column_type( column_text ):
            # Return the first (most specific) type whose check passes, or None for empty text
            for column_type in column_type_set_order:
                if is_column_type[column_type]( column_text ):
                    return column_type
            return None
        data_lines = 0
        comment_lines = 0
        column_types = []
        first_line_column_types = [ default_column_type ]  # default value is one column of type str
        if dataset.has_data():
            # NOTE: if skip > num_check_lines, we won't detect any metadata, and will use default
            dataset_fh = open( dataset.file_name )
            try:
                i = 0
                while True:
                    line = dataset_fh.readline()
                    if not line:
                        break
                    line = line.rstrip( '\r\n' )
                    if i < skip or not line or line.startswith( '#' ):
                        # We'll call blank lines comments
                        comment_lines += 1
                    else:
                        data_lines += 1
                        fields = line.split( '\t' )
                        for field_count, field in enumerate( fields ):
                            if field_count >= len( column_types ):  # found a previously unknown column, we append None
                                column_types.append( None )
                            column_type = guess_column_type( field )
                            if type_overrules_type( column_type, column_types[field_count] ):
                                column_types[field_count] = column_type
                    if i == 0 and requested_skip is None:
                        # This is our first line, people seem to like to upload files that have a header line, but do not
                        # start with '#' (i.e. all column types would then most likely be detected as str). We will assume
                        # that the first line is always a header (this was previous behavior - it was always skipped). When
                        # the requested skip is None, we only use the data from the first line if we have no other data for
                        # a column. This is far from perfect, as
                        # 1,2,3 1.1 2.2 qwerty
                        # 0 0 1,2,3
                        # will be detected as
                        # "column_types": ["int", "int", "float", "list"]
                        # instead of
                        # "column_types": ["list", "float", "float", "str"] *** would seem to be the 'Truth' by manual
                        # observation that the first line should be included as data. The old method would have detected as
                        # "column_types": ["int", "int", "str", "list"]
                        first_line_column_types = column_types
                        column_types = [ None for col in first_line_column_types ]
                    if max_data_lines is not None and data_lines >= max_data_lines:
                        if dataset_fh.tell() != dataset.get_size():
                            data_lines = None  # Clear optional data_lines metadata value
                            comment_lines = None  # Clear optional comment_lines metadata value; additional comment lines could appear below this point
                        break
                    i += 1
            finally:
                # Always release the file handle, even if an unexpected error occurs above
                dataset_fh.close()
        # We error on the larger number of columns:
        # first we pad our column_types by using data from first line
        if len( first_line_column_types ) > len( column_types ):
            for column_type in first_line_column_types[len( column_types ):]:
                column_types.append( column_type )
        # Now we fill any unknown (None) column_types with data from first line
        for i in range( len( column_types ) ):
            if column_types[i] is None:
                if len( first_line_column_types ) <= i or first_line_column_types[i] is None:
                    column_types[i] = default_column_type
                else:
                    column_types[i] = first_line_column_types[i]
        # Set the discovered metadata values for the dataset
        dataset.metadata.data_lines = data_lines
        dataset.metadata.comment_lines = comment_lines
        dataset.metadata.column_types = column_types
        dataset.metadata.columns = len( column_types )

    def make_html_table( self, dataset, skipchars=None ):
        """Create HTML table, used for displaying peek"""
        if skipchars is None:  # avoid mutable default argument
            skipchars = []
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            out.append( '<tr>' )
            # Generate column header (1-based column numbers)
            for i in range( 1, dataset.metadata.columns + 1 ):
                out.append( '<th>%s</th>' % str( i ) )
            out.append( '</tr>' )
            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception as exc:
            out = "Can't create peek %s" % str( exc )
        return out

    def make_html_peek_rows( self, dataset, skipchars=None ):
        """Render the peek's data lines as HTML table rows; lines starting with a
        skipchar, or whose column count does not match the metadata, are shown
        as full-width comment rows."""
        if skipchars is None:  # avoid mutable default argument
            skipchars = []
        out = [""]
        comments = []
        if not dataset.peek:
            dataset.set_peek()
        data = dataset.peek
        lines = data.splitlines()
        for line in lines:
            line = line.rstrip( '\r\n' )
            if not line:
                continue
            comment = False
            for skipchar in skipchars:
                if line.startswith( skipchar ):
                    comments.append( line )
                    comment = True
                    break
            if comment:
                continue
            elems = line.split( '\t' )
            if len( elems ) != dataset.metadata.columns:
                # We may have an invalid comment line or invalid data
                comments.append( line )
                comment = True
                continue
            while len( comments ) > 0:  # Keep comments
                out.append( '<tr><td colspan="100%">' )
                out.append( '%s</td></tr>' % escape( comments.pop(0) ) )
            out.append( '<tr>' )
            for elem in elems:  # valid data
                elem = escape( elem )
                out.append( '<td>%s</td>' % elem )
            out.append( '</tr>' )
        # Peek may consist only of comments
        while len( comments ) > 0:
            out.append( '<tr><td colspan="100%">' )
            out.append( '%s</td></tr>' % escape( comments.pop(0) ) )
        return "".join( out )

    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
        """Set the peek/blurb text, appending the comment-line count when known."""
        data.Text.set_peek( self, dataset, line_count=line_count, is_multi_byte=is_multi_byte )
        if dataset.metadata.comment_lines:
            dataset.blurb = "%s, %s comments" % ( dataset.blurb, util.commaify( str( dataset.metadata.comment_lines ) ) )

    def display_peek( self, dataset ):
        """Returns formatted html of peek"""
        return self.make_html_table( dataset )

    def displayable( self, dataset ):
        """True when the dataset is OK, has data, and has tabular metadata set."""
        try:
            return dataset.has_data() \
                and dataset.state == dataset.states.OK \
                and dataset.metadata.columns > 0 \
                and dataset.metadata.data_lines > 0
        except Exception:
            # Metadata may be unset/invalid; treat as not displayable
            return False

    def as_gbrowse_display_file( self, dataset, **kwd ):
        # Caller is responsible for closing the returned file handle
        return open( dataset.file_name )

    def as_ucsc_display_file( self, dataset, **kwd ):
        # Caller is responsible for closing the returned file handle
        return open( dataset.file_name )
---|
245 | |
---|
class Taxonomy( Tabular ):
    """Tab delimited data with a fixed set of taxonomic rank columns."""

    def __init__(self, **kwd):
        """Initialize taxonomy datatype"""
        Tabular.__init__( self, **kwd )
        self.column_names = ['Name', 'TaxId', 'Root', 'Superkingdom', 'Kingdom', 'Subkingdom',
                             'Superphylum', 'Phylum', 'Subphylum', 'Superclass', 'Class', 'Subclass',
                             'Superorder', 'Order', 'Suborder', 'Superfamily', 'Family', 'Subfamily',
                             'Tribe', 'Subtribe', 'Genus', 'Subgenus', 'Species', 'Subspecies'
                             ]

    def make_html_table( self, dataset, skipchars=None ):
        """Create HTML table, used for displaying peek"""
        if skipchars is None:  # avoid mutable default argument
            skipchars = []
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            # Generate column header using the named taxonomy columns
            out.append( '<tr>' )
            for i, name in enumerate( self.column_names ):
                out.append( '<th>%s.%s</th>' % ( str( i + 1 ), name ) )
            # This data type requires at least 24 columns in the data;
            # any extra columns get a plain numeric header
            if dataset.metadata.columns - len( self.column_names ) > 0:
                for i in range( len( self.column_names ), dataset.metadata.columns ):
                    out.append( '<th>%s</th>' % str( i + 1 ) )
            out.append( '</tr>' )
            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception as exc:
            out = "Can't create peek %s" % exc
        return out
---|
275 | |
---|
class Sam( Tabular ):
    """Tab delimited data in SAM format"""
    file_ext = 'sam'

    def __init__(self, **kwd):
        """Initialize sam datatype"""
        # NOTE: docstring previously said "taxonomy" — copy/paste error fixed
        Tabular.__init__( self, **kwd )
        self.column_names = ['QNAME', 'FLAG', 'RNAME', 'POS', 'MAPQ', 'CIGAR',
                             'MRNM', 'MPOS', 'ISIZE', 'SEQ', 'QUAL', 'OPT'
                             ]

    def make_html_table( self, dataset, skipchars=None ):
        """Create HTML table, used for displaying peek"""
        if skipchars is None:  # avoid mutable default argument
            skipchars = []
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            # Generate column header using the named SAM columns
            out.append( '<tr>' )
            for i, name in enumerate( self.column_names ):
                out.append( '<th>%s.%s</th>' % ( str( i + 1 ), name ) )
            # This data type requires at least 11 columns in the data;
            # any extra columns get a plain numeric header
            if dataset.metadata.columns - len( self.column_names ) > 0:
                for i in range( len( self.column_names ), dataset.metadata.columns ):
                    out.append( '<th>%s</th>' % str( i + 1 ) )
            out.append( '</tr>' )
            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception as exc:
            out = "Can't create peek %s" % exc
        return out

    def sniff( self, filename ):
        """
        Determines whether the file is in SAM format

        A file in SAM format consists of lines of tab-separated data.
        The following header line may be the first line:
        @QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL
        or
        @QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT
        Data in the OPT column is optional and can consist of tab-separated data

        For complete details see http://samtools.sourceforge.net/SAM1.pdf

        Rules for sniffing as True:
        There must be 11 or more columns of data on each line
        Columns 2 (FLAG), 4(POS), 5 (MAPQ), 8 (MPOS), and 9 (ISIZE) must be numbers (9 can be negative)
        We will only check that up to the first 5 alignments are correctly formatted.

        >>> fname = get_test_fname( 'sequence.maf' )
        >>> Sam().sniff( fname )
        False
        >>> fname = get_test_fname( '1.sam' )
        >>> Sam().sniff( fname )
        True
        """
        try:
            fh = open( filename )
            try:
                count = 0
                while True:
                    line = fh.readline()
                    line = line.strip()
                    if not line:
                        # EOF — an empty/blank line also ends the scan
                        break
                    if line[0] != '@':
                        linePieces = line.split('\t')
                        if len(linePieces) < 11:
                            return False
                        try:
                            # FLAG, POS, MAPQ, MPOS, ISIZE must parse as integers
                            int(linePieces[1])
                            int(linePieces[3])
                            int(linePieces[4])
                            int(linePieces[7])
                            int(linePieces[8])
                        except ValueError:
                            return False
                        count += 1
                        if count == 5:
                            return True
            finally:
                # Previously the handle leaked on every early return; close it always
                fh.close()
            # Fewer than 5 alignments, but at least one valid one, is still SAM
            if count < 5 and count > 0:
                return True
        except Exception:
            pass
        return False
---|
358 | |
---|
class Pileup( Tabular ):
    """Tab delimited data in pileup (6- or 10-column) format"""
    file_ext = "pileup"

    # Metadata elements identifying the genomic coordinate columns
    MetadataElement( name="chromCol", default=1, desc="Chrom column", param=metadata.ColumnParameter )
    MetadataElement( name="startCol", default=2, desc="Start column", param=metadata.ColumnParameter )
    MetadataElement( name="baseCol", default=3, desc="Reference base column", param=metadata.ColumnParameter )

    def init_meta( self, dataset, copy_from=None ):
        Tabular.init_meta( self, dataset, copy_from=copy_from )

    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            if line_count is None:
                # See if line_count is stored in the metadata
                if dataset.metadata.data_lines:
                    dataset.blurb = "%s genomic coordinates" % util.commaify( str( dataset.metadata.data_lines ) )
                else:
                    # Number of lines is not known ( this should not happen ), and auto-detect is
                    # needed to set metadata
                    dataset.blurb = "? genomic coordinates"
            else:
                dataset.blurb = "%s genomic coordinates" % util.commaify( str( line_count ) )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def make_html_table( self, dataset, skipchars=None ):
        """Create HTML table, used for displaying peek"""
        if skipchars is None:  # avoid mutable default argument
            skipchars = []
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            # Generate column header, labelling the chrom/start/base columns
            out.append('<tr>')
            for i in range( 1, dataset.metadata.columns + 1 ):
                if i == dataset.metadata.chromCol:
                    out.append( '<th>%s.Chrom</th>' % i )
                elif i == dataset.metadata.startCol:
                    out.append( '<th>%s.Start</th>' % i )
                elif i == dataset.metadata.baseCol:
                    out.append( '<th>%s.Base</th>' % i )
                else:
                    out.append( '<th>%s</th>' % i )
            out.append('</tr>')
            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception as exc:
            out = "Can't create peek %s" % str( exc )
        return out

    def repair_methods( self, dataset ):
        """Return options for removing errors along with a description"""
        return [ ("lines", "Remove erroneous lines") ]

    def sniff( self, filename ):
        """
        Checks for 'pileup-ness'

        There are two main types of pileup: 6-column and 10-column. For both,
        the first three and last two columns are the same. We only check the
        first three to allow for some personalization of the format.

        >>> fname = get_test_fname( 'interval.interval' )
        >>> Pileup().sniff( fname )
        False
        >>> fname = get_test_fname( '6col.pileup' )
        >>> Pileup().sniff( fname )
        True
        >>> fname = get_test_fname( '10col.pileup' )
        >>> Pileup().sniff( fname )
        True
        """
        headers = get_headers( filename, '\t' )
        try:
            for hdr in headers:
                if hdr and not hdr[0].startswith( '#' ):
                    if len( hdr ) < 3:
                        return False
                    # chrom start is in column 1 (with 0-based columns)
                    # and reference base is in column 2
                    try:
                        int( hdr[1] )
                    except ValueError:
                        return False
                    # Explicit membership test instead of `assert`, which is
                    # stripped (and so never checked) under `python -O`
                    if hdr[2] not in [ 'A', 'C', 'G', 'T', 'N', 'a', 'c', 'g', 't', 'n' ]:
                        return False
            return True
        except Exception:
            return False
---|
451 | |
---|
class Eland( Tabular ):
    """Tab delimited data with the Eland file extension."""
    file_ext = 'eland'

    def sniff( self, filename ):
        # No reliable content-based detection; never auto-detect this type.
        return False
---|
457 | |
---|
class ElandMulti( Tabular ):
    """Tab delimited data with the Eland-multi file extension."""
    file_ext = 'elandmulti'

    def sniff( self, filename ):
        # No reliable content-based detection; never auto-detect this type.
        return False
---|
463 | |
---|
class Vcf( Tabular ):
    """ Variant Call Format for describing SNPs and other simple genome variations. """

    file_ext = 'vcf'
    column_names = [ 'Chrom', 'Pos', 'ID', 'Ref', 'Alt', 'Qual', 'Filter', 'Info', 'Format', 'data' ]

    MetadataElement( name="columns", default=10, desc="Number of columns", readonly=True, visible=False )
    MetadataElement( name="column_types", default=['str','int','str','str','str','int','str','list','str','str'], param=metadata.ColumnTypesParameter, desc="Column types", readonly=True, visible=False )
    MetadataElement( name="viz_filter_cols", default=[5], param=metadata.ColumnParameter, multiple=True )

    def sniff( self, filename ):
        """Determines whether the file is in VCF format."""
        try:
            # If reader can read and parse every line of the file, it's VCF.
            # Iterate the reader directly instead of materializing a throwaway list.
            for line in galaxy_utils.sequence.vcf.Reader( open( filename ) ):
                pass
            return True
        except Exception:
            return False

    def make_html_table( self, dataset, skipchars=None ):
        """Create HTML table, used for displaying peek"""
        if skipchars is None:  # avoid mutable default argument
            skipchars = []
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            # Generate column header using the named VCF columns
            out.append( '<tr>' )
            for i, name in enumerate( self.column_names ):
                out.append( '<th>%s.%s</th>' % ( str( i + 1 ), name ) )
            # Close the header row; it was previously left open, producing malformed HTML
            out.append( '</tr>' )
            out.append( self.make_html_peek_rows( dataset, skipchars=skipchars ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception as exc:
            out = "Can't create peek %s" % exc
        return out

    def get_track_type( self ):
        return "FeatureTrack", {"data": "interval_index", "index": "summary_tree"}
---|