[2] | 1 | import logging, os, sys, time, tempfile |
---|
| 2 | from galaxy import util |
---|
| 3 | from galaxy.util.odict import odict |
---|
| 4 | from galaxy.util.bunch import Bunch |
---|
| 5 | from cgi import escape |
---|
| 6 | import metadata |
---|
| 7 | import zipfile |
---|
| 8 | from metadata import MetadataElement #import directly to maintain ease of use in Datatype class definitions |
---|
| 9 | |
---|
log = logging.getLogger(__name__)

# Valid first-column prefixes and strand-column values for bed and other formats
col1_startswith = ['chr', 'chl', 'groupun', 'reftig_', 'scaffold', 'super_', 'vcho']
valid_strand = ['+', '-', '.']
---|
| 15 | |
---|
class DataMeta( type ):
    """
    Metaclass for Data class.  Builds the class's metadata spec by merging
    the specs of every base class, then lets pending metadata Statements run.
    """
    def __init__( cls, name, bases, dict_ ):
        # Start with a fresh spec collection for this class.
        cls.metadata_spec = metadata.MetadataSpecCollection()
        # Pull in the spec of every base that has one (plain 'object' has none).
        for parent in [ b for b in bases if hasattr( b, "metadata_spec" ) ]:
            cls.metadata_spec.update( parent.metadata_spec )
        # Apply the MetadataElement statements collected in the class body.
        metadata.Statement.process( cls )
---|
| 26 | |
---|
class Data( object ):
    """
    Base class for all datatypes.  Implements basic interfaces as well
    as class methods for metadata.

    >>> class DataTest( Data ):
    ...     MetadataElement( name="test" )
    ...
    >>> DataTest.metadata_spec.test.name
    'test'
    >>> DataTest.metadata_spec.test.desc
    'test'
    >>> type( DataTest.metadata_spec.test.param )
    <class 'galaxy.datatypes.metadata.MetadataParameter'>

    """
    __metaclass__ = DataMeta
    # Add metadata elements
    MetadataElement( name="dbkey", desc="Database/Build", default="?", param=metadata.DBKeyParameter, multiple=False, no_value="?" )
    # Stores the set of display applications, and viewing methods, supported by this datatype
    supported_display_apps = {}
    # If False, the peek is regenerated whenever a dataset of this type is copied
    copy_safe_peek = True
    # The dataset contains binary data --> do not space_to_tab or convert newlines, etc.
    # Allow binary file uploads of this type when True.
    is_binary = True
    # Allow user to change between this datatype and others. If False, this datatype
    # cannot be changed from or into.
    allow_datatype_change = True
    # Composite datatypes
    composite_type = None
    composite_files = odict()
    primary_file_name = 'index'
    # A per datatype setting (inherited): max file size (in bytes) for setting optional metadata
    _max_optional_metadata_filesize = None

    def __init__(self, **kwd):
        """Initialize the datatype"""
        object.__init__(self, **kwd)
        # Copy the class-level containers so per-instance changes do not
        # leak back into the class (or into sibling instances).
        self.supported_display_apps = self.supported_display_apps.copy()
        self.composite_files = self.composite_files.copy()
        self.display_applications = odict()
    def write_from_stream(self, dataset, stream):
        """Writes data from a stream to the dataset file, in 1 MB chunks."""
        # Bug fix: the original passed the file *object* returned by open()
        # to os.write()/os.close(), which require an integer file descriptor
        # and raise TypeError.  Use the file object's own methods instead.
        out = open(dataset.file_name, 'wb')
        try:
            while 1:
                chunk = stream.read(1048576)
                if not chunk:
                    break
                out.write(chunk)
        finally:
            out.close()
    def set_raw_data(self, dataset, data):
        """Saves the data on the disc"""
        # Bug fix: same os.write()/os.close() file-descriptor misuse as
        # write_from_stream() above.
        out = open(dataset.file_name, 'wb')
        try:
            out.write(data)
        finally:
            out.close()
    def get_raw_data( self, dataset ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        try:
            # Bug fix: the original referenced 'datset.file_name' (NameError).
            return open( dataset.file_name, 'rb' ).read( -1 )
        # Bug fix: in Python 2 a failed open() raises IOError, which the
        # original OSError-only clause never caught.
        except ( IOError, OSError ):
            log.exception('%s reading a file that does not exist %s' % (self.__class__.__name__, dataset.file_name))
            return ''
    def groom_dataset_content( self, file_name ):
        """This function is called on an output dataset file after the content is initially generated."""
        pass
    def init_meta( self, dataset, copy_from=None ):
        # Metadata should be left mostly uninitialized.  Dataset will
        # handle returning default values when metadata is not set.
        # copy_from allows metadata to be passed in that will be
        # copied. (although this seems ambiguous, see
        # Dataset.set_metadata.  It always copies the rhs in order to
        # flag the object as modified for SQLAlchemy.
        if copy_from:
            dataset.metadata = copy_from.metadata
    def set_meta( self, dataset, overwrite = True, **kwd ):
        """Unimplemented method, allows guessing of metadata from contents of file"""
        return True
    def missing_meta( self, dataset, check = [], skip = [] ):
        """
        Checks for empty metadata values; returns True if non-optional metadata is missing.
        Specifying a list of 'check' values will only check those names provided; when used, optionality is ignored.
        Names listed in 'skip' are excluded from the check, so a missing value
        there does not cause a True result.
        """
        if check:
            to_check = [ ( to_check, dataset.metadata.get( to_check ) ) for to_check in check ]
        else:
            to_check = dataset.metadata.items()
        for key, value in to_check:
            if key in skip or ( not check and dataset.metadata.spec[key].get( "optional" ) ):
                continue #we skip check for optional and nonrequested values here
            if not value:
                return True
        return False
    def set_max_optional_metadata_filesize( self, max_value ):
        """Sets the class-wide optional-metadata file size limit (ignores non-integer input)."""
        try:
            max_value = int( max_value )
        except:
            # Best effort: silently ignore values that are not integers
            return
        # Stored on the class so the limit is shared by all instances of this datatype
        self.__class__._max_optional_metadata_filesize = max_value
    def get_max_optional_metadata_filesize( self ):
        """Returns the class-wide limit; an unset limit is reported as -1."""
        rval = self.__class__._max_optional_metadata_filesize
        if rval is None:
            return -1
        return rval
    max_optional_metadata_filesize = property( get_max_optional_metadata_filesize, set_max_optional_metadata_filesize )
    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = ''
            dataset.blurb = 'data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
    def display_peek(self, dataset ):
        """Create HTML table, used for displaying peek"""
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            if not dataset.peek:
                dataset.set_peek()
            data = dataset.peek
            lines = data.splitlines()
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                # HTML-escape each line; decode to unicode first so non-ascii
                # peek content renders instead of raising
                if type( line ) is unicode:
                    out.append( '<tr><td>%s</td></tr>' % escape( line ) )
                else:
                    out.append( '<tr><td>%s</td></tr>' % escape( unicode( line, 'utf-8' ) ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception as exc:
            out = "Can't create peek %s" % str( exc )
        return out
    def display_name(self, dataset):
        """Returns formatted html of dataset name"""
        try:
            if type ( dataset.name ) is unicode:
                return escape( dataset.name )
            else:
                # Bug fix: the original passed the encoding as 'utf-8 '
                # (trailing space), which fails codec lookup, so every
                # non-unicode name fell into the 'name unavailable' path.
                return escape( unicode( dataset.name, 'utf-8' ) )
        except:
            return "name unavailable"
    def display_info(self, dataset):
        """Returns formatted html of dataset info"""
        try:
            # Change new line chars to html
            info = escape( dataset.info )
            if info.find( '\r\n' ) >= 0:
                info = info.replace( '\r\n', '<br/>' )
            if info.find( '\r' ) >= 0:
                info = info.replace( '\r', '<br/>' )
            if info.find( '\n' ) >= 0:
                info = info.replace( '\n', '<br/>' )

            # Convert to unicode to display non-ascii characters.
            if type( info ) is not unicode:
                info = unicode( info, 'utf-8')

            return info
        except:
            return "info unavailable"
    def validate(self, dataset):
        """Unimplemented validate, return no exceptions"""
        return list()
    def repair_methods(self, dataset):
        """Unimplemented method, returns dict with method/option for repairing errors"""
        return None
    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'
    def add_display_app ( self, app_id, label, file_function, links_function ):
        """
        Adds a display app to the datatype.
        app_id is a unique id
        label is the primary display label, e.g., display at 'UCSC'
        file_function is a string containing the name of the function that returns a properly formatted display
        links_function is a string containing the name of the function that returns a list of (link_name,link)
        """
        self.supported_display_apps = self.supported_display_apps.copy()
        self.supported_display_apps[app_id] = {'label':label,'file_function':file_function,'links_function':links_function}
    def remove_display_app (self, app_id):
        """Removes a display app from the datatype"""
        self.supported_display_apps = self.supported_display_apps.copy()
        try:
            del self.supported_display_apps[app_id]
        except:
            # Bug fix: the original interpolated the builtin 'type' here
            # instead of the app_id being removed.
            log.exception('Tried to remove display app %s from datatype %s, but this display app is not declared.' % ( app_id, self.__class__.__name__ ) )
    def clear_display_apps( self ):
        """Drops all old-style display apps from this instance."""
        self.supported_display_apps = {}
    def add_display_application( self, display_application ):
        """New style display applications"""
        assert display_application.id not in self.display_applications, 'Attempted to add a display application twice'
        self.display_applications[ display_application.id ] = display_application
    def get_display_application( self, key, default = None ):
        """Returns the new-style display application registered under key, or default."""
        return self.display_applications.get( key, default )
    def get_display_applications_by_dataset( self, dataset, trans ):
        """Returns new-style display applications filtered to those with links for this dataset."""
        rval = odict()
        for key, value in self.display_applications.iteritems():
            value = value.filter_by_dataset( dataset, trans )
            if value.links:
                rval[key] = value
        return rval
    def get_display_types(self):
        """Returns display types available"""
        return self.supported_display_apps.keys()
    def get_display_label(self, type):
        """Returns primary label for display app"""
        try:
            return self.supported_display_apps[type]['label']
        except:
            return 'unknown'
    def as_display_type(self, dataset, type, **kwd):
        """Returns modified file contents for a particular display type """
        try:
            if type in self.get_display_types():
                return getattr (self, self.supported_display_apps[type]['file_function']) (dataset, **kwd)
        except:
            log.exception('Function %s is referred to in datatype %s for displaying as type %s, but is not accessible' % (self.supported_display_apps[type]['file_function'], self.__class__.__name__, type) )
        return "This display type (%s) is not implemented for this datatype (%s)." % ( type, dataset.ext)
    def get_display_links( self, dataset, type, app, base_url, target_frame='_blank', **kwd ):
        """
        Returns a list of tuples of (name, link) for a particular display type.  No check on
        'access' permissions is done here - if you can view the dataset, you can also save it
        or send it to a destination outside of Galaxy, so Galaxy security restrictions do not
        apply anyway.
        """
        try:
            if type in self.get_display_types():
                return target_frame, getattr ( self, self.supported_display_apps[type]['links_function'] ) ( dataset, type, app, base_url, **kwd )
        except:
            log.exception( 'Function %s is referred to in datatype %s for generating links for type %s, but is not accessible' \
                           % ( self.supported_display_apps[type]['links_function'], self.__class__.__name__, type ) )
        return []
    def get_converter_types(self, original_dataset, datatypes_registry):
        """Returns available converters by type for this dataset"""
        return datatypes_registry.get_converters_by_datatype(original_dataset.ext)
    def find_conversion_destination( self, dataset, accepted_formats, datatypes_registry, **kwd ):
        """Returns ( target_ext, existing converted dataset )"""
        return datatypes_registry.find_conversion_destination_for_dataset_by_extensions( dataset, accepted_formats, **kwd )
    def convert_dataset(self, trans, original_dataset, target_type, return_output = False, visible = True, deps=None):
        """This function adds a job to the queue to convert a dataset to another type. Returns a message about success/failure."""
        converter = trans.app.datatypes_registry.get_converter_by_target_type( original_dataset.ext, target_type )

        if converter is None:
            raise Exception( "A converter does not exist for %s to %s." % ( original_dataset.ext, target_type ) )
        # Generate parameter dictionary
        params = {}
        # Determine input parameter name and add to params
        input_name = 'input1'
        for key, value in converter.inputs.items():
            if (deps) and (value.name in deps):
                params[value.name] = deps[value.name]
            elif value.type == 'data':
                input_name = key

        params[input_name] = original_dataset
        # Run converter, job is dispatched through Queue
        converted_dataset = converter.execute( trans, incoming = params, set_output_hid = visible )[1]
        if len(params) > 0:
            trans.log_event( "Converter params: %s" % (str(params)), tool_id=converter.id )
        if not visible:
            for name, value in converted_dataset.iteritems():
                value.visible = False
        if return_output:
            return converted_dataset
        return "The file conversion of %s on data %s has been added to the Queue." % (converter.name, original_dataset.hid)
    # We need to clear associated files before we set metadata
    # so that as soon as metadata starts to be set, e.g. implicitly converted datasets are deleted and no longer available 'while' metadata is being set, not just after
    # We'll also clear after setting metadata, for backwards compatibility
    def after_setting_metadata( self, dataset ):
        """This function is called on the dataset after metadata is set."""
        dataset.clear_associated_files( metadata_safe = True )
    def before_setting_metadata( self, dataset ):
        """This function is called on the dataset before metadata is set."""
        dataset.clear_associated_files( metadata_safe = True )
    def __new_composite_file( self, name, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, is_binary = False, space_to_tab = False, **kwds ):
        """Builds a Bunch describing one component file of a composite datatype."""
        kwds[ 'name' ] = name
        kwds[ 'optional' ] = optional
        kwds[ 'mimetype' ] = mimetype
        kwds[ 'description' ] = description
        kwds[ 'substitute_name_with_metadata' ] = substitute_name_with_metadata
        kwds[ 'is_binary' ] = is_binary
        kwds[ 'space_to_tab' ] = space_to_tab
        return Bunch( **kwds )
    def add_composite_file( self, name, **kwds ):
        """Registers a component file (by name) for this composite datatype."""
        self.composite_files[ name ] = self.__new_composite_file( name, **kwds )
    def __substitute_composite_key( self, key, composite_file, dataset = None ):
        """Substitutes a metadata value into a composite file key pattern, when requested."""
        if composite_file.substitute_name_with_metadata:
            if dataset:
                meta_value = str( dataset.metadata.get( composite_file.substitute_name_with_metadata ) )
            else:
                # Consistency fix: the original read self.spec, which is not
                # defined on this class; the identical lookup in
                # get_composite_files() uses self.metadata_spec.
                meta_value = self.metadata_spec[composite_file.substitute_name_with_metadata].default
            return key % meta_value
        return key
    @property
    def writable_files( self, dataset = None ):
        # NOTE(review): accessed as a property, the 'dataset' argument can
        # never be supplied by callers and is always None here.
        files = odict()
        if self.composite_type != 'auto_primary_file':
            files[ self.primary_file_name ] = self.__new_composite_file( self.primary_file_name )
        for key, value in self.get_composite_files( dataset = dataset ).iteritems():
            files[ key ] = value
        return files
    def get_composite_files( self, dataset = None ):
        """Returns the composite files keyed by their (possibly metadata-substituted) names."""
        def substitute_composite_key( key, composite_file ):
            # Fill the key pattern from dataset metadata, falling back to the
            # spec default when no dataset is supplied.
            if composite_file.substitute_name_with_metadata:
                if dataset:
                    meta_value = str( dataset.metadata.get( composite_file.substitute_name_with_metadata ) )
                else:
                    meta_value = self.metadata_spec[ composite_file.substitute_name_with_metadata ].default
                return key % meta_value
            return key
        files = odict()
        for key, value in self.composite_files.iteritems():
            files[ substitute_composite_key( key, value ) ] = value
        return files
    def generate_auto_primary_file( self, dataset = None ):
        raise Exception( "generate_auto_primary_file is not implemented for this datatype." )
    @property
    def has_resolution(self):
        return False
---|
| 350 | |
---|
class Text( Data ):
    file_ext = 'txt'

    # Add metadata elements
    MetadataElement( name="data_lines", default=0, desc="Number of data lines", readonly=True, optional=True, visible=False, no_value=0 )

    def write_from_stream(self, dataset, stream):
        """Writes data from a stream, normalizing line endings to unix newlines."""
        # write it twice for now: first into a temp file, then rewritten
        # line-by-line into the dataset file
        fd, temp_name = tempfile.mkstemp()
        while 1:
            chunk = stream.read(1048576)
            if not chunk:
                break
            os.write(fd, chunk)
        os.close(fd)
        # rewrite the file with unix newlines
        # NOTE: strip() also removes leading whitespace, not just the newline
        fp = open(dataset.file_name, 'wt')
        for line in open(temp_name, "U"):
            line = line.strip() + '\n'
            fp.write(line)
        fp.close()
        # Bug fix: the original leaked the temp file here, although
        # set_raw_data() below removes its equivalent temp file.
        os.remove( temp_name )
    def set_raw_data(self, dataset, data):
        """Saves the data on the disc, normalizing line endings to unix newlines."""
        fd, temp_name = tempfile.mkstemp()
        os.write(fd, data)
        os.close(fd)
        # rewrite the file with unix newlines
        fp = open(dataset.file_name, 'wt')
        for line in open(temp_name, "U"):
            line = line.strip() + '\n'
            fp.write(line)
        fp.close()
        os.remove( temp_name )
    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/plain'
    def set_meta( self, dataset, **kwd ):
        """
        Set the number of lines of data in dataset,
        skipping all blank lines and comments.
        """
        data_lines = 0
        for line in open( dataset.file_name ):
            line = line.strip()
            if line and not line.startswith( '#' ):
                data_lines += 1
        dataset.metadata.data_lines = data_lines
    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
        """Sets the peek text and a '<n> lines' blurb."""
        if not dataset.dataset.purged:
            # The file must exist on disk for the get_file_peek() method
            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            if line_count is None:
                # See if line_count is stored in the metadata
                if dataset.metadata.data_lines:
                    dataset.blurb = "%s lines" % util.commaify( str( dataset.metadata.data_lines ) )
                else:
                    # Number of lines is not known ( this should not happen ), and auto-detect is
                    # needed to set metadata
                    dataset.blurb = "? lines"
            else:
                dataset.blurb = "%s lines" % util.commaify( str( line_count ) )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
---|
| 416 | |
---|
class Newick( Text ):
    """Presumably Newick-formatted tree files — TODO confirm; inherits all behavior from Text unchanged."""
    pass
---|
| 419 | |
---|
| 420 | # ------------- Utility methods -------------- |
---|
| 421 | |
---|
def get_test_fname( fname ):
    """Returns test data filename"""
    # Test files live in the 'test' directory beside this module.
    module_dir = os.path.split( __file__ )[0]
    return os.path.join( module_dir, 'test', fname )
---|
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100.0 bytes'
    >>> nice_size(10000)
    '9.8 Kb'
    >>> nice_size(1000000)
    '976.6 Kb'
    >>> nice_size(100000000)
    '95.4 Mb'
    """
    # Bug fix: sizes of 1 Tb and larger previously fell off the end of the
    # unit list and came back as '??? bytes'.  Larger units are now included
    # and the largest unit serves as the final fallback.
    words = [ 'bytes', 'Kb', 'Mb', 'Gb', 'Tb', 'Pb' ]
    try:
        size = float( size )
    except:
        # Non-numeric input is reported as unknown
        return '??? bytes'
    for ind, word in enumerate(words):
        step = 1024 ** (ind + 1)
        # Use this unit once the next step exceeds the size, or when we
        # have run out of larger units.
        if step > size or ind == len(words) - 1:
            size = size / float(1024 ** ind)
            return "%.1f %s" % (size, word)
---|
def get_file_peek( file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5 ):
    """
    Returns the first LINE_COUNT lines wrapped to WIDTH characters each,
    or a short '<type> file' tag when the content looks gzipped or binary.

    ## >>> fname = get_test_fname('4.bed')
    ## >>> get_file_peek(fname)
    ## 'chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 +\n'
    """
    lines = []
    count = 0
    file_type = None
    data_checked = False
    # Universal-newline mode so \r and \r\n files still split into lines
    temp = open( file_name, "U" )
    while count <= LINE_COUNT:
        # readline( WIDTH ) returns at most WIDTH characters, wrapping long lines
        line = temp.readline( WIDTH )
        if line and not is_multi_byte and not data_checked:
            # See if we have a compressed or binary file
            if line[0:2] == util.gzip_magic:
                file_type = 'gzipped'
                break
            else:
                # Any character with ordinal above 128 marks the file binary
                for char in line:
                    if ord( char ) > 128:
                        file_type = 'binary'
                        break
            # Only the first chunk read is sniffed for binary content
            data_checked = True
        if file_type in [ 'gzipped', 'binary' ]:
            break
        lines.append( line )
        count += 1
    temp.close()
    if file_type in [ 'gzipped', 'binary' ]:
        text = "%s file" % file_type
    else:
        try:
            # NOTE(review): lines keep their trailing newlines, so joining on
            # '\n' can introduce blank rows for unwrapped lines — presumably
            # intentional for the wrapped display; confirm against callers.
            text = unicode( '\n'.join( lines ), 'utf-8' )
        except UnicodeDecodeError:
            text = "binary/unknown file"
    return text
---|