import logging, os, sys, time, tempfile
from galaxy import util
from galaxy.util.odict import odict
from galaxy.util.bunch import Bunch
from cgi import escape
import metadata
import zipfile
from metadata import MetadataElement #import directly to maintain ease of use in Datatype class definitions

log = logging.getLogger(__name__)

# Valid first column and strand column values for bed and other formats
col1_startswith = ['chr', 'chl', 'groupun', 'reftig_', 'scaffold', 'super_', 'vcho']
valid_strand = ['+', '-', '.']

class DataMeta( type ):
    """
    Metaclass for Data class. Sets up metadata spec.
    """
    def __init__( cls, name, bases, dict_ ):
        cls.metadata_spec = metadata.MetadataSpecCollection()
        for base in bases: #loop through the bases (classes/types) of cls
            if hasattr( base, "metadata_spec" ): #the base of class Data (object) has no metadata
                cls.metadata_spec.update( base.metadata_spec ) #add the contents of the base class's metadata spec to cls
        metadata.Statement.process( cls )

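# Illustrative sketch (class and element names below are hypothetical): because of
# this metaclass, MetadataElement declarations made in a class body are collected
# by metadata.Statement and replayed onto the class, and every subclass inherits
# the accumulated spec of all its bases. For example:
#
#   class IntervalLike( Data ):
#       MetadataElement( name="chromCol", default=1, desc="Chrom column" )
#
# IntervalLike.metadata_spec would then contain both "dbkey" (declared on Data
# below) and "chromCol".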
class Data( object ):
    """
    Base class for all datatypes. Implements basic interfaces as well
    as class methods for metadata.

    >>> class DataTest( Data ):
    ...     MetadataElement( name="test" )
    ...
    >>> DataTest.metadata_spec.test.name
    'test'
    >>> DataTest.metadata_spec.test.desc
    'test'
    >>> type( DataTest.metadata_spec.test.param )
    <class 'galaxy.datatypes.metadata.MetadataParameter'>

    """
    __metaclass__ = DataMeta
    # Add metadata elements
    MetadataElement( name="dbkey", desc="Database/Build", default="?", param=metadata.DBKeyParameter, multiple=False, no_value="?" )
    # Stores the set of display applications, and viewing methods, supported by this datatype
    supported_display_apps = {}
    # If False, the peek is regenerated whenever a dataset of this type is copied
    copy_safe_peek = True
    # The dataset contains binary data --> do not space_to_tab or convert newlines, etc.
    # Allow binary file uploads of this type when True.
    is_binary = True
    # Allow users to change between this datatype and others. If False, datasets of this type
    # cannot be changed to or from another datatype.
    allow_datatype_change = True
    #Composite datatypes
    composite_type = None
    composite_files = odict()
    primary_file_name = 'index'
    #A per datatype setting (inherited): max file size (in bytes) for setting optional metadata
    _max_optional_metadata_filesize = None

    def __init__(self, **kwd):
        """Initialize the datatype"""
        object.__init__(self, **kwd)
        self.supported_display_apps = self.supported_display_apps.copy()
        self.composite_files = self.composite_files.copy()
        self.display_applications = odict()
    def write_from_stream(self, dataset, stream):
        """Writes data from a stream"""
        fp = open(dataset.file_name, 'wb')
        while 1:
            chunk = stream.read(1048576) #read in 1 MB chunks to bound memory use
            if not chunk:
                break
            fp.write(chunk)
        fp.close()
    def set_raw_data(self, dataset, data):
        """Saves the data on disk"""
        fp = open(dataset.file_name, 'wb')
        fp.write(data)
        fp.close()
    def get_raw_data( self, dataset ):
        """Returns the full data. To stream it open the file_name and read/write as needed"""
        try:
            return file(dataset.file_name, 'rb').read(-1)
        except (IOError, OSError), e:
            log.exception('%s reading a file that does not exist %s' % (self.__class__.__name__, dataset.file_name))
            return ''
    def groom_dataset_content( self, file_name ):
        """This function is called on an output dataset file after the content is initially generated."""
        pass
    def init_meta( self, dataset, copy_from=None ):
        # Metadata should be left mostly uninitialized. Dataset will
        # handle returning default values when metadata is not set.
        # copy_from allows metadata to be passed in that will be
        # copied. (Although this seems ambiguous, see
        # Dataset.set_metadata. It always copies the rhs in order to
        # flag the object as modified for SQLAlchemy.)
        if copy_from:
            dataset.metadata = copy_from.metadata
    def set_meta( self, dataset, overwrite = True, **kwd ):
        """Unimplemented method, allows guessing of metadata from contents of file"""
        return True
    def missing_meta( self, dataset, check = [], skip = [] ):
        """
        Checks for empty metadata values. Returns True if non-optional metadata is missing.
        Specifying a list of 'check' names will check only those names provided; when used, optionality is ignored.
        Specifying a list of 'skip' names will exclude those names from the check, even if their values are missing.
        """
        if check:
            to_check = [ ( to_check, dataset.metadata.get( to_check ) ) for to_check in check ]
        else:
            to_check = dataset.metadata.items()
        for key, value in to_check:
            if key in skip or ( not check and dataset.metadata.spec[key].get( "optional" ) ):
                continue #we skip the check for optional and non-requested values here
            if not value:
                return True
        return False
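    # Usage sketch (metadata names here are hypothetical): for a dataset whose
    # spec declares a required "columns" and an optional "comment_lines",
    #   missing_meta( dataset )                    -> True only if "columns" is unset
    #   missing_meta( dataset, check=["columns"] ) -> checks "columns" alone, ignoring optionality
    #   missing_meta( dataset, skip=["columns"] )  -> never reports "columns" as missing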
    def set_max_optional_metadata_filesize( self, max_value ):
        try:
            max_value = int( max_value )
        except ( TypeError, ValueError ):
            return
        self.__class__._max_optional_metadata_filesize = max_value
    def get_max_optional_metadata_filesize( self ):
        rval = self.__class__._max_optional_metadata_filesize
        if rval is None:
            return -1
        return rval
    max_optional_metadata_filesize = property( get_max_optional_metadata_filesize, set_max_optional_metadata_filesize )
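    # Note: -1 signals that no maximum has been configured, presumably treated by
    # callers as "no limit" when deciding whether to set optional metadata.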
    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = ''
            dataset.blurb = 'data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
    def display_peek(self, dataset ):
        """Create HTML table, used for displaying peek"""
        out = ['<table cellspacing="0" cellpadding="3">']
        try:
            if not dataset.peek:
                dataset.set_peek()
            data = dataset.peek
            lines = data.splitlines()
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                if type( line ) is unicode:
                    out.append( '<tr><td>%s</td></tr>' % escape( line ) )
                else:
                    out.append( '<tr><td>%s</td></tr>' % escape( unicode( line, 'utf-8' ) ) )
            out.append( '</table>' )
            out = "".join( out )
        except Exception, exc:
            out = "Can't create peek %s" % str( exc )
        return out
    def display_name(self, dataset):
        """Returns formatted html of dataset name"""
        try:
            if type( dataset.name ) is unicode:
                return escape( dataset.name )
            else:
                return escape( unicode( dataset.name, 'utf-8' ) )
        except:
            return "name unavailable"
    def display_info(self, dataset):
        """Returns formatted html of dataset info"""
        try:
            # Change new line chars to html
            info = escape( dataset.info )
            if info.find( '\r\n' ) >= 0:
                info = info.replace( '\r\n', '<br/>' )
            if info.find( '\r' ) >= 0:
                info = info.replace( '\r', '<br/>' )
            if info.find( '\n' ) >= 0:
                info = info.replace( '\n', '<br/>' )

            # Convert to unicode to display non-ascii characters.
            if type( info ) is not unicode:
                info = unicode( info, 'utf-8')

            return info
        except:
            return "info unavailable"
    def validate(self, dataset):
        """Unimplemented validate; returns an empty list of exceptions"""
        return list()
    def repair_methods(self, dataset):
        """Unimplemented method; returns dict with method/option for repairing errors"""
        return None
    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'application/octet-stream'
    def add_display_app( self, app_id, label, file_function, links_function ):
        """
        Adds a display app to the datatype.
        app_id is a unique id
        label is the primary display label, e.g., display at 'UCSC'
        file_function is a string containing the name of the function that returns a properly formatted display
        links_function is a string containing the name of the function that returns a list of (link_name,link)
        """
        self.supported_display_apps = self.supported_display_apps.copy()
        self.supported_display_apps[app_id] = {'label':label, 'file_function':file_function, 'links_function':links_function}
    def remove_display_app( self, app_id ):
        """Removes a display app from the datatype"""
        self.supported_display_apps = self.supported_display_apps.copy()
        try:
            del self.supported_display_apps[app_id]
        except KeyError:
            log.exception('Tried to remove display app %s from datatype %s, but this display app is not declared.' % ( app_id, self.__class__.__name__ ) )
    def clear_display_apps( self ):
        self.supported_display_apps = {}
    def add_display_application( self, display_application ):
        """New style display applications"""
        assert display_application.id not in self.display_applications, 'Attempted to add a display application twice'
        self.display_applications[ display_application.id ] = display_application
    def get_display_application( self, key, default = None ):
        return self.display_applications.get( key, default )
    def get_display_applications_by_dataset( self, dataset, trans ):
        rval = odict()
        for key, value in self.display_applications.iteritems():
            value = value.filter_by_dataset( dataset, trans )
            if value.links:
                rval[key] = value
        return rval
    def get_display_types(self):
        """Returns display types available"""
        return self.supported_display_apps.keys()
    def get_display_label(self, type):
        """Returns primary label for display app"""
        try:
            return self.supported_display_apps[type]['label']
        except KeyError:
            return 'unknown'
    def as_display_type(self, dataset, type, **kwd):
        """Returns modified file contents for a particular display type"""
        try:
            if type in self.get_display_types():
                return getattr( self, self.supported_display_apps[type]['file_function'] )( dataset, **kwd )
        except:
            log.exception('Function %s is referred to in datatype %s for displaying as type %s, but is not accessible' % (self.supported_display_apps[type]['file_function'], self.__class__.__name__, type) )
        return "This display type (%s) is not implemented for this datatype (%s)." % ( type, dataset.ext )
    def get_display_links( self, dataset, type, app, base_url, target_frame='_blank', **kwd ):
        """
        Returns a list of tuples of (name, link) for a particular display type. No check on
        'access' permissions is done here - if you can view the dataset, you can also save it
        or send it to a destination outside of Galaxy, so Galaxy security restrictions do not
        apply anyway.
        """
        try:
            if type in self.get_display_types():
                return target_frame, getattr( self, self.supported_display_apps[type]['links_function'] )( dataset, type, app, base_url, **kwd )
        except:
            log.exception( 'Function %s is referred to in datatype %s for generating links for type %s, but is not accessible' \
                % ( self.supported_display_apps[type]['links_function'], self.__class__.__name__, type ) )
        return target_frame, []
    def get_converter_types(self, original_dataset, datatypes_registry):
        """Returns available converters by type for this dataset"""
        return datatypes_registry.get_converters_by_datatype(original_dataset.ext)
    def find_conversion_destination( self, dataset, accepted_formats, datatypes_registry, **kwd ):
        """Returns ( target_ext, existing converted dataset )"""
        return datatypes_registry.find_conversion_destination_for_dataset_by_extensions( dataset, accepted_formats, **kwd )
    def convert_dataset(self, trans, original_dataset, target_type, return_output = False, visible = True, deps=None):
        """This function adds a job to the queue to convert a dataset to another type. Returns a message about success/failure."""
        converter = trans.app.datatypes_registry.get_converter_by_target_type( original_dataset.ext, target_type )

        if converter is None:
            raise Exception( "A converter does not exist for %s to %s." % ( original_dataset.ext, target_type ) )
        #Generate parameter dictionary
        params = {}
        #determine input parameter name and add to params
        input_name = 'input1'
        for key, value in converter.inputs.items():
            if deps and value.name in deps:
                params[value.name] = deps[value.name]
            elif value.type == 'data':
                input_name = key

        params[input_name] = original_dataset
        #Run the converter; the job is dispatched through the queue
        converted_dataset = converter.execute( trans, incoming = params, set_output_hid = visible )[1]
        if len(params) > 0:
            trans.log_event( "Converter params: %s" % (str(params)), tool_id=converter.id )
        if not visible:
            for name, value in converted_dataset.iteritems():
                value.visible = False
        if return_output:
            return converted_dataset
        return "The file conversion of %s on data %s has been added to the Queue." % (converter.name, original_dataset.hid)
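    # Usage sketch (hypothetical call site): from code holding a transaction and
    # a dataset, a conversion job might be queued with
    #   msg = data.datatype.convert_dataset( trans, data, 'interval' )
    # which returns a status message, or the converter's output dataset dict
    # when called with return_output=True.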
    #We need to clear associated files before we set metadata,
    #so that implicitly converted datasets are deleted and no longer available
    #'while' metadata is being set, not just after.
    #We'll also clear after setting metadata, for backwards compatibility.
    def after_setting_metadata( self, dataset ):
        """This function is called on the dataset after metadata is set."""
        dataset.clear_associated_files( metadata_safe = True )
    def before_setting_metadata( self, dataset ):
        """This function is called on the dataset before metadata is set."""
        dataset.clear_associated_files( metadata_safe = True )
    def __new_composite_file( self, name, optional = False, mimetype = None, description = None, substitute_name_with_metadata = None, is_binary = False, space_to_tab = False, **kwds ):
        kwds[ 'name' ] = name
        kwds[ 'optional' ] = optional
        kwds[ 'mimetype' ] = mimetype
        kwds[ 'description' ] = description
        kwds[ 'substitute_name_with_metadata' ] = substitute_name_with_metadata
        kwds[ 'is_binary' ] = is_binary
        kwds[ 'space_to_tab' ] = space_to_tab
        return Bunch( **kwds )
    def add_composite_file( self, name, **kwds ):
        #self.composite_files = self.composite_files.copy()
        self.composite_files[ name ] = self.__new_composite_file( name, **kwds )
    def __substitute_composite_key( self, key, composite_file, dataset = None ):
        if composite_file.substitute_name_with_metadata:
            if dataset:
                meta_value = str( dataset.metadata.get( composite_file.substitute_name_with_metadata ) )
            else:
                meta_value = self.metadata_spec[composite_file.substitute_name_with_metadata].default
            return key % meta_value
        return key
    @property
    def writable_files( self, dataset = None ):
        files = odict()
        if self.composite_type != 'auto_primary_file':
            files[ self.primary_file_name ] = self.__new_composite_file( self.primary_file_name )
        for key, value in self.get_composite_files( dataset = dataset ).iteritems():
            files[ key ] = value
        return files
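    # Note: because writable_files is accessed as a property, the dataset
    # argument can never actually be supplied and is always None here, so
    # composite key substitution falls back to the metadata spec defaults.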
    def get_composite_files( self, dataset = None ):
        def substitute_composite_key( key, composite_file ):
            if composite_file.substitute_name_with_metadata:
                if dataset:
                    meta_value = str( dataset.metadata.get( composite_file.substitute_name_with_metadata ) )
                else:
                    meta_value = self.metadata_spec[ composite_file.substitute_name_with_metadata ].default
                return key % meta_value
            return key
        files = odict()
        for key, value in self.composite_files.iteritems():
            files[ substitute_composite_key( key, value ) ] = value
        return files
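    # Key substitution sketch (names are illustrative): a composite file
    # registered as
    #   self.add_composite_file( '%s.idx', substitute_name_with_metadata='base_name' )
    # resolves to '<value of metadata.base_name>.idx' when a dataset is supplied,
    # and to the spec default for 'base_name' otherwise.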
    def generate_auto_primary_file( self, dataset = None ):
        raise Exception( "generate_auto_primary_file is not implemented for this datatype." )
    @property
    def has_resolution(self):
        return False

class Text( Data ):
    file_ext = 'txt'

    """Add metadata elements"""
    MetadataElement( name="data_lines", default=0, desc="Number of data lines", readonly=True, optional=True, visible=False, no_value=0 )

    def write_from_stream(self, dataset, stream):
        """Writes data from a stream"""
        # write it twice for now
        fd, temp_name = tempfile.mkstemp()
        while 1:
            chunk = stream.read(1048576)
            if not chunk:
                break
            os.write(fd, chunk)
        os.close(fd)
        # rewrite the file with unix newlines
        fp = open(dataset.file_name, 'wt')
        for line in file(temp_name, "U"):
            line = line.strip() + '\n'
            fp.write(line)
        fp.close()
        os.remove( temp_name )
    def set_raw_data(self, dataset, data):
        """Saves the data on disk"""
        fd, temp_name = tempfile.mkstemp()
        os.write(fd, data)
        os.close(fd)
        # rewrite the file with unix newlines
        fp = open(dataset.file_name, 'wt')
        for line in file(temp_name, "U"):
            line = line.strip() + '\n'
            fp.write(line)
        fp.close()
        os.remove( temp_name )
    def get_mime(self):
        """Returns the mime type of the datatype"""
        return 'text/plain'
    def set_meta( self, dataset, **kwd ):
        """
        Set the number of lines of data in dataset,
        skipping all blank lines and comments.
        """
        data_lines = 0
        for line in file( dataset.file_name ):
            line = line.strip()
            if line and not line.startswith( '#' ):
                data_lines += 1
        dataset.metadata.data_lines = data_lines
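    # For example, a file containing three '#' comment lines, two blank lines,
    # and ten data rows ends up with dataset.metadata.data_lines == 10.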
    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
        if not dataset.dataset.purged:
            # The file must exist on disk for the get_file_peek() method
            dataset.peek = get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            if line_count is None:
                # See if line_count is stored in the metadata
                if dataset.metadata.data_lines:
                    dataset.blurb = "%s lines" % util.commaify( str( dataset.metadata.data_lines ) )
                else:
                    # Number of lines is not known ( this should not happen ), and auto-detect is
                    # needed to set metadata
                    dataset.blurb = "? lines"
            else:
                dataset.blurb = "%s lines" % util.commaify( str( line_count ) )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

class Newick( Text ):
    pass

# ------------- Utility methods --------------

def get_test_fname( fname ):
    """Returns test data filename"""
    path, name = os.path.split(__file__)
    full_path = os.path.join( path, 'test', fname )
    return full_path
def nice_size(size):
    """
    Returns a readably formatted string with the size

    >>> nice_size(100)
    '100.0 bytes'
    >>> nice_size(10000)
    '9.8 Kb'
    >>> nice_size(1000000)
    '976.6 Kb'
    >>> nice_size(100000000)
    '95.4 Mb'
    """
    words = [ 'bytes', 'Kb', 'Mb', 'Gb' ]
    try:
        size = float( size )
    except ( TypeError, ValueError ):
        return '??? bytes'
    for ind, word in enumerate(words):
        step = 1024 ** (ind + 1)
        if step > size:
            size = size / float(1024 ** ind)
            out = "%.1f %s" % (size, word)
            return out
    return '??? bytes'
def get_file_peek( file_name, is_multi_byte=False, WIDTH=256, LINE_COUNT=5 ):
    """
    Returns the first LINE_COUNT lines wrapped to WIDTH

    ## >>> fname = get_test_fname('4.bed')
    ## >>> get_file_peek(fname)
    ## 'chr22 30128507 31828507 uc003bnx.1_cds_2_0_chr22_29227_f 0 +\n'
    """
    lines = []
    count = 0
    file_type = None
    data_checked = False
    temp = open( file_name, "U" )
    while count <= LINE_COUNT:
        line = temp.readline( WIDTH )
        if line and not is_multi_byte and not data_checked:
            # See if we have a compressed or binary file
            if line[0:2] == util.gzip_magic:
                file_type = 'gzipped'
                break
            else:
                for char in line:
                    if ord( char ) > 128:
                        file_type = 'binary'
                        break
            data_checked = True
        if file_type in [ 'gzipped', 'binary' ]:
            break
        lines.append( line )
        count += 1
    temp.close()
    if file_type in [ 'gzipped', 'binary' ]:
        text = "%s file" % file_type
    else:
        try:
            text = unicode( '\n'.join( lines ), 'utf-8' )
        except UnicodeDecodeError:
            text = "binary/unknown file"
    return text