[2] | 1 | """ |
---|
| 2 | velvet datatypes |
---|
| 3 | James E Johnson - University of Minnesota |
---|
| 4 | for velvet assembler tool in galaxy |
---|
| 5 | """ |
---|
| 6 | |
---|
| 7 | import data |
---|
| 8 | from galaxy.datatypes import sequence |
---|
| 9 | import logging, os, sys, time, tempfile, shutil, string, glob, re |
---|
| 10 | import galaxy.model |
---|
| 11 | from galaxy.datatypes import metadata |
---|
| 12 | from galaxy.datatypes.metadata import MetadataElement |
---|
| 13 | from galaxy import util |
---|
| 14 | from galaxy.datatypes.images import Html |
---|
| 15 | from sniff import * |
---|
| 16 | |
---|
| 17 | log = logging.getLogger(__name__) |
---|
| 18 | |
---|
class Amos( data.Text ):
    """Class describing the AMOS assembly file """
    file_ext = 'afg'

    def sniff( self, filename ):
        """
        Determines whether the file is an amos assembly file format

        The file is accepted as soon as any line, once stripped of
        surrounding whitespace, is exactly one of the record openers
        ``{RED``, ``{CTG`` or ``{TLE``.  Any error while opening or reading
        the file is treated as "not an AMOS file".

        Example:
        {CTG
        iid:1
        eid:1
        seq:
        CCTCTCCTGTAGAGTTCAACCGA-GCCGGTAGAGTTTTATCA
        .
        qlt:
        DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
        .
        {TLE
        src:1027
        off:0
        clr:618,0
        gap:
        250 612
        .
        }
        }

        :param filename: path of the file to inspect
        :returns: True if a matching record opener was found, else False
        """
        # FIXME: when no record opener is present this still reads the entire
        # file.  It should call get_headers() like other sniff methods.
        is_record_opener = re.compile( r'{(RED|CTG|TLE)$' ).match
        try:
            # The context manager closes the handle on every exit path,
            # including an error raised part-way through reading.
            with open( filename ) as fh:
                for line in fh:
                    # The pattern is anchored at '{', so blank lines and lines
                    # not starting with '{' are rejected by the match itself.
                    if is_record_opener( line.strip() ):
                        return True
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not AMOS.
            log.debug( "Could not sniff %s as an AMOS file", filename, exc_info=True )
        return False
---|
| 64 | |
---|
class Sequences( sequence.Fasta ):
    """Class describing the Sequences file generated by velveth """

    def sniff( self, filename ):
        """
        Determines whether the file is a velveth produced fasta format
        The id line has 3 fields separated by tabs: sequence_name sequence_index category
        >SEQUENCE_0_length_35	1	1
        GGATATAGGGCCAACCCAACTCAACGGCCTGTCTT
        >SEQUENCE_1_length_35	2	1
        CGACGAATGACAGGTCACGAATTTGGCGGGGATTA

        Only the first non-empty line and the line immediately following it
        are examined.  Any error while opening or reading the file is treated
        as "not a velveth Sequences file".

        :param filename: path of the file to inspect
        :returns: True if the first record looks like velveth output, else False
        """
        try:
            # The context manager closes the handle on every exit path; the
            # previous implementation returned on success without closing it.
            with open( filename ) as fh:
                # Iterate through readline() rather than the file iterator so
                # that the extra readline() below never mixes the two APIs
                # (Python 2 file objects raise ValueError when they are mixed).
                for line in iter( fh.readline, '' ):
                    line = line.strip()
                    if not line:
                        continue  # skip leading blank lines
                    # First non-empty line: it must be a three-field header.
                    if not re.match( r'>[^\t]+\t\d+\t\d+$', line ):
                        return False
                    # The next line must be sequence data: neither empty nor
                    # another header.
                    following = fh.readline().strip()
                    return following != '' and not following.startswith( '>' )
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not a match.
            log.debug( "Could not sniff %s as a velveth Sequences file", filename, exc_info=True )
        return False
---|
| 100 | |
---|
class Roadmaps( data.Text ):
    """Class describing the Roadmaps file generated by velveth """

    def sniff( self, filename ):
        """
        Determines whether the file is a velveth produced RoadMap
        142858	21	1
        ROADMAP 1
        ROADMAP 2
        ...

        The very first line must consist of three tab-separated integers and
        the second line must be ``ROADMAP <n>``; leading blank lines are not
        skipped.  Any error while opening or reading the file is treated as
        "not a Roadmaps file".

        :param filename: path of the file to inspect
        :returns: True if the first two lines look like a RoadMap, else False
        """
        try:
            # The context manager closes the handle on every exit path; the
            # previous implementation returned on success without closing it.
            with open( filename ) as fh:
                # An empty file or a blank first line cannot match \d+, so
                # both are rejected here without a separate check.
                header = fh.readline().strip()
                if not re.match( r'\d+\t\d+\t\d+$', header ):
                    return False
                # The next line.strip() should be 'ROADMAP 1'
                first_roadmap = fh.readline().strip()
                return re.match( r'ROADMAP \d+$', first_roadmap ) is not None
        except Exception:
            # Sniffing is best effort: an unreadable file is simply not a match.
            log.debug( "Could not sniff %s as a velveth Roadmaps file", filename, exc_info=True )
        return False
---|
| 134 | |
---|
class Velvet( Html ):
    """
    Composite velveth dataset: an auto-generated HTML primary file plus the
    'Sequences', 'Roadmaps' and (optional) 'Log' composite files.
    """
    # NOTE(review): the three read-type defaults below are the *strings*
    # "False", not booleans, so they are truthy until set_meta() overwrites
    # them with real booleans -- confirm this is what consumers expect.
    MetadataElement( name="base_name", desc="base name for velveth dataset", default="velvet", readonly=True, set_in_upload=True)
    MetadataElement( name="paired_end_reads", desc="has paired-end reads", default="False", readonly=False, set_in_upload=True)
    MetadataElement( name="long_reads", desc="has long reads", default="False", readonly=False, set_in_upload=True)
    MetadataElement( name="short2_reads", desc="has 2nd short reads", default="False", readonly=False, set_in_upload=True)
    composite_type = 'auto_primary_file'
    allow_datatype_change = False
    file_ext = 'html'

    def __init__( self, **kwd ):
        """Register the composite files that make up a velveth dataset."""
        Html.__init__( self, **kwd )
        log.debug( "Velvet log info %s" % 'JJ __init__')
        self.add_composite_file( 'Sequences', mimetype = 'text/html', description = 'Sequences', substitute_name_with_metadata = None, is_binary = False )
        self.add_composite_file( 'Roadmaps', mimetype = 'text/html', description = 'Roadmaps', substitute_name_with_metadata = None, is_binary = False )
        # NOTE(review): optional is passed as the string 'True' rather than a
        # boolean; left unchanged in case callers depend on it -- confirm.
        self.add_composite_file( 'Log', mimetype = 'text/html', description = 'Log', optional = 'True', substitute_name_with_metadata = None, is_binary = False )

    def _composite_list_item( self, name, composite_file ):
        """Return the ``<li>`` HTML fragment that links to one composite file."""
        opt_text = ''
        if composite_file.optional:
            opt_text = ' (optional)'
        description = composite_file.get('description')
        if description:
            return '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( name, name, description, opt_text )
        return '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( name, name, opt_text )

    def generate_primary_file( self, dataset = None ):
        """
        Build the initial HTML primary file listing every composite file.

        :param dataset: dataset passed through to get_composite_files()
        :returns: the HTML document as a single newline-joined string
        """
        log.debug( "Velvet log info %s %s" % ('JJ generate_primary_file',dataset))
        rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
        # items() is available on both Python 2 and Python 3 mappings.
        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).items():
            log.debug( "Velvet log info %s %s %s" % ('JJ generate_primary_file',composite_name,composite_file))
            rval.append( self._composite_list_item( composite_name, composite_file ) )
        rval.append( '</ul></div></html>' )
        return "\n".join( rval )

    def regenerate_primary_file(self,dataset):
        """
        Rewrite the primary HTML file using information from the velveth Log.

        cannot do this until we are setting metadata

        Reads up to the first 1000 characters of the 'Log' file in the
        dataset's extra files directory, derives the paired_end_reads,
        long_reads and short2_reads metadata and dataset.info from it, then
        writes a fresh HTML index (omitting any composite file whose name
        contains 'Log') to dataset.file_name.  If the Log cannot be read the
        metadata is left untouched and the index is still written.

        :param dataset: dataset whose metadata, info and primary file are updated
        """
        log.debug( "Velvet log info %s" % 'JJ regenerate_primary_file')
        gen_msg = ''
        # Bound before the try so the except handler can always report it,
        # even when reading dataset.extra_files_path itself is what failed.
        efp = None
        try:
            efp = dataset.extra_files_path
            log_path = os.path.join(efp,'Log')
            with open(log_path,'r') as f:
                log_content = f.read(1000)
            # Strip directory components so only bare file names remain.
            log_msg = re.sub(r'/\S*/','',log_content)
            log.debug( "Velveth log info %s" % log_msg)
            paired_end_reads = re.search('-(short|long)Paired', log_msg) is not None
            dataset.metadata.paired_end_reads = paired_end_reads
            long_reads = re.search('-long', log_msg) is not None
            dataset.metadata.long_reads = long_reads
            short2_reads = re.search('-short(Paired)?2', log_msg) is not None
            dataset.metadata.short2_reads = short2_reads
            dataset.info = re.sub(r'.*velveth \S+','hash_length',re.sub('\n',' ',log_msg))
            if paired_end_reads:
                gen_msg = gen_msg + ' Paired-End Reads'
            if long_reads:
                gen_msg = gen_msg + ' Long Reads'
            if gen_msg:
                gen_msg = 'Uses: ' + gen_msg
        except Exception:
            # Best effort: the Log composite file is optional.
            log.debug( "Velveth could not read Log file in %s" % efp)
        log.debug( "Velveth log info %s" % gen_msg)
        rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
        rval.append('<div>Generated:<p/> %s </div>' %(gen_msg))
        rval.append('<div>Velveth dataset:<p/><ul>')
        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).items():
            log.debug( "Velvet log info %s %s %s" % ('JJ regenerate_primary_file',composite_name,composite_file))
            # The Log file is summarised above rather than linked.
            if 'Log' not in composite_name:
                rval.append( self._composite_list_item( composite_name, composite_file ) )
        rval.append( '</ul></div></html>' )
        # open() replaces the Python-2-only file() builtin; the context
        # manager closes the handle even if a write fails.
        with open(dataset.file_name,'w') as f:
            f.write("\n".join( rval ))
            f.write('\n')

    def set_meta( self, dataset, **kwd ):
        """Set the Html metadata, then rebuild the primary file from the Log."""
        Html.set_meta( self, dataset, **kwd )
        self.regenerate_primary_file(dataset)
---|
| 223 | |
---|
if __name__ == '__main__':
    # Run the doctests embedded in this module when it is executed as a script.
    import doctest
    import sys
    this_module = sys.modules[__name__]
    doctest.testmod(this_module)
---|
| 227 | |
---|