1 | """ |
---|
2 | velvet datatypes |
---|
3 | James E Johnson - University of Minnesota |
---|
4 | for velvet assembler tool in galaxy |
---|
5 | """ |
---|
6 | |
---|
7 | import data |
---|
8 | from galaxy.datatypes import sequence |
---|
9 | import logging, os, sys, time, tempfile, shutil, string, glob, re |
---|
10 | import galaxy.model |
---|
11 | from galaxy.datatypes import metadata |
---|
12 | from galaxy.datatypes.metadata import MetadataElement |
---|
13 | from galaxy import util |
---|
14 | from galaxy.datatypes.images import Html |
---|
15 | from sniff import * |
---|
16 | |
---|
# Module-level logger, named after this module, used for the debug output below.
log = logging.getLogger(__name__)
---|
18 | |
---|
class Amos( data.Text ):
    """Class describing the AMOS assembly file """
    file_ext = 'afg'

    def sniff( self, filename ):
        """
        Determines whether the file is an amos assembly file format

        The file is scanned line by line; it is accepted as soon as a line
        that, once stripped, consists solely of ``{RED``, ``{CTG`` or ``{TLE``
        is found.  Returns True on such a match, False otherwise (including
        when the file cannot be opened or read).

        Example:
        {CTG
        iid:1
        eid:1
        seq:
        CCTCTCCTGTAGAGTTCAACCGA-GCCGGTAGAGTTTTATCA
        .
        qlt:
        DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
        .
        {TLE
        src:1027
        off:0
        clr:618,0
        gap:
        250 612
        .
        }
        }
        """
        # FIXME: a file that contains no matching record is still read to the
        # end.  It should call get_headers() like other sniff methods.
        try:
            # The context manager closes the handle on every exit path,
            # including the early return and read errors.
            with open( filename ) as fh:
                for line in fh:
                    if re.match( r'\{(RED|CTG|TLE)$', line.strip() ):
                        return True
        except Exception as e:
            # Sniffing is best effort: an unreadable file is simply "not AMOS".
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
            log.debug( "Amos sniff could not read %s: %s", filename, e )
        return False
---|
64 | |
---|
class Sequences( sequence.Fasta ):
    """Class describing the Sequences file generated by velveth """

    def sniff( self, filename ):
        """
        Determines whether the file is a velveth produced fasta format
        The id line has 3 fields separated by tabs: sequence_name sequence_index category
        >SEQUENCE_0_length_35	1	1
        GGATATAGGGCCAACCCAACTCAACGGCCTGTCTT
        >SEQUENCE_1_length_35	2	1
        CGACGAATGACAGGTCACGAATTTGGCGGGGATTA

        Only the first non-empty line and the line right after it are
        examined.  Returns True when the first non-empty line is a
        three-field header and the next line is neither empty nor another
        header; False otherwise (including when the file cannot be read).
        """
        try:
            # The context manager closes the handle on every exit path; the
            # previous code returned True without ever closing the file.
            with open( filename ) as fh:
                for line in fh:
                    header = line.strip()
                    if not header:
                        continue  # skip leading blank lines
                    # first non-empty line: must be '>name<TAB>index<TAB>category'
                    if not re.match( r'>[^\t]+\t\d+\t\d+$', header ):
                        return False
                    # The next line.strip() must not be '', nor startwith '>'.
                    # next() is used instead of readline() because mixing
                    # iteration and readline() is unsafe on Python 2 files.
                    following = next( fh, '' ).strip()
                    return following != '' and not following.startswith( '>' )
        except Exception as e:
            # Sniffing is best effort: an unreadable file is simply not a match.
            log.debug( "Sequences sniff could not read %s: %s", filename, e )
        return False
---|
100 | |
---|
class Roadmaps( data.Text ):
    """Class describing the Roadmaps file generated by velveth """

    def sniff( self, filename ):
        """
        Determines whether the file is a velveth produced RoadMap
        142858	21	1
        ROADMAP 1
        ROADMAP 2
        ...

        Only the first non-empty line and the line right after it are
        examined.  Returns True when the first non-empty line holds three
        tab-separated integers and the next line is 'ROADMAP <number>';
        False otherwise (including when the file cannot be read).
        """
        try:
            # The context manager closes the handle on every exit path; the
            # previous code returned True without ever closing the file.
            with open( filename ) as fh:
                for line in fh:
                    first = line.strip()
                    if not first:
                        continue  # skip leading blank lines
                    # first non-empty line: three tab-separated integers
                    if not re.match( r'\d+\t\d+\t\d+$', first ):
                        return False
                    # The next line.strip() should be 'ROADMAP 1'.
                    # next() is used instead of readline() because mixing
                    # iteration and readline() is unsafe on Python 2 files.
                    following = next( fh, '' ).strip()
                    return re.match( r'ROADMAP \d+$', following ) is not None
        except Exception as e:
            # Sniffing is best effort: an unreadable file is simply not a match.
            log.debug( "Roadmaps sniff could not read %s: %s", filename, e )
        return False
---|
134 | |
---|
class Velvet( Html ):
    """
    Composite dataset whose primary file is a generated HTML index linking to
    the 'Sequences', 'Roadmaps' and (optional) 'Log' composite files.
    """
    # NOTE(review): the three flag defaults below are the *string* "False",
    # which is truthy; regenerate_primary_file() later stores real booleans.
    # Confirm whether a boolean default is intended before changing them.
    MetadataElement( name="base_name", desc="base name for velveth dataset", default="velvet", readonly=True, set_in_upload=True)
    MetadataElement( name="paired_end_reads", desc="has paired-end reads", default="False", readonly=False, set_in_upload=True)
    MetadataElement( name="long_reads", desc="has long reads", default="False", readonly=False, set_in_upload=True)
    MetadataElement( name="short2_reads", desc="has 2nd short reads", default="False", readonly=False, set_in_upload=True)
    composite_type = 'auto_primary_file'
    allow_datatype_change = False
    file_ext = 'html'

    def __init__( self, **kwd ):
        """Initialise the Html base class and register the composite files."""
        Html.__init__( self, **kwd )
        log.debug( "Velvet log info %s", 'JJ __init__' )
        self.add_composite_file( 'Sequences', mimetype = 'text/html', description = 'Sequences', substitute_name_with_metadata = None, is_binary = False )
        self.add_composite_file( 'Roadmaps', mimetype = 'text/html', description = 'Roadmaps', substitute_name_with_metadata = None, is_binary = False )
        # NOTE(review): optional is passed as the string 'True' (truthy), not
        # the boolean True -- confirm what add_composite_file expects.
        self.add_composite_file( 'Log', mimetype = 'text/html', description = 'Log', optional = 'True', substitute_name_with_metadata = None, is_binary = False )

    def _composite_file_items( self, dataset, caller, skip_log = False ):
        """
        Build the '<li>' link lines for the composite files of `dataset`.

        `caller` is only used to label the debug log output.  When `skip_log`
        is true, composite files whose name contains 'Log' are left out.
        Returns a list of HTML strings, one per listed composite file.
        """
        items = []
        # .items() works on both Python 2 and 3 mappings (iteritems() is 2-only).
        for fn, composite_file in self.get_composite_files( dataset = dataset ).items():
            log.debug( "Velvet log info %s %s %s", caller, fn, composite_file )
            if skip_log and 'Log' in fn:
                continue
            opt_text = ''
            if composite_file.optional:
                opt_text = ' (optional)'
            description = composite_file.get( 'description' )
            if description:
                items.append( '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( fn, fn, description, opt_text ) )
            else:
                items.append( '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( fn, fn, opt_text ) )
        return items

    def generate_primary_file( self, dataset = None ):
        """
        Return the HTML text of the primary file: a list of links to every
        composite file of `dataset`.
        """
        log.debug( "Velvet log info %s %s", 'JJ generate_primary_file', dataset )
        rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
        rval.extend( self._composite_file_items( dataset, 'JJ generate_primary_file' ) )
        rval.append( '</ul></div></html>' )
        return "\n".join( rval )

    def regenerate_primary_file(self,dataset):
        """
        cannot do this until we are setting metadata

        Reads up to the first 1000 characters of the 'Log' file in the
        dataset's extra files directory, derives the paired_end_reads,
        long_reads and short2_reads metadata flags and dataset.info from the
        velveth options found there, then rewrites dataset.file_name with an
        HTML index of the composite files (the Log file itself is not
        listed).  If the Log cannot be read, the metadata is left as it was
        and the index is written with an empty 'Generated' section.
        """
        log.debug( "Velvet log info %s", 'JJ regenerate_primary_file' )
        gen_msg = ''
        # Bound before the try so the handler can report it even when the
        # attribute lookup itself is what failed (previously a NameError).
        efp = None
        try:
            efp = dataset.extra_files_path
            with open( os.path.join( efp, 'Log' ), 'r' ) as f:
                log_content = f.read( 1000 )
            # Drop directory components so only bare file names remain.
            log_msg = re.sub( r'/\S*/', '', log_content )
            log.debug( "Velveth log info %s", log_msg )
            paired_end_reads = re.search( r'-(short|long)Paired', log_msg ) is not None
            dataset.metadata.paired_end_reads = paired_end_reads
            long_reads = re.search( r'-long', log_msg ) is not None
            dataset.metadata.long_reads = long_reads
            short2_reads = re.search( r'-short(Paired)?2', log_msg ) is not None
            dataset.metadata.short2_reads = short2_reads
            dataset.info = re.sub( r'.*velveth \S+', 'hash_length', re.sub( '\n', ' ', log_msg ) )
            if paired_end_reads:
                gen_msg = gen_msg + ' Paired-End Reads'
            if long_reads:
                gen_msg = gen_msg + ' Long Reads'
            if len(gen_msg) > 0:
                gen_msg = 'Uses: ' + gen_msg
        except Exception:
            # Best effort: a missing or unreadable Log must not prevent the
            # primary file from being regenerated.
            log.debug( "Velveth could not read Log file in %s", efp )
        log.debug( "Velveth log info %s", gen_msg )
        rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
        rval.append('<div>Generated:<p/> %s </div>' %(gen_msg))
        rval.append('<div>Velveth dataset:<p/><ul>')
        rval.extend( self._composite_file_items( dataset, 'JJ regenerate_primary_file', skip_log = True ) )
        rval.append( '</ul></div></html>' )
        # open() instead of the Python-2-only file(); the context manager
        # closes the handle even if a write fails.
        with open( dataset.file_name, 'w' ) as f:
            f.write( "\n".join( rval ) )
            f.write( '\n' )

    def set_meta( self, dataset, **kwd ):
        """Set the Html metadata, then rebuild the primary file from the Log."""
        Html.set_meta( self, dataset, **kwd )
        self.regenerate_primary_file(dataset)
---|
223 | |
---|
if __name__ == '__main__':
    # Executed as a script: run the doctests found in this module.
    import doctest
    import sys
    this_module = sys.modules[__name__]
    doctest.testmod( this_module )
---|
227 | |
---|