root/galaxy-central/lib/galaxy/datatypes/assembly.py

リビジョン 2, 9.0 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1"""
2velvet datatypes
3James E Johnson - University of Minnesota
4for velvet assembler tool in galaxy
5"""
6
7import data
8from galaxy.datatypes import sequence
9import logging, os, sys, time, tempfile, shutil, string, glob, re
10import galaxy.model
11from galaxy.datatypes import metadata
12from galaxy.datatypes.metadata import MetadataElement
13from galaxy import util
14from galaxy.datatypes.images import Html
15from sniff import *
16
17log = logging.getLogger(__name__)
18
class Amos( data.Text ):
    """Class describing the AMOS assembly file """
    file_ext = 'afg'

    def sniff( self, filename ):
        """
        Determines whether the file is an amos assembly file format

        The file is scanned line by line; it is accepted as soon as a line
        that, once stripped, is exactly '{RED', '{CTG' or '{TLE' is found.
        Returns False when no such line exists or the file cannot be read.

        Example:
        {CTG
        iid:1
        eid:1
        seq:
        CCTCTCCTGTAGAGTTCAACCGA-GCCGGTAGAGTTTTATCA
        .
        qlt:
        DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD
        .
        {TLE
        src:1027
        off:0
        clr:618,0
        gap:
        250 612
        .
        }
        }
        """
        # FIXME: for a file that is not AMOS this reads the entire file.
        # It should call get_headers() like other sniff methods.
        try:
            fh = open( filename )
            try:
                for line in fh:
                    # The pattern is anchored on '{', so no separate
                    # startswith() test is needed.
                    if re.match( r'{(RED|CTG|TLE)$', line.strip() ):
                        return True
            finally:
                # Always release the handle, including on early return.
                fh.close()
        except ( IOError, OSError, UnicodeError ):
            # An unreadable or undecodable file is simply not AMOS.
            pass
        return False
64
class Sequences( sequence.Fasta ):
    """Class describing the Sequences file generated by velveth """

    def sniff( self, filename ):
        """
        Determines whether the file is a velveth produced  fasta format
        The id line has 3 fields separated by tabs: sequence_name  sequence_index category
          >SEQUENCE_0_length_35   1       1
          GGATATAGGGCCAACCCAACTCAACGGCCTGTCTT
          >SEQUENCE_1_length_35   2       1
          CGACGAATGACAGGTCACGAATTTGGCGGGGATTA

        Leading blank lines are skipped.  The first non-empty line must be a
        three-field header and the line after it must be non-empty and must
        not itself be a header.  Returns False if the file cannot be read.
        """
        try:
            fh = open( filename )
            try:
                while True:
                    line = fh.readline()
                    if not line:
                        return False #EOF before any non-empty line
                    line = line.strip()
                    if not line:
                        continue #skip leading blank lines
                    # First non-empty line: the pattern requires the leading
                    # '>' so non-fasta lines are rejected here as well.
                    if not re.match( r'>[^\t]+\t\d+\t\d+$', line ):
                        return False
                    # The next line.strip() must not be '', nor startwith '>'
                    line = fh.readline().strip()
                    return line != '' and not line.startswith( '>' )
            finally:
                # Close on every exit path, including the successful return.
                fh.close()
        except ( IOError, OSError, UnicodeError ):
            # An unreadable or undecodable file is not a velveth Sequences file.
            return False
100
class Roadmaps( data.Text ):
    """Class describing the Roadmaps file generated by velveth """

    def sniff( self, filename ):
        """
        Determines whether the file is a velveth produced RoadMap
          142858  21      1
          ROADMAP 1
          ROADMAP 2
          ...

        The very first line must hold three tab-separated integers and the
        second line must be 'ROADMAP <n>'.  Leading blank lines are NOT
        skipped: an empty or blank first line is rejected.  Returns False if
        the file cannot be read.
        """
        try:
            fh = open( filename )
            try:
                # An empty string (EOF or blank line) fails the pattern too.
                header = fh.readline().strip()
                if not re.match( r'\d+\t\d+\t\d+$', header ):
                    return False
                # The next line.strip() should be 'ROADMAP 1'
                first_roadmap = fh.readline().strip()
                return re.match( r'ROADMAP \d+$', first_roadmap ) is not None
            finally:
                # Close on every exit path, including the successful return.
                fh.close()
        except ( IOError, OSError, UnicodeError ):
            # An unreadable or undecodable file is not a velveth Roadmaps file.
            return False
134
class Velvet( Html ):
    """
    Composite velveth dataset whose primary file is a generated HTML index
    linking to the 'Sequences', 'Roadmaps' and optional 'Log' files.
    """
    # NOTE(review): the boolean-looking defaults below are the string "False",
    # which is truthy in Python -- confirm this is what callers expect.
    MetadataElement( name="base_name", desc="base name for velveth dataset", default="velvet", readonly=True, set_in_upload=True)
    MetadataElement( name="paired_end_reads", desc="has paired-end reads", default="False", readonly=False, set_in_upload=True)
    MetadataElement( name="long_reads", desc="has long reads", default="False", readonly=False, set_in_upload=True)
    MetadataElement( name="short2_reads", desc="has 2nd short reads", default="False", readonly=False, set_in_upload=True)
    composite_type = 'auto_primary_file'
    allow_datatype_change = False
    file_ext = 'html'

    def __init__( self, **kwd ):
        """Register the three composite files that make up the dataset."""
        Html.__init__( self, **kwd )
        log.debug( "Velvet log info  %s" % 'JJ __init__')
        self.add_composite_file( 'Sequences', mimetype = 'text/html', description = 'Sequences', substitute_name_with_metadata = None, is_binary = False )
        self.add_composite_file( 'Roadmaps', mimetype = 'text/html', description = 'Roadmaps', substitute_name_with_metadata = None, is_binary = False )
        self.add_composite_file( 'Log', mimetype = 'text/html', description = 'Log', optional = 'True', substitute_name_with_metadata = None, is_binary = False )

    def _composite_file_item( self, name, composite_file ):
        """Return the HTML <li> entry that links to one composite file."""
        opt_text = ''
        if composite_file.optional:
            opt_text = ' (optional)'
        description = composite_file.get('description')
        if description:
            return '<li><a href="%s" type="text/plain">%s (%s)</a>%s</li>' % ( name, name, description, opt_text )
        return '<li><a href="%s" type="text/plain">%s</a>%s</li>' % ( name, name, opt_text )

    def generate_primary_file( self, dataset = None ):
        """Return the HTML index listing every composite file as a string."""
        log.debug( "Velvet log info  %s %s" % ('JJ generate_primary_file',dataset))
        rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).items():
            log.debug( "Velvet log info  %s %s %s" % ('JJ generate_primary_file',composite_name,composite_file))
            rval.append( self._composite_file_item( composite_name, composite_file ) )
        rval.append( '</ul></div></html>' )
        return "\n".join( rval )

    def regenerate_primary_file(self,dataset):
        """
        cannot do this until we are setting metadata

        Reads up to the first 1000 characters of the 'Log' file in the
        dataset's extra files path, derives the paired_end_reads, long_reads
        and short2_reads metadata and dataset.info from it, then rewrites
        dataset.file_name with an HTML index of the non-Log composite files.
        If the Log cannot be processed the index is still written, with an
        empty 'Generated' section.
        """
        log.debug( "Velvet log info  %s" % 'JJ regenerate_primary_file')
        gen_msg = ''
        # Bound before the try so the except handler can always format it.
        efp = None
        try:
            efp = dataset.extra_files_path
            log_path = os.path.join(efp,'Log')
            f = open(log_path,'r')
            try:
                log_content = f.read(1000)
            finally:
                f.close()
            # Drop anything that looks like a directory prefix ('/.../').
            log_msg = re.sub(r'/\S*/','',log_content)
            log.debug( "Velveth log info  %s" % log_msg)
            paired_end_reads = re.search(r'-(short|long)Paired', log_msg) is not None
            dataset.metadata.paired_end_reads = paired_end_reads
            # Note: '-long' also matches '-longPaired'.
            long_reads = re.search(r'-long', log_msg) is not None
            dataset.metadata.long_reads = long_reads
            short2_reads = re.search(r'-short(Paired)?2', log_msg) is not None
            dataset.metadata.short2_reads = short2_reads
            dataset.info = re.sub(r'.*velveth \S+','hash_length',re.sub('\n',' ',log_msg))
            if paired_end_reads:
                gen_msg = gen_msg + ' Paired-End Reads'
            if long_reads:
                gen_msg = gen_msg + ' Long Reads'
            if len(gen_msg) > 0:
                gen_msg = 'Uses: ' + gen_msg
        except Exception:
            # Best effort: a missing or unreadable Log must not stop the
            # primary file from being regenerated.
            log.debug( "Velveth could not read Log file in %s" % efp)
        log.debug( "Velveth log info  %s" % gen_msg)
        rval = ['<html><head><title>Velvet Galaxy Composite Dataset </title></head><p/>']
        rval.append('<div>Generated:<p/> %s </div>' %(gen_msg))
        rval.append('<div>Velveth dataset:<p/><ul>')
        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).items():
            log.debug( "Velvet log info  %s %s %s" % ('JJ regenerate_primary_file',composite_name,composite_file))
            # Composite files whose name contains 'Log' are left out of the index.
            if re.search('Log',composite_name) is None:
                rval.append( self._composite_file_item( composite_name, composite_file ) )
        rval.append( '</ul></div></html>' )
        f = open(dataset.file_name,'w')
        try:
            f.write("\n".join( rval ))
            f.write('\n')
        finally:
            f.close()

    def set_meta( self, dataset, **kwd ):
        """Set the inherited Html metadata, then rebuild the primary file."""
        Html.set_meta( self, dataset, **kwd )
        self.regenerate_primary_file(dataset)
223
if __name__ == '__main__':
    # Run the doctests embedded in this module when executed as a script.
    import doctest
    import sys
    this_module = sys.modules[__name__]
    doctest.testmod(this_module)
227
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。