1 |
|
---|
def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
    """Parse ``microbial_data.loc`` into a dict of organisms grouped by kingdom.

    The file ``<GALAXY_DATA_INDEX_DIR>/microbial_data.loc`` is read line by
    line.  Blank lines and lines starting with ``#`` are ignored.  Every other
    line is split on ``sep``; its first field (case-insensitive) selects the
    record type:

    * ``ORG``  -- org_num, name, kingdom, group, chromosomes, info_url, link_site
    * ``CHR``  -- org_num, chr_acc, name, length, gi, gb, info_url
    * ``DATA`` -- uid, org_num, chr_acc, feature, filetype, path

    Records of any other type, records with too few fields, and DATA records
    that refer to a chromosome not declared by an earlier CHR record are
    skipped.  Fields beyond the ones listed above are ignored.

    Returns a dict shaped as::

        { kingdom: { org_num: { 'name': ..., 'kingdom': ..., 'group': ...,
                                'chromosomes': ..., 'info_url': ...,
                                'link_site': ...,
                                'chrs': { chr_acc: { 'name': ..., 'length': ...,
                                                     'gi': ..., 'gb': ...,
                                                     'info_url': ...,
                                                     'data': { uid: { 'filetype': ...,
                                                                      'path': ...,
                                                                      'feature': ... } } } } } } }

    The ``'data'`` key is only present on chromosomes that have at least one
    DATA record.  All values are kept as the strings read from the file.
    Organisms that never received an ORG record (and therefore have no
    kingdom) are left out of the result.

    Raises whatever ``open()`` raises when the .loc file cannot be read.
    """
    # FIXME: this function is duplicated in the DynamicOptions class. It is used here only to
    # set data.name in exec_after_process().
    orgs = {}

    filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
    # The context manager guarantees the handle is closed, even on error.
    with open( filename ) as loc_file:
        for line in loc_file:
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                continue
            fields = line.split( sep )
            info_type = fields[0].upper()
            # Read each line; if there are not enough fields, go to the next line.
            try:
                if info_type == "ORG":
                    #ORG 12521 Clostridium perfringens SM101 bacteria Firmicutes CP000312,CP000313,CP000314,CP000315 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521
                    # Unpacking raises ValueError when the record is too short.
                    org_num, name, kingdom, group, chromosomes, info_url, link_site = fields[1:8]
                    org_entry = orgs.setdefault( org_num, { 'chrs': {} } )
                    org_entry[ 'name' ] = name
                    org_entry[ 'kingdom' ] = kingdom
                    org_entry[ 'group' ] = group
                    org_entry[ 'chromosomes' ] = chromosomes
                    org_entry[ 'info_url' ] = info_url
                    org_entry[ 'link_site' ] = link_site
                elif info_type == "CHR":
                    #CHR 12521 CP000315 Clostridium perfringens phage phiSM101, complete genome 38092 110684521 CP000315.1
                    org_num, chr_acc, name, length, gi, gb, info_url = fields[1:8]
                    org_entry = orgs.setdefault( org_num, { 'chrs': {} } )
                    org_entry[ 'chrs' ][ chr_acc ] = {
                        'name': name,
                        'length': length,
                        'gi': gi,
                        'gb': gb,
                        'info_url': info_url,
                    }
                elif info_type == "DATA":
                    #DATA 12521_12521_CDS 12521 CP000315 CDS bed /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed
                    uid, org_num, chr_acc, feature, filetype, path = fields[1:7]
                    org_entry = orgs.setdefault( org_num, { 'chrs': {} } )
                    # KeyError here means no CHR record declared this
                    # chromosome yet; the DATA record is skipped.
                    chr_entry = org_entry[ 'chrs' ][ chr_acc ]
                    chr_entry.setdefault( 'data', {} )[ uid ] = {
                        'filetype': filetype,
                        'path': path,
                        'feature': feature,
                    }
            except ( ValueError, KeyError ):
                continue

    microbe_info = {}
    for org_num, org_entry in orgs.items():
        kingdom = org_entry.get( 'kingdom' )
        if kingdom is None:
            # Only CHR/DATA records were seen for this organism; without an
            # ORG record there is no kingdom to file it under.
            continue
        microbe_info.setdefault( kingdom, {} )[ org_num ] = org_entry
    return microbe_info
|
---|
81 |
|
---|
82 | #post processing, set build for data and add additional data to history
|
---|
83 | from galaxy import datatypes, config, jobs, tools
|
---|
84 | from shutil import copyfile
|
---|
85 |
|
---|
def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
    """Post-process tool output: label datasets and add extra ones to the history.

    The tool's ``stdout`` is scanned line by line for tab-separated marker
    records:

    * ``#File1  description  chr  dbkey  file_type`` -- updates the first
      dataset in ``out_data``: sets its size, name, dbkey and info, changes
      its datatype to ``file_type``, then re-initialises metadata and peek.
    * ``#NewFile  description  chr  dbkey  filepath  file_type`` -- creates a
      new HistoryDatasetAssociation, copies the permissions of the first
      output dataset onto it, adds it to that dataset's history and copies
      ``filepath`` into it.  If the copy fails the new dataset is put in the
      error state with an explanatory ``info`` message.

    Dataset names get a suffix built from the table returned by
    ``load_microbial_data()``, looked up with the ``kingdom`` and ``org``
    entries of ``param_dict``.

    Parameters:
        app        -- application object; its ``config``, ``datatypes_registry``,
                      ``model`` and ``security_agent`` attributes are used.
        inp_data   -- unused.
        out_data   -- dict of output datasets; only the first item is used.
        param_dict -- tool parameters; ``kingdom`` and ``org`` are read.
        tool       -- unused.
        stdout     -- text emitted by the tool, containing the marker records.
        stderr     -- unused.

    Returns None.  When the first output dataset has no history a message is
    printed and nothing else is done.  A marker record with too few fields
    raises IndexError, and one naming an organism, chromosome or description
    missing from the microbial data table raises KeyError.
    """
    # list() keeps this working where dict.items() returns a view, not a list.
    base_dataset = list( out_data.items() )[0][1]
    history = base_dataset.history
    if history is None:
        print( "unknown history!" )
        return
    kingdom = param_dict.get( 'kingdom', None )
    #group = param_dict.get( 'group', None )
    org = param_dict.get( 'org', None )

    #if not (kingdom or group or org):
    if not (kingdom or org):
        # Only reported; processing continues as it always has.
        print( "Parameters are not available." )
    # A workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values.
    if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
        kingdom = kingdom.value
    if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
        org = org.value

    GALAXY_DATA_INDEX_DIR = app.config.tool_data_path
    microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' )

    def name_suffix( description, chrom ):
        # Looked up lazily, so stdout without marker records never touches
        # microbe_info[kingdom][org].
        org_info = microbe_info[kingdom][org]
        feature = org_info['chrs'][chrom]['data'][description]['feature']
        return " (" + feature + " for " + org_info['name'] + ":" + chrom + ")"

    basic_name = ""
    for line in stdout.split( "\n" ):
        fields = line.split( "\t" )
        if fields[0] == "#File1":
            description = fields[1]
            chrom = fields[2]
            dbkey = fields[3]
            file_type = fields[4]
            data = list( out_data.items() )[0][1]
            data.set_size()
            # Remember the unadorned name; later #NewFile records build on it.
            basic_name = data.name
            data.name = data.name + name_suffix( description, chrom )
            data.dbkey = dbkey
            data.info = data.name
            data = app.datatypes_registry.change_datatype( data, file_type )
            data.init_meta()
            data.set_peek()
            app.model.context.add( data )
            app.model.context.flush()
        elif fields[0] == "#NewFile":
            description = fields[1]
            chrom = fields[2]
            dbkey = fields[3]
            filepath = fields[4]
            file_type = fields[5]
            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
            newdata.set_size()
            newdata.extension = file_type
            newdata.name = basic_name + name_suffix( description, chrom )
            app.model.context.add( newdata )
            app.model.context.flush()
            app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
            history.add_dataset( newdata )
            app.model.context.add( history )
            app.model.context.flush()
            try:
                copyfile( filepath, newdata.file_name )
                newdata.info = newdata.name
                newdata.state = jobs.JOB_OK
            except Exception:
                # Best effort: a failed copy marks the dataset as errored
                # instead of aborting the whole hook.
                newdata.info = "The requested file is missing from the system."
                newdata.state = jobs.JOB_ERROR
            newdata.dbkey = dbkey
            newdata.init_meta()
            newdata.set_peek()
            app.model.context.flush()
|
---|