1 | #Dan Blankenberg
|
---|
2 | #Contains file contents and helper methods for HYPHY configurations
|
---|
3 | import tempfile, os
|
---|
4 |
|
---|
5 | def get_filled_temp_filename(contents):
|
---|
6 | fh = tempfile.NamedTemporaryFile('w')
|
---|
7 | filename = fh.name
|
---|
8 | fh.close()
|
---|
9 | fh = open(filename, 'w')
|
---|
10 | fh.write(contents)
|
---|
11 | fh.close()
|
---|
12 | return filename
|
---|
13 |
|
---|
14 | NJ_tree_shared_ibf = """
|
---|
15 | COUNT_GAPS_IN_FREQUENCIES = 0;
|
---|
16 | methodIndex = 1;
|
---|
17 |
|
---|
18 | /*-----------------------------------------------------------------------------------------------------------------------------------------*/
|
---|
19 |
|
---|
20 | function InferTreeTopology(verbFlag)
|
---|
21 | {
|
---|
22 | distanceMatrix = {ds.species,ds.species};
|
---|
23 |
|
---|
24 | MESSAGE_LOGGING = 0;
|
---|
25 | ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"chooseDistanceFormula.def");
|
---|
26 | InitializeDistances (0);
|
---|
27 |
|
---|
28 | for (i = 0; i<ds.species; i=i+1)
|
---|
29 | {
|
---|
30 | for (j = i+1; j<ds.species; j = j+1)
|
---|
31 | {
|
---|
32 | distanceMatrix[i][j] = ComputeDistanceFormula (i,j);
|
---|
33 | }
|
---|
34 | }
|
---|
35 |
|
---|
36 | MESSAGE_LOGGING = 1;
|
---|
37 | cladesMade = 1;
|
---|
38 |
|
---|
39 |
|
---|
40 | if (ds.species == 2)
|
---|
41 | {
|
---|
42 | d1 = distanceMatrix[0][1]/2;
|
---|
43 | treeNodes = {{0,1,d1__},
|
---|
44 | {1,1,d1__},
|
---|
45 | {2,0,0}};
|
---|
46 |
|
---|
47 | cladesInfo = {{2,0}};
|
---|
48 | }
|
---|
49 | else
|
---|
50 | {
|
---|
51 | if (ds.species == 3)
|
---|
52 | {
|
---|
53 | /* generate least squares estimates here */
|
---|
54 |
|
---|
55 | d1 = (distanceMatrix[0][1]+distanceMatrix[0][2]-distanceMatrix[1][2])/2;
|
---|
56 | d2 = (distanceMatrix[0][1]-distanceMatrix[0][2]+distanceMatrix[1][2])/2;
|
---|
57 | d3 = (distanceMatrix[1][2]+distanceMatrix[0][2]-distanceMatrix[0][1])/2;
|
---|
58 |
|
---|
59 | treeNodes = {{0,1,d1__},
|
---|
60 | {1,1,d2__},
|
---|
61 | {2,1,d3__}
|
---|
62 | {3,0,0}};
|
---|
63 |
|
---|
64 | cladesInfo = {{3,0}};
|
---|
65 | }
|
---|
66 | else
|
---|
67 | {
|
---|
68 | njm = (distanceMatrix > methodIndex)>=ds.species;
|
---|
69 |
|
---|
70 | treeNodes = {2*(ds.species+1),3};
|
---|
71 | cladesInfo = {ds.species-1,2};
|
---|
72 |
|
---|
73 | for (i=Rows(treeNodes)-1; i>=0; i=i-1)
|
---|
74 | {
|
---|
75 | treeNodes[i][0] = njm[i][0];
|
---|
76 | treeNodes[i][1] = njm[i][1];
|
---|
77 | treeNodes[i][2] = njm[i][2];
|
---|
78 | }
|
---|
79 |
|
---|
80 | for (i=Rows(cladesInfo)-1; i>=0; i=i-1)
|
---|
81 | {
|
---|
82 | cladesInfo[i][0] = njm[i][3];
|
---|
83 | cladesInfo[i][1] = njm[i][4];
|
---|
84 | }
|
---|
85 |
|
---|
86 | njm = 0;
|
---|
87 | }
|
---|
88 | }
|
---|
89 | return 1.0;
|
---|
90 | }
|
---|
91 |
|
---|
92 | /*-----------------------------------------------------------------------------------------------------------------------------------------*/
|
---|
93 |
|
---|
94 | function TreeMatrix2TreeString (doLengths)
|
---|
95 | {
|
---|
96 | treeString = "";
|
---|
97 | p = 0;
|
---|
98 | k = 0;
|
---|
99 | m = treeNodes[0][1];
|
---|
100 | n = treeNodes[0][0];
|
---|
101 | treeString*(Rows(treeNodes)*25);
|
---|
102 |
|
---|
103 | while (m)
|
---|
104 | {
|
---|
105 | if (m>p)
|
---|
106 | {
|
---|
107 | if (p)
|
---|
108 | {
|
---|
109 | treeString*",";
|
---|
110 | }
|
---|
111 | for (j=p;j<m;j=j+1)
|
---|
112 | {
|
---|
113 | treeString*"(";
|
---|
114 | }
|
---|
115 | }
|
---|
116 | else
|
---|
117 | {
|
---|
118 | if (m<p)
|
---|
119 | {
|
---|
120 | for (j=m;j<p;j=j+1)
|
---|
121 | {
|
---|
122 | treeString*")";
|
---|
123 | }
|
---|
124 | }
|
---|
125 | else
|
---|
126 | {
|
---|
127 | treeString*",";
|
---|
128 | }
|
---|
129 | }
|
---|
130 | if (n<ds.species)
|
---|
131 | {
|
---|
132 | GetString (nodeName, ds, n);
|
---|
133 | if (doLengths != 1)
|
---|
134 | {
|
---|
135 | treeString*nodeName;
|
---|
136 | }
|
---|
137 | else
|
---|
138 | {
|
---|
139 | treeString*taxonNameMap[nodeName];
|
---|
140 | }
|
---|
141 | }
|
---|
142 | if (doLengths>.5)
|
---|
143 | {
|
---|
144 | nodeName = ":"+treeNodes[k][2];
|
---|
145 | treeString*nodeName;
|
---|
146 | }
|
---|
147 | k=k+1;
|
---|
148 | p=m;
|
---|
149 | n=treeNodes[k][0];
|
---|
150 | m=treeNodes[k][1];
|
---|
151 | }
|
---|
152 |
|
---|
153 | for (j=m;j<p;j=j+1)
|
---|
154 | {
|
---|
155 | treeString*")";
|
---|
156 | }
|
---|
157 |
|
---|
158 | treeString*0;
|
---|
159 | return treeString;
|
---|
160 | }
|
---|
161 | """
|
---|
162 |
|
---|
163 | def get_NJ_tree (filename):
|
---|
164 | return """
|
---|
165 | DISTANCE_PROMPTS = 1;
|
---|
166 | ExecuteAFile ("%s");
|
---|
167 |
|
---|
168 | DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
|
---|
169 | DataSetFilter filteredData = CreateFilter (ds,1);
|
---|
170 |
|
---|
171 | /* do sequence to branch map */
|
---|
172 |
|
---|
173 | taxonNameMap = {};
|
---|
174 |
|
---|
175 | for (k=0; k<ds.species; k=k+1)
|
---|
176 | {
|
---|
177 | GetString (thisName, ds,k);
|
---|
178 | shortName = (thisName^{{"\\\\..+",""}})&&1;
|
---|
179 | taxonNameMap[shortName] = thisName;
|
---|
180 | SetParameter (ds,k,shortName);
|
---|
181 | }
|
---|
182 |
|
---|
183 | DataSetFilter filteredData = CreateFilter (ds,1);
|
---|
184 | InferTreeTopology (0);
|
---|
185 | treeString = TreeMatrix2TreeString (1);
|
---|
186 |
|
---|
187 | fprintf (PROMPT_FOR_FILE, CLEAR_FILE, treeString);
|
---|
188 | fscanf (stdin, "String", ps_file);
|
---|
189 |
|
---|
190 | if (Abs(ps_file))
|
---|
191 | {
|
---|
192 | treeString = TreeMatrix2TreeString (2);
|
---|
193 | UseModel (USE_NO_MODEL);
|
---|
194 | Tree givenTree = treeString;
|
---|
195 | baseHeight = TipCount (givenTree)*28;
|
---|
196 | TREE_OUTPUT_OPTIONS = {};
|
---|
197 | TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14;
|
---|
198 | baseWidth = 0;
|
---|
199 | treeAVL = givenTree^0;
|
---|
200 | drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n";
|
---|
201 | for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1)
|
---|
202 | {
|
---|
203 | nodeName = (treeAVL[k3])["Name"];
|
---|
204 | if(Abs((treeAVL[k3])["Children"]) == 0)
|
---|
205 | {
|
---|
206 | mySpecs = {};
|
---|
207 | mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter";
|
---|
208 | baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]);
|
---|
209 | }
|
---|
210 | }
|
---|
211 | baseWidth = 40*baseWidth;
|
---|
212 |
|
---|
213 | fprintf (ps_file, CLEAR_FILE, drawLetter, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}}));
|
---|
214 | }
|
---|
215 | """ % (filename)
|
---|
216 |
|
---|
217 | def get_NJ_treeMF (filename):
|
---|
218 | return """
|
---|
219 | ExecuteAFile ("%s");
|
---|
220 |
|
---|
221 | VERBOSITY_LEVEL = -1;
|
---|
222 | fscanf (PROMPT_FOR_FILE, "Lines", inLines);
|
---|
223 |
|
---|
224 | _linesIn = Columns (inLines);
|
---|
225 | isomorphicTreesBySequenceCount = {};
|
---|
226 |
|
---|
227 | /*---------------------------------------------------------*/
|
---|
228 |
|
---|
229 | _currentGene = 1;
|
---|
230 | _currentState = 0;
|
---|
231 | geneSeqs = "";
|
---|
232 | geneSeqs * 128;
|
---|
233 |
|
---|
234 | fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN);
|
---|
235 | treeOutFile = LAST_FILE_PATH;
|
---|
236 |
|
---|
237 | fscanf (stdin,"String", ps_file);
|
---|
238 | if (Abs(ps_file))
|
---|
239 | {
|
---|
240 | fprintf (ps_file, CLEAR_FILE, KEEP_OPEN);
|
---|
241 | }
|
---|
242 |
|
---|
243 | for (l=0; l<_linesIn; l=l+1)
|
---|
244 | {
|
---|
245 | if (Abs(inLines[l]) == 0)
|
---|
246 | {
|
---|
247 | if (_currentState == 1)
|
---|
248 | {
|
---|
249 | geneSeqs * 0;
|
---|
250 | DataSet ds = ReadFromString (geneSeqs);
|
---|
251 | _processAGene (_currentGene,treeOutFile,ps_file);
|
---|
252 | geneSeqs * 128;
|
---|
253 | _currentGene = _currentGene + 1;
|
---|
254 | }
|
---|
255 | }
|
---|
256 | else
|
---|
257 | {
|
---|
258 | if (_currentState == 0)
|
---|
259 | {
|
---|
260 | _currentState = 1;
|
---|
261 | }
|
---|
262 | geneSeqs * inLines[l];
|
---|
263 | geneSeqs * "\\n";
|
---|
264 | }
|
---|
265 | }
|
---|
266 |
|
---|
267 |
|
---|
268 | if (_currentState == 1)
|
---|
269 | {
|
---|
270 | geneSeqs * 0;
|
---|
271 | if (Abs(geneSeqs))
|
---|
272 | {
|
---|
273 | DataSet ds = ReadFromString (geneSeqs);
|
---|
274 | _processAGene (_currentGene,treeOutFile,ps_file);
|
---|
275 | }
|
---|
276 | }
|
---|
277 |
|
---|
278 | fprintf (treeOutFile,CLOSE_FILE);
|
---|
279 | if (Abs(ps_file))
|
---|
280 | {
|
---|
281 | fprintf (ps_file,CLOSE_FILE);
|
---|
282 | }
|
---|
283 | /*---------------------------------------------------------*/
|
---|
284 |
|
---|
285 | function _processAGene (_geneID, nwk_file, ps_file)
|
---|
286 | {
|
---|
287 | if (ds.species == 1)
|
---|
288 | {
|
---|
289 | fprintf (nwk_file, _geneID-1, "\\tNone \\tNone\\n");
|
---|
290 | return 0;
|
---|
291 |
|
---|
292 | }
|
---|
293 |
|
---|
294 | DataSetFilter filteredData = CreateFilter (ds,1);
|
---|
295 |
|
---|
296 | /* do sequence to branch map */
|
---|
297 |
|
---|
298 | taxonNameMap = {};
|
---|
299 |
|
---|
300 | for (k=0; k<ds.species; k=k+1)
|
---|
301 | {
|
---|
302 | GetString (thisName, ds,k);
|
---|
303 | shortName = (thisName^{{"\\\\..+",""}});
|
---|
304 | taxonNameMap[shortName] = thisName;
|
---|
305 | SetParameter (ds,k,shortName);
|
---|
306 | }
|
---|
307 |
|
---|
308 | DataSetFilter filteredData = CreateFilter (ds,1);
|
---|
309 | DISTANCE_PROMPTS = (_geneID==1);
|
---|
310 |
|
---|
311 | InferTreeTopology (0);
|
---|
312 | baseTree = TreeMatrix2TreeString (0);
|
---|
313 | UseModel (USE_NO_MODEL);
|
---|
314 |
|
---|
315 | Tree baseTop = baseTree;
|
---|
316 |
|
---|
317 | /* standardize this top */
|
---|
318 |
|
---|
319 | for (k=0; k<Abs(isomorphicTreesBySequenceCount[filteredData.species]); k=k+1)
|
---|
320 | {
|
---|
321 | testString = (isomorphicTreesBySequenceCount[filteredData.species])[k];
|
---|
322 | Tree testTree = testString;
|
---|
323 | if (testTree == baseTop)
|
---|
324 | {
|
---|
325 | baseTree = testString;
|
---|
326 | break;
|
---|
327 | }
|
---|
328 | }
|
---|
329 | if (k==Abs(isomorphicTreesBySequenceCount[filteredData.species]))
|
---|
330 | {
|
---|
331 | if (k==0)
|
---|
332 | {
|
---|
333 | isomorphicTreesBySequenceCount[filteredData.species] = {};
|
---|
334 | }
|
---|
335 | (isomorphicTreesBySequenceCount[filteredData.species])[k] = baseTree;
|
---|
336 | }
|
---|
337 |
|
---|
338 | fprintf (nwk_file, _geneID-1, "\\t", baseTree, "\\t", TreeMatrix2TreeString (1), "\\n");
|
---|
339 | if (Abs(ps_file))
|
---|
340 | {
|
---|
341 | treeString = TreeMatrix2TreeString (2);
|
---|
342 | UseModel (USE_NO_MODEL);
|
---|
343 | Tree givenTree = treeString;
|
---|
344 | baseHeight = TipCount (givenTree)*28;
|
---|
345 | TREE_OUTPUT_OPTIONS = {};
|
---|
346 | TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14;
|
---|
347 | baseWidth = 0;
|
---|
348 | treeAVL = givenTree^0;
|
---|
349 | drawLetter = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n";
|
---|
350 | for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1)
|
---|
351 | {
|
---|
352 | nodeName = (treeAVL[k3])["Name"];
|
---|
353 | if(Abs((treeAVL[k3])["Children"]) == 0)
|
---|
354 | {
|
---|
355 | mySpecs = {};
|
---|
356 | mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter";
|
---|
357 | baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]);
|
---|
358 | }
|
---|
359 | }
|
---|
360 | baseWidth = 40*baseWidth;
|
---|
361 |
|
---|
362 | fprintf (stdout, _geneID, ":", givenTree,"\\n");
|
---|
363 | fprintf (ps_file, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}}));
|
---|
364 | }
|
---|
365 | return 0;
|
---|
366 | }
|
---|
367 | """ % (filename)
|
---|
368 |
|
---|
369 | BranchLengthsMF = """
|
---|
370 | VERBOSITY_LEVEL = -1;
|
---|
371 |
|
---|
372 | fscanf (PROMPT_FOR_FILE, "Lines", inLines);
|
---|
373 |
|
---|
374 |
|
---|
375 |
|
---|
376 | _linesIn = Columns (inLines);
|
---|
377 |
|
---|
378 |
|
---|
379 |
|
---|
380 | /*---------------------------------------------------------*/
|
---|
381 |
|
---|
382 |
|
---|
383 |
|
---|
384 | _currentGene = 1;
|
---|
385 |
|
---|
386 | _currentState = 0;
|
---|
387 |
|
---|
388 | geneSeqs = "";
|
---|
389 |
|
---|
390 | geneSeqs * 128;
|
---|
391 |
|
---|
392 |
|
---|
393 |
|
---|
394 | for (l=0; l<_linesIn; l=l+1)
|
---|
395 |
|
---|
396 | {
|
---|
397 |
|
---|
398 | if (Abs(inLines[l]) == 0)
|
---|
399 |
|
---|
400 | {
|
---|
401 |
|
---|
402 | if (_currentState == 1)
|
---|
403 |
|
---|
404 | {
|
---|
405 |
|
---|
406 | geneSeqs * 0;
|
---|
407 |
|
---|
408 | DataSet ds = ReadFromString (geneSeqs);
|
---|
409 |
|
---|
410 | _processAGene (_currentGene);
|
---|
411 |
|
---|
412 | geneSeqs * 128;
|
---|
413 |
|
---|
414 | _currentGene = _currentGene + 1;
|
---|
415 |
|
---|
416 | }
|
---|
417 |
|
---|
418 | }
|
---|
419 |
|
---|
420 | else
|
---|
421 |
|
---|
422 | {
|
---|
423 |
|
---|
424 | if (_currentState == 0)
|
---|
425 |
|
---|
426 | {
|
---|
427 |
|
---|
428 | _currentState = 1;
|
---|
429 |
|
---|
430 | }
|
---|
431 |
|
---|
432 | geneSeqs * inLines[l];
|
---|
433 |
|
---|
434 | geneSeqs * "\\n";
|
---|
435 |
|
---|
436 | }
|
---|
437 |
|
---|
438 | }
|
---|
439 |
|
---|
440 |
|
---|
441 |
|
---|
442 | if (_currentState == 1)
|
---|
443 |
|
---|
444 | {
|
---|
445 |
|
---|
446 | geneSeqs * 0;
|
---|
447 |
|
---|
448 | if (Abs(geneSeqs))
|
---|
449 |
|
---|
450 | {
|
---|
451 |
|
---|
452 | DataSet ds = ReadFromString (geneSeqs);
|
---|
453 |
|
---|
454 | _processAGene (_currentGene);
|
---|
455 |
|
---|
456 | }
|
---|
457 |
|
---|
458 | }
|
---|
459 |
|
---|
460 |
|
---|
461 |
|
---|
462 | fprintf (resultFile,CLOSE_FILE);
|
---|
463 |
|
---|
464 |
|
---|
465 |
|
---|
466 | /*---------------------------------------------------------*/
|
---|
467 |
|
---|
468 |
|
---|
469 |
|
---|
470 | function _processAGene (_geneID)
|
---|
471 |
|
---|
472 | {
|
---|
473 |
|
---|
474 | DataSetFilter filteredData = CreateFilter (ds,1);
|
---|
475 |
|
---|
476 | if (_currentGene == 1)
|
---|
477 |
|
---|
478 | {
|
---|
479 |
|
---|
480 | SelectTemplateModel (filteredData);
|
---|
481 |
|
---|
482 |
|
---|
483 |
|
---|
484 | SetDialogPrompt ("Tree file");
|
---|
485 |
|
---|
486 | fscanf (PROMPT_FOR_FILE, "Tree", givenTree);
|
---|
487 |
|
---|
488 | fscanf (stdin, "String", resultFile);
|
---|
489 |
|
---|
490 |
|
---|
491 |
|
---|
492 | /* do sequence to branch map */
|
---|
493 |
|
---|
494 |
|
---|
495 |
|
---|
496 | validNames = {};
|
---|
497 |
|
---|
498 | taxonNameMap = {};
|
---|
499 |
|
---|
500 |
|
---|
501 |
|
---|
502 | for (k=0; k<TipCount(givenTree); k=k+1)
|
---|
503 |
|
---|
504 | {
|
---|
505 |
|
---|
506 | validNames[TipName(givenTree,k)&&1] = 1;
|
---|
507 |
|
---|
508 | }
|
---|
509 |
|
---|
510 |
|
---|
511 |
|
---|
512 | for (k=0; k<BranchCount(givenTree); k=k+1)
|
---|
513 |
|
---|
514 | {
|
---|
515 |
|
---|
516 | thisName = BranchName(givenTree,k);
|
---|
517 |
|
---|
518 | taxonNameMap[thisName&&1] = thisName;
|
---|
519 |
|
---|
520 | }
|
---|
521 |
|
---|
522 |
|
---|
523 |
|
---|
524 | storeValidNames = validNames;
|
---|
525 |
|
---|
526 | fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Block\\tBranch\\tLength\\tLowerBound\\tUpperBound\\n");
|
---|
527 |
|
---|
528 | }
|
---|
529 |
|
---|
530 | else
|
---|
531 |
|
---|
532 | {
|
---|
533 |
|
---|
534 | HarvestFrequencies (vectorOfFrequencies, filteredData, 1,1,1);
|
---|
535 |
|
---|
536 | validNames = storeValidNames;
|
---|
537 |
|
---|
538 | }
|
---|
539 |
|
---|
540 |
|
---|
541 |
|
---|
542 | for (k=0; k<ds.species; k=k+1)
|
---|
543 |
|
---|
544 | {
|
---|
545 |
|
---|
546 | GetString (thisName, ds,k);
|
---|
547 |
|
---|
548 | shortName = (thisName^{{"\\\\..+",""}})&&1;
|
---|
549 |
|
---|
550 | if (validNames[shortName])
|
---|
551 |
|
---|
552 | {
|
---|
553 |
|
---|
554 | taxonNameMap[shortName] = thisName;
|
---|
555 |
|
---|
556 | validNames - (shortName);
|
---|
557 |
|
---|
558 | SetParameter (ds,k,shortName);
|
---|
559 |
|
---|
560 | }
|
---|
561 |
|
---|
562 | else
|
---|
563 |
|
---|
564 | {
|
---|
565 |
|
---|
566 | fprintf (resultFile,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree,"\\n");
|
---|
567 |
|
---|
568 | return 0;
|
---|
569 |
|
---|
570 | }
|
---|
571 |
|
---|
572 | }
|
---|
573 |
|
---|
574 |
|
---|
575 |
|
---|
576 | /* */
|
---|
577 |
|
---|
578 |
|
---|
579 |
|
---|
580 | LikelihoodFunction lf = (filteredData,givenTree);
|
---|
581 |
|
---|
582 | Optimize (res,lf);
|
---|
583 |
|
---|
584 |
|
---|
585 |
|
---|
586 | timer = Time(0)-timer;
|
---|
587 |
|
---|
588 |
|
---|
589 |
|
---|
590 | branchNames = BranchName (givenTree,-1);
|
---|
591 |
|
---|
592 | branchLengths = BranchLength (givenTree,-1);
|
---|
593 |
|
---|
594 |
|
---|
595 |
|
---|
596 |
|
---|
597 |
|
---|
598 | for (k=0; k<Columns(branchNames)-1; k=k+1)
|
---|
599 |
|
---|
600 | {
|
---|
601 |
|
---|
602 | COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t";
|
---|
603 |
|
---|
604 | COVARIANCE_PRECISION = 0.95;
|
---|
605 |
|
---|
606 | CovarianceMatrix (cmx,lf);
|
---|
607 |
|
---|
608 | if (k==0)
|
---|
609 |
|
---|
610 | {
|
---|
611 |
|
---|
612 | /* compute a scaling factor */
|
---|
613 |
|
---|
614 | ExecuteCommands ("givenTree."+branchNames[0]+".t=1");
|
---|
615 |
|
---|
616 | scaleFactor = BranchLength (givenTree,0);
|
---|
617 |
|
---|
618 | ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]);
|
---|
619 |
|
---|
620 | }
|
---|
621 |
|
---|
622 | fprintf (resultFile,_geneID,"\\t",taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n");
|
---|
623 |
|
---|
624 | }
|
---|
625 |
|
---|
626 |
|
---|
627 |
|
---|
628 | ttl = (branchLengths*(Transpose(branchLengths["1"])))[0];
|
---|
629 |
|
---|
630 | global treeScaler = 1;
|
---|
631 |
|
---|
632 | ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree);
|
---|
633 |
|
---|
634 | COVARIANCE_PARAMETER = "treeScaler";
|
---|
635 |
|
---|
636 | COVARIANCE_PRECISION = 0.95;
|
---|
637 |
|
---|
638 | CovarianceMatrix (cmx,lf);
|
---|
639 |
|
---|
640 | fprintf (resultFile,_geneID,"\\tTotal Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n");
|
---|
641 |
|
---|
642 | ClearConstraints (givenTree);
|
---|
643 |
|
---|
644 | return 0;
|
---|
645 |
|
---|
646 | }
|
---|
647 | """
|
---|
648 |
|
---|
649 | BranchLengths = """
|
---|
650 | DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
|
---|
651 | DataSetFilter filteredData = CreateFilter (ds,1);
|
---|
652 |
|
---|
653 | SelectTemplateModel (filteredData);
|
---|
654 |
|
---|
655 | SetDialogPrompt ("Tree file");
|
---|
656 | fscanf (PROMPT_FOR_FILE, "Tree", givenTree);
|
---|
657 | fscanf (stdin, "String", resultFile);
|
---|
658 |
|
---|
659 | /* do sequence to branch map */
|
---|
660 |
|
---|
661 | validNames = {};
|
---|
662 | taxonNameMap = {};
|
---|
663 |
|
---|
664 | for (k=0; k<TipCount(givenTree); k=k+1)
|
---|
665 | {
|
---|
666 | validNames[TipName(givenTree,k)&&1] = 1;
|
---|
667 | }
|
---|
668 |
|
---|
669 | for (k=0; k<BranchCount(givenTree); k=k+1)
|
---|
670 | {
|
---|
671 | thisName = BranchName(givenTree,k);
|
---|
672 | taxonNameMap[thisName&&1] = thisName;
|
---|
673 | }
|
---|
674 |
|
---|
675 | for (k=0; k<ds.species; k=k+1)
|
---|
676 | {
|
---|
677 | GetString (thisName, ds,k);
|
---|
678 | shortName = (thisName^{{"\\\\..+",""}})&&1;
|
---|
679 | if (validNames[shortName])
|
---|
680 | {
|
---|
681 | taxonNameMap[shortName] = thisName;
|
---|
682 | validNames - (shortName);
|
---|
683 | SetParameter (ds,k,shortName);
|
---|
684 | }
|
---|
685 | else
|
---|
686 | {
|
---|
687 | fprintf (resultFile,CLEAR_FILE,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree);
|
---|
688 | return 0;
|
---|
689 | }
|
---|
690 | }
|
---|
691 |
|
---|
692 | /* */
|
---|
693 |
|
---|
694 | LikelihoodFunction lf = (filteredData,givenTree);
|
---|
695 |
|
---|
696 | Optimize (res,lf);
|
---|
697 |
|
---|
698 | timer = Time(0)-timer;
|
---|
699 |
|
---|
700 | branchNames = BranchName (givenTree,-1);
|
---|
701 | branchLengths = BranchLength (givenTree,-1);
|
---|
702 |
|
---|
703 | fprintf (resultFile,CLEAR_FILE,KEEP_OPEN,"Branch\\tLength\\tLowerBound\\tUpperBound\\n");
|
---|
704 |
|
---|
705 | for (k=0; k<Columns(branchNames)-1; k=k+1)
|
---|
706 | {
|
---|
707 | COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t";
|
---|
708 | COVARIANCE_PRECISION = 0.95;
|
---|
709 | CovarianceMatrix (cmx,lf);
|
---|
710 | if (k==0)
|
---|
711 | {
|
---|
712 | /* compute a scaling factor */
|
---|
713 | ExecuteCommands ("givenTree."+branchNames[0]+".t=1");
|
---|
714 | scaleFactor = BranchLength (givenTree,0);
|
---|
715 | ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]);
|
---|
716 | }
|
---|
717 | fprintf (resultFile,taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n");
|
---|
718 | }
|
---|
719 |
|
---|
720 | ttl = (branchLengths*(Transpose(branchLengths["1"])))[0];
|
---|
721 | global treeScaler = 1;
|
---|
722 | ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree);
|
---|
723 | COVARIANCE_PARAMETER = "treeScaler";
|
---|
724 | COVARIANCE_PRECISION = 0.95;
|
---|
725 | CovarianceMatrix (cmx,lf);
|
---|
726 | ClearConstraints (givenTree);
|
---|
727 | fprintf (resultFile,"Total Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n");
|
---|
728 | fprintf (resultFile,CLOSE_FILE);
|
---|
729 | """
|
---|
730 |
|
---|
731 | SimpleLocalFitter = """
|
---|
732 | VERBOSITY_LEVEL = -1;
|
---|
733 | COUNT_GAPS_IN_FREQUENCIES = 0;
|
---|
734 |
|
---|
735 | /*---------------------------------------------------------*/
|
---|
736 |
|
---|
737 | function returnResultHeaders (dummy)
|
---|
738 | {
|
---|
739 | _analysisHeaders = {};
|
---|
740 | _analysisHeaders[0] = "BLOCK";
|
---|
741 | _analysisHeaders[1] = "BP";
|
---|
742 | _analysisHeaders[2] = "S_sites";
|
---|
743 | _analysisHeaders[3] = "NS_sites";
|
---|
744 | _analysisHeaders[4] = "Stop_codons";
|
---|
745 | _analysisHeaders[5] = "LogL";
|
---|
746 | _analysisHeaders[6] = "AC";
|
---|
747 | _analysisHeaders[7] = "AT";
|
---|
748 | _analysisHeaders[8] = "CG";
|
---|
749 | _analysisHeaders[9] = "CT";
|
---|
750 | _analysisHeaders[10] = "GT";
|
---|
751 | _analysisHeaders[11] = "Tree";
|
---|
752 |
|
---|
753 | for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1)
|
---|
754 | {
|
---|
755 | branchName = treeBranchNames[_biterator];
|
---|
756 |
|
---|
757 | _analysisHeaders [Abs(_analysisHeaders)] = "length("+branchName+")";
|
---|
758 | _analysisHeaders [Abs(_analysisHeaders)] = "dS("+branchName+")";
|
---|
759 | _analysisHeaders [Abs(_analysisHeaders)] = "dN("+branchName+")";
|
---|
760 | _analysisHeaders [Abs(_analysisHeaders)] = "omega("+branchName+")";
|
---|
761 | }
|
---|
762 |
|
---|
763 | return _analysisHeaders;
|
---|
764 | }
|
---|
765 |
|
---|
766 | /*---------------------------------------------------------*/
|
---|
767 |
|
---|
768 | function runAGeneFit (myID)
|
---|
769 | {
|
---|
770 | DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
|
---|
771 |
|
---|
772 | if (_currentGene==1)
|
---|
773 | {
|
---|
774 | _MG94stdinOverload = {};
|
---|
775 | _MG94stdinOverload ["0"] = "Local";
|
---|
776 | _MG94stdinOverload ["1"] = modelSpecString;
|
---|
777 |
|
---|
778 | ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl",
|
---|
779 | _MG94stdinOverload);
|
---|
780 |
|
---|
781 | Tree codonTree = treeString;
|
---|
782 | }
|
---|
783 | else
|
---|
784 | {
|
---|
785 | HarvestFrequencies (observedFreq,filteredData,3,1,1);
|
---|
786 | MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq);
|
---|
787 | vectorOfFrequencies = BuildCodonFrequencies (observedFreq);
|
---|
788 | Model MG94customModel = (MG94custom,vectorOfFrequencies,0);
|
---|
789 |
|
---|
790 | Tree codonTree = treeString;
|
---|
791 | }
|
---|
792 |
|
---|
793 | LikelihoodFunction lf = (filteredData,codonTree);
|
---|
794 |
|
---|
795 | Optimize (res,lf);
|
---|
796 |
|
---|
797 | _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0);
|
---|
798 | _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree");
|
---|
799 |
|
---|
800 |
|
---|
801 | _returnMe = {};
|
---|
802 | _returnMe ["BLOCK"] = myID;
|
---|
803 | _returnMe ["LogL"] = res[1][0];
|
---|
804 | _returnMe ["BP"] = _snsAVL ["Sites"];
|
---|
805 | _returnMe ["S_sites"] = _snsAVL ["SSites"];
|
---|
806 | _returnMe ["NS_sites"] = _snsAVL ["NSSites"];
|
---|
807 | _returnMe ["AC"] = AC;
|
---|
808 | _returnMe ["AT"] = AT;
|
---|
809 | _returnMe ["CG"] = CG;
|
---|
810 | _returnMe ["CT"] = CT;
|
---|
811 | _returnMe ["GT"] = GT;
|
---|
812 | _returnMe ["Tree"] = Format(codonTree,0,1);
|
---|
813 |
|
---|
814 | for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1)
|
---|
815 | {
|
---|
816 | branchName = treeBranchNames[_biterator];
|
---|
817 |
|
---|
818 | _returnMe ["length("+branchName+")"] = (_cL["Total"])[_biterator];
|
---|
819 | _returnMe ["dS("+branchName+")"] = (_cL["Syn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["S_sites"]);
|
---|
820 | _returnMe ["dN("+branchName+")"] = (_cL["NonSyn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["NS_sites"]);
|
---|
821 |
|
---|
822 | ExecuteCommands ("_lom = _standardizeRatio(codonTree."+treeBranchNames[_biterator]+".nonSynRate,codonTree."+treeBranchNames[_biterator]+".synRate);");
|
---|
823 | _returnMe ["omega("+branchName+")"] = _lom;
|
---|
824 | }
|
---|
825 |
|
---|
826 | return _returnMe;
|
---|
827 | }
|
---|
828 |
|
---|
829 | """
|
---|
830 |
|
---|
831 | SimpleGlobalFitter = """
|
---|
832 | VERBOSITY_LEVEL = -1;
|
---|
833 | COUNT_GAPS_IN_FREQUENCIES = 0;
|
---|
834 |
|
---|
835 | /*---------------------------------------------------------*/
|
---|
836 |
|
---|
837 | function returnResultHeaders (dummy)
|
---|
838 | {
|
---|
839 | _analysisHeaders = {};
|
---|
840 | _analysisHeaders[0] = "BLOCK";
|
---|
841 | _analysisHeaders[1] = "BP";
|
---|
842 | _analysisHeaders[2] = "S_sites";
|
---|
843 | _analysisHeaders[3] = "NS_sites";
|
---|
844 | _analysisHeaders[4] = "Stop_codons";
|
---|
845 | _analysisHeaders[5] = "LogL";
|
---|
846 | _analysisHeaders[6] = "omega";
|
---|
847 | _analysisHeaders[7] = "omega_range";
|
---|
848 | _analysisHeaders[8] = "AC";
|
---|
849 | _analysisHeaders[9] = "AT";
|
---|
850 | _analysisHeaders[10] = "CG";
|
---|
851 | _analysisHeaders[11] = "CT";
|
---|
852 | _analysisHeaders[12] = "GT";
|
---|
853 | _analysisHeaders[13] = "Tree";
|
---|
854 |
|
---|
855 | return _analysisHeaders;
|
---|
856 | }
|
---|
857 |
|
---|
858 | /*---------------------------------------------------------*/
|
---|
859 |
|
---|
860 | function runAGeneFit (myID)
|
---|
861 | {
|
---|
862 | fprintf (stdout, "[SimpleGlobalFitter.bf on GENE ", myID, "]\\n");
|
---|
863 | taxonNameMap = {};
|
---|
864 |
|
---|
865 | for (k=0; k<ds.species; k=k+1)
|
---|
866 | {
|
---|
867 | GetString (thisName, ds,k);
|
---|
868 | shortName = (thisName^{{"\\\\..+",""}})&&1;
|
---|
869 | taxonNameMap[shortName] = thisName;
|
---|
870 | SetParameter (ds,k,shortName);
|
---|
871 | }
|
---|
872 |
|
---|
873 | DataSetFilter filteredData = CreateFilter (ds,1);
|
---|
874 | _nucSites = filteredData.sites;
|
---|
875 |
|
---|
876 | if (Abs(treeString))
|
---|
877 | {
|
---|
878 | givenTreeString = treeString;
|
---|
879 | }
|
---|
880 | else
|
---|
881 | {
|
---|
882 | if (_currentGene==1)
|
---|
883 | {
|
---|
884 | ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"NJ.bf");
|
---|
885 | }
|
---|
886 | givenTreeString = InferTreeTopology (0);
|
---|
887 | treeString = "";
|
---|
888 | }
|
---|
889 |
|
---|
890 | DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
|
---|
891 |
|
---|
892 | if (_currentGene==1)
|
---|
893 | {
|
---|
894 | _MG94stdinOverload = {};
|
---|
895 | _MG94stdinOverload ["0"] = "Global";
|
---|
896 | _MG94stdinOverload ["1"] = modelSpecString;
|
---|
897 |
|
---|
898 | ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl",
|
---|
899 | _MG94stdinOverload);
|
---|
900 |
|
---|
901 | Tree codonTree = givenTreeString;
|
---|
902 | }
|
---|
903 | else
|
---|
904 | {
|
---|
905 | HarvestFrequencies (observedFreq,filteredData,3,1,1);
|
---|
906 | MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq);
|
---|
907 | vectorOfFrequencies = BuildCodonFrequencies (observedFreq);
|
---|
908 | Model MG94customModel = (MG94custom,vectorOfFrequencies,0);
|
---|
909 |
|
---|
910 | Tree codonTree = givenTreeString;
|
---|
911 | }
|
---|
912 |
|
---|
913 | LikelihoodFunction lf = (filteredData,codonTree);
|
---|
914 |
|
---|
915 | Optimize (res,lf);
|
---|
916 |
|
---|
917 | _snsAVL = _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0);
|
---|
918 | _cL = ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree");
|
---|
919 |
|
---|
920 |
|
---|
921 | _returnMe = {};
|
---|
922 | _returnMe ["BLOCK"] = myID;
|
---|
923 | _returnMe ["LogL"] = res[1][0];
|
---|
924 | _returnMe ["BP"] = _snsAVL ["Sites"];
|
---|
925 | _returnMe ["S_sites"] = _snsAVL ["SSites"];
|
---|
926 | _returnMe ["NS_sites"] = _snsAVL ["NSSites"];
|
---|
927 | _returnMe ["Stop_codons"] = (_nucSites-filteredData.sites*3)$3;
|
---|
928 | _returnMe ["AC"] = AC;
|
---|
929 | _returnMe ["AT"] = AT;
|
---|
930 | _returnMe ["CG"] = CG;
|
---|
931 | _returnMe ["CT"] = CT;
|
---|
932 | _returnMe ["GT"] = GT;
|
---|
933 | _returnMe ["omega"] = R;
|
---|
934 | COVARIANCE_PARAMETER = "R";
|
---|
935 | COVARIANCE_PRECISION = 0.95;
|
---|
936 | CovarianceMatrix (cmx,lf);
|
---|
937 | _returnMe ["omega_range"] = ""+cmx[0]+"-"+cmx[2];
|
---|
938 | _returnMe ["Tree"] = Format(codonTree,0,1);
|
---|
939 |
|
---|
940 |
|
---|
941 | return _returnMe;
|
---|
942 | }
|
---|
943 | """
|
---|
944 |
|
---|
945 | FastaReader = """
|
---|
946 | fscanf (stdin, "String", _coreAnalysis);
|
---|
947 | fscanf (stdin, "String", _outputDriver);
|
---|
948 |
|
---|
949 | ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def");
|
---|
950 | ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"dSdNTreeTools.ibf");
|
---|
951 | ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"CodonTools.bf");
|
---|
952 | ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"GrabBag.bf");
|
---|
953 |
|
---|
954 | SetDialogPrompt ("Tree file");
|
---|
955 | fscanf (PROMPT_FOR_FILE, "Tree", givenTree);
|
---|
956 |
|
---|
957 | treeBranchNames = BranchName (givenTree,-1);
|
---|
958 | treeBranchCount = Columns (treeBranchNames)-1;
|
---|
959 | treeString = Format (givenTree,1,1);
|
---|
960 |
|
---|
961 | SetDialogPrompt ("Multiple gene FASTA file");
|
---|
962 | fscanf (PROMPT_FOR_FILE, "Lines", inLines);
|
---|
963 | fscanf (stdin, "String", modelSpecString);
|
---|
964 | fscanf (stdin, "String", _outPath);
|
---|
965 |
|
---|
966 | ExecuteAFile (_outputDriver);
|
---|
967 | ExecuteAFile (_coreAnalysis);
|
---|
968 |
|
---|
969 | /*---------------------------------------------------------*/
|
---|
970 |
|
---|
971 | _linesIn = Columns (inLines);
|
---|
972 | _currentGene = 1;
|
---|
973 | _currentState = 0;
|
---|
974 | /* 0 - waiting for a non-empty line */
|
---|
975 | /* 1 - reading files */
|
---|
976 |
|
---|
977 | geneSeqs = "";
|
---|
978 | geneSeqs * 0;
|
---|
979 |
|
---|
980 | _prepareFileOutput (_outPath);
|
---|
981 |
|
---|
982 | for (l=0; l<_linesIn; l=l+1)
|
---|
983 | {
|
---|
984 | if (Abs(inLines[l]) == 0)
|
---|
985 | {
|
---|
986 | if (_currentState == 1)
|
---|
987 | {
|
---|
988 | geneSeqs * 0;
|
---|
989 | DataSet ds = ReadFromString (geneSeqs);
|
---|
990 | _processAGene (ds.species == treeBranchCount,_currentGene);
|
---|
991 | geneSeqs * 128;
|
---|
992 | _currentGene = _currentGene + 1;
|
---|
993 | }
|
---|
994 | }
|
---|
995 | else
|
---|
996 | {
|
---|
997 | if (_currentState == 0)
|
---|
998 | {
|
---|
999 | _currentState = 1;
|
---|
1000 | }
|
---|
1001 | geneSeqs * inLines[l];
|
---|
1002 | geneSeqs * "\\n";
|
---|
1003 | }
|
---|
1004 | }
|
---|
1005 |
|
---|
1006 | if (_currentState == 1)
|
---|
1007 | {
|
---|
1008 | geneSeqs * 0;
|
---|
1009 | DataSet ds = ReadFromString (geneSeqs);
|
---|
1010 | _processAGene (ds.species == treeBranchCount,_currentGene);
|
---|
1011 | }
|
---|
1012 |
|
---|
1013 | _finishFileOutput (0);
|
---|
1014 | """
|
---|
1015 |
|
---|
1016 | TabWriter = """
|
---|
1017 | /*---------------------------------------------------------*/
|
---|
1018 | function _prepareFileOutput (_outPath)
|
---|
1019 | {
|
---|
1020 | _outputFilePath = _outPath;
|
---|
1021 |
|
---|
1022 | _returnHeaders = returnResultHeaders(0);
|
---|
1023 |
|
---|
1024 | fprintf (_outputFilePath, CLEAR_FILE, KEEP_OPEN, _returnHeaders[0]);
|
---|
1025 | for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1)
|
---|
1026 | {
|
---|
1027 | fprintf (_outputFilePath,"\\t",_returnHeaders[_biterator]);
|
---|
1028 | }
|
---|
1029 |
|
---|
1030 |
|
---|
1031 |
|
---|
1032 | fprintf (_outputFilePath,"\\n");
|
---|
1033 | return 0;
|
---|
1034 | }
|
---|
1035 |
|
---|
1036 | /*---------------------------------------------------------*/
|
---|
1037 |
|
---|
1038 | function _processAGene (valid, _geneID)
|
---|
1039 | {
|
---|
1040 | if (valid)
|
---|
1041 | {
|
---|
1042 | returnValue = runAGeneFit (_geneID);
|
---|
1043 | fprintf (_outputFilePath, returnValue[_returnHeaders[0]]);
|
---|
1044 | for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1)
|
---|
1045 | {
|
---|
1046 | fprintf (_outputFilePath,"\\t",returnValue[_returnHeaders[_biterator]]);
|
---|
1047 | }
|
---|
1048 | fprintf (_outputFilePath, "\\n");
|
---|
1049 | }
|
---|
1050 | /*
|
---|
1051 | else
|
---|
1052 | {
|
---|
1053 | fprintf (_outputFilePath,
|
---|
1054 | _geneID, ", Incorrect number of sequences\\n");
|
---|
1055 | }
|
---|
1056 | */
|
---|
1057 | _currentState = 0;
|
---|
1058 | return 0;
|
---|
1059 | }
|
---|
1060 |
|
---|
1061 | /*---------------------------------------------------------*/
|
---|
1062 | function _finishFileOutput (dummy)
|
---|
1063 | {
|
---|
1064 | return 0;
|
---|
1065 | }
|
---|
1066 | """
|
---|
1067 |
|
---|
1068 | def get_dnds_config_filename(Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ):
|
---|
1069 | contents = """
|
---|
1070 | _genomeScreenOptions = {};
|
---|
1071 |
|
---|
1072 | /* all paths are either absolute or relative
|
---|
1073 | to the DATA READER */
|
---|
1074 |
|
---|
1075 | _genomeScreenOptions ["0"] = "%s";
|
---|
1076 | /* which analysis to run on each gene; */
|
---|
1077 | _genomeScreenOptions ["1"] = "%s";
|
---|
1078 | /* what output to produce; */
|
---|
1079 | _genomeScreenOptions ["2"] = "%s";
|
---|
1080 | /* genetic code */
|
---|
1081 | _genomeScreenOptions ["3"] = "%s";
|
---|
1082 | /* tree file */
|
---|
1083 | _genomeScreenOptions ["4"] = "%s";
|
---|
1084 | /* alignment file */
|
---|
1085 | _genomeScreenOptions ["5"] = "%s";
|
---|
1086 | /* nucleotide bias string; can define any of the 203 models */
|
---|
1087 | _genomeScreenOptions ["6"] = "%s";
|
---|
1088 | /* output csv file */
|
---|
1089 |
|
---|
1090 | ExecuteAFile ("%s", _genomeScreenOptions);
|
---|
1091 | """ % (Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename )
|
---|
1092 | return get_filled_temp_filename(contents)
|
---|
1093 |
|
---|
1094 |
|
---|
1095 | def get_branch_lengths_config_filename(input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename):
|
---|
1096 | contents = """
|
---|
1097 | _genomeScreenOptions = {};
|
---|
1098 |
|
---|
1099 | /* all paths are either absolute or relative
|
---|
1100 | to the NucDataBranchLengths.bf */
|
---|
1101 |
|
---|
1102 | _genomeScreenOptions ["0"] = "%s";
|
---|
1103 | /* the file to analyze; */
|
---|
1104 | _genomeScreenOptions ["1"] = "CUSTOM";
|
---|
1105 | /* use an arbitrary nucleotide model */
|
---|
1106 | _genomeScreenOptions ["2"] = "%s";
|
---|
1107 | /* which model to use */
|
---|
1108 | _genomeScreenOptions ["3"] = "%s";
|
---|
1109 | /* model options */
|
---|
1110 | _genomeScreenOptions ["4"] = "Estimated";
|
---|
1111 | /* rate parameters */
|
---|
1112 | _genomeScreenOptions ["5"] = "%s";
|
---|
1113 | /* base frequencies */
|
---|
1114 | _genomeScreenOptions ["6"] = "%s";
|
---|
1115 | /* the tree to use; */
|
---|
1116 | _genomeScreenOptions ["7"] = "%s";
|
---|
1117 | /* write .csv output to; */
|
---|
1118 |
|
---|
1119 | ExecuteAFile ("%s", _genomeScreenOptions);
|
---|
1120 | """ % (input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename)
|
---|
1121 | return get_filled_temp_filename(contents)
|
---|
1122 |
|
---|
1123 |
|
---|
1124 | def get_nj_tree_config_filename(input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename):
|
---|
1125 | contents = """
|
---|
1126 | _genomeScreenOptions = {};
|
---|
1127 |
|
---|
1128 | /* all paths are either absolute or relative
|
---|
1129 | to the BuildNJTree.bf */
|
---|
1130 |
|
---|
1131 | _genomeScreenOptions ["0"] = "%s";
|
---|
1132 | /* the file to analyze; */
|
---|
1133 | _genomeScreenOptions ["1"] = "%s";
|
---|
1134 | /* pick which distance metric to use; TN93 is a good default */
|
---|
1135 | _genomeScreenOptions ["2"] = "%s";
|
---|
1136 | /* write Newick tree output to; */
|
---|
1137 | _genomeScreenOptions ["3"] = "%s";
|
---|
1138 | /* write a postscript tree file to this file; leave blank to not write a tree */
|
---|
1139 |
|
---|
1140 | ExecuteAFile ("%s", _genomeScreenOptions);
|
---|
1141 | """ % (input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename)
|
---|
1142 | return get_filled_temp_filename(contents)
|
---|
1143 |
|
---|
1144 |
|
---|
1145 | def get_nj_treeMF_config_filename(input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename):
|
---|
1146 | contents = """
|
---|
1147 | _genomeScreenOptions = {};
|
---|
1148 |
|
---|
1149 | /* all paths are either absolute or relative
|
---|
1150 | to the BuildNJTreeMF.bf */
|
---|
1151 |
|
---|
1152 | _genomeScreenOptions ["0"] = "%s";
|
---|
1153 | /* the multiple alignment file to analyze; */
|
---|
1154 | _genomeScreenOptions ["1"] = "%s";
|
---|
1155 | /* write Newick tree output to; */
|
---|
1156 | _genomeScreenOptions ["2"] = "%s";
|
---|
1157 | /* write a postscript tree file to this file; leave blank to not write a tree */
|
---|
1158 | _genomeScreenOptions ["3"] = "%s";
|
---|
1159 | /* pick which distance metric to use; TN93 is a good default */
|
---|
1160 |
|
---|
1161 | ExecuteAFile ("%s", _genomeScreenOptions);
|
---|
1162 | """ % (input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename)
|
---|
1163 | return get_filled_temp_filename(contents)
|
---|