root/galaxy-central/lib/galaxy/tools/util/hyphy_util.py

リビジョン 2, 31.8 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#Dan Blankenberg
2#Contains file contents and helper methods for HYPHY configurations
3import tempfile, os
4
5def get_filled_temp_filename(contents):
6    fh = tempfile.NamedTemporaryFile('w')
7    filename = fh.name
8    fh.close()
9    fh = open(filename, 'w')
10    fh.write(contents)
11    fh.close()
12    return filename
13
14NJ_tree_shared_ibf = """
15COUNT_GAPS_IN_FREQUENCIES = 0;
16methodIndex                  = 1;
17
18/*-----------------------------------------------------------------------------------------------------------------------------------------*/
19
20function InferTreeTopology(verbFlag)
21{
22    distanceMatrix = {ds.species,ds.species};
23
24    MESSAGE_LOGGING              = 0;
25    ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"chooseDistanceFormula.def");
26    InitializeDistances (0);
27       
28    for (i = 0; i<ds.species; i=i+1)
29    {
30        for (j = i+1; j<ds.species; j = j+1)
31        {
32            distanceMatrix[i][j] = ComputeDistanceFormula (i,j);
33        }
34    }
35
36    MESSAGE_LOGGING              = 1;
37    cladesMade                     = 1;
38   
39
40    if (ds.species == 2)
41    {
42        d1 = distanceMatrix[0][1]/2;
43        treeNodes = {{0,1,d1__},
44                     {1,1,d1__},
45                     {2,0,0}};
46                     
47        cladesInfo = {{2,0}};
48    }
49    else
50    {
51        if (ds.species == 3)
52        {
53            /* generate least squares estimates here */
54           
55            d1 = (distanceMatrix[0][1]+distanceMatrix[0][2]-distanceMatrix[1][2])/2;
56            d2 = (distanceMatrix[0][1]-distanceMatrix[0][2]+distanceMatrix[1][2])/2;
57            d3 = (distanceMatrix[1][2]+distanceMatrix[0][2]-distanceMatrix[0][1])/2;
58           
59            treeNodes = {{0,1,d1__},
60                         {1,1,d2__},
61                         {2,1,d3__}
62                         {3,0,0}};
63                         
64            cladesInfo = {{3,0}};       
65        }
66        else
67        {   
68            njm = (distanceMatrix > methodIndex)>=ds.species;
69               
70            treeNodes         = {2*(ds.species+1),3};
71            cladesInfo        = {ds.species-1,2};
72           
73            for (i=Rows(treeNodes)-1; i>=0; i=i-1)
74            {
75                treeNodes[i][0] = njm[i][0];
76                treeNodes[i][1] = njm[i][1];
77                treeNodes[i][2] = njm[i][2];
78            }
79
80            for (i=Rows(cladesInfo)-1; i>=0; i=i-1)
81            {
82                cladesInfo[i][0] = njm[i][3];
83                cladesInfo[i][1] = njm[i][4];
84            }
85           
86            njm = 0;
87        }
88    }
89    return 1.0;
90}
91
92/*-----------------------------------------------------------------------------------------------------------------------------------------*/
93
94function TreeMatrix2TreeString (doLengths)
95{
96    treeString = "";
97    p = 0;
98    k = 0;
99    m = treeNodes[0][1];
100    n = treeNodes[0][0];
101    treeString*(Rows(treeNodes)*25);
102
103    while (m)
104    {   
105        if (m>p)
106        {
107            if (p)
108            {
109                treeString*",";
110            }
111            for (j=p;j<m;j=j+1)
112            {
113                treeString*"(";
114            }
115        }
116        else
117        {
118            if (m<p)
119            {
120                for (j=m;j<p;j=j+1)
121                {
122                    treeString*")";
123                }
124            }   
125            else
126            {
127                treeString*",";
128            }   
129        }
130        if (n<ds.species)
131        {
132            GetString (nodeName, ds, n);
133            if (doLengths != 1)
134            {
135                treeString*nodeName;           
136            }
137            else
138            {   
139                treeString*taxonNameMap[nodeName];
140            }
141        }
142        if (doLengths>.5)
143        {
144            nodeName = ":"+treeNodes[k][2];
145            treeString*nodeName;
146        }
147        k=k+1;
148        p=m;
149        n=treeNodes[k][0];
150        m=treeNodes[k][1];
151    }
152
153    for (j=m;j<p;j=j+1)
154    {
155        treeString*")";
156    }
157   
158    treeString*0;
159    return treeString;
160}
161"""
162
163def get_NJ_tree (filename):
164    return """
165DISTANCE_PROMPTS            = 1;
166ExecuteAFile ("%s");
167
168DataSet                 ds              = ReadDataFile (PROMPT_FOR_FILE);
169DataSetFilter             filteredData = CreateFilter (ds,1);
170
171/* do sequence to branch map */
172
173taxonNameMap = {};
174
175for (k=0; k<ds.species; k=k+1)
176{
177    GetString         (thisName, ds,k);
178    shortName         = (thisName^{{"\\\\..+",""}})&&1;
179    taxonNameMap[shortName] = thisName;
180    SetParameter (ds,k,shortName);
181}
182
183DataSetFilter             filteredData = CreateFilter (ds,1);
184InferTreeTopology         (0);
185treeString                 = TreeMatrix2TreeString (1);
186
187fprintf                    (PROMPT_FOR_FILE, CLEAR_FILE, treeString);
188fscanf                    (stdin, "String", ps_file);
189
190if (Abs(ps_file))
191{
192    treeString = TreeMatrix2TreeString (2);
193    UseModel (USE_NO_MODEL);
194    Tree givenTree = treeString;
195    baseHeight         = TipCount (givenTree)*28;
196    TREE_OUTPUT_OPTIONS = {};
197    TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14;
198    baseWidth = 0;
199    treeAVL                = givenTree^0;
200    drawLetter            = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n";
201    for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1)
202    {
203        nodeName = (treeAVL[k3])["Name"];
204        if(Abs((treeAVL[k3])["Children"]) == 0)
205        {
206            mySpecs = {};
207            mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter";
208            baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]);
209        }
210    }
211    baseWidth = 40*baseWidth;
212   
213    fprintf (ps_file,  CLEAR_FILE, drawLetter, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}}));
214}
215""" % (filename)
216
217def get_NJ_treeMF (filename):
218    return  """
219ExecuteAFile ("%s");
220
221VERBOSITY_LEVEL            = -1;
222fscanf                  (PROMPT_FOR_FILE, "Lines", inLines);
223
224_linesIn                        = Columns (inLines);
225isomorphicTreesBySequenceCount  = {};
226
227/*---------------------------------------------------------*/
228
229_currentGene   = 1;
230_currentState = 0;
231geneSeqs      = "";
232geneSeqs      * 128;
233
234fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN);
235treeOutFile = LAST_FILE_PATH;
236
237fscanf  (stdin,"String", ps_file);
238if (Abs(ps_file))
239{
240    fprintf (ps_file, CLEAR_FILE, KEEP_OPEN);
241}
242
243for (l=0; l<_linesIn; l=l+1)
244{
245    if (Abs(inLines[l]) == 0)
246    {
247        if (_currentState == 1)
248        {
249            geneSeqs      * 0;
250            DataSet       ds            = ReadFromString (geneSeqs);
251            _processAGene (_currentGene,treeOutFile,ps_file);
252            geneSeqs * 128;
253            _currentGene = _currentGene + 1;
254        }
255    }
256    else
257    {
258        if (_currentState == 0)
259        {
260            _currentState = 1;
261        }
262        geneSeqs * inLines[l];
263        geneSeqs * "\\n";
264    }
265}
266
267
268if (_currentState == 1)
269{
270    geneSeqs      * 0;
271    if (Abs(geneSeqs))
272    {
273        DataSet       ds            = ReadFromString (geneSeqs);
274        _processAGene (_currentGene,treeOutFile,ps_file);
275    }
276}
277
278fprintf (treeOutFile,CLOSE_FILE);
279if (Abs(ps_file))
280{
281    fprintf (ps_file,CLOSE_FILE);
282}
283/*---------------------------------------------------------*/
284
285function _processAGene (_geneID, nwk_file, ps_file)
286{
287    if (ds.species == 1)
288    {
289        fprintf                    (nwk_file, _geneID-1, "\\tNone \\tNone\\n");
290        return 0;
291       
292    }
293   
294    DataSetFilter             filteredData = CreateFilter (ds,1);
295
296    /* do sequence to branch map */
297
298    taxonNameMap = {};
299
300    for (k=0; k<ds.species; k=k+1)
301    {
302        GetString         (thisName, ds,k);
303        shortName         = (thisName^{{"\\\\..+",""}});
304        taxonNameMap[shortName] = thisName;
305        SetParameter (ds,k,shortName);
306    }
307
308    DataSetFilter             filteredData = CreateFilter (ds,1);
309    DISTANCE_PROMPTS        = (_geneID==1);
310
311    InferTreeTopology         (0);
312    baseTree                = TreeMatrix2TreeString (0);
313    UseModel                 (USE_NO_MODEL);
314   
315    Tree             baseTop    = baseTree;
316   
317    /* standardize this top */
318   
319    for (k=0; k<Abs(isomorphicTreesBySequenceCount[filteredData.species]); k=k+1)
320    {
321        testString      = (isomorphicTreesBySequenceCount[filteredData.species])[k];
322        Tree testTree = testString;
323        if (testTree == baseTop)
324        {
325            baseTree = testString;
326            break;
327        }
328    }
329    if (k==Abs(isomorphicTreesBySequenceCount[filteredData.species]))
330    {
331        if (k==0)
332        {
333            isomorphicTreesBySequenceCount[filteredData.species] = {};
334        }
335        (isomorphicTreesBySequenceCount[filteredData.species])[k] = baseTree;
336    }
337   
338    fprintf                    (nwk_file, _geneID-1, "\\t", baseTree, "\\t", TreeMatrix2TreeString (1), "\\n");
339    if (Abs(ps_file))
340    {
341        treeString = TreeMatrix2TreeString (2);
342        UseModel (USE_NO_MODEL);
343        Tree givenTree = treeString;
344        baseHeight         = TipCount (givenTree)*28;
345        TREE_OUTPUT_OPTIONS = {};
346        TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14;
347        baseWidth = 0;
348        treeAVL                = givenTree^0;
349        drawLetter            = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n";
350        for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1)
351        {
352            nodeName = (treeAVL[k3])["Name"];
353            if(Abs((treeAVL[k3])["Children"]) == 0)
354            {
355                mySpecs = {};
356                mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter";
357                baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]);
358            }
359        }
360        baseWidth = 40*baseWidth;
361       
362        fprintf (stdout, _geneID, ":", givenTree,"\\n");
363        fprintf (ps_file, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}}));
364    }
365    return 0;
366}
367""" % (filename)
368
369BranchLengthsMF = """
370VERBOSITY_LEVEL            = -1;
371
372fscanf                  (PROMPT_FOR_FILE, "Lines", inLines);
373
374
375
376_linesIn = Columns (inLines);
377
378
379
380/*---------------------------------------------------------*/
381
382
383
384_currentGene   = 1;
385
386_currentState = 0;
387
388geneSeqs      = "";
389
390geneSeqs      * 128;
391
392
393
394for (l=0; l<_linesIn; l=l+1)
395
396{
397
398    if (Abs(inLines[l]) == 0)
399
400    {
401
402        if (_currentState == 1)
403
404        {
405
406            geneSeqs      * 0;
407
408            DataSet       ds            = ReadFromString (geneSeqs);
409
410            _processAGene (_currentGene);
411
412             geneSeqs * 128;
413
414            _currentGene = _currentGene + 1;
415
416        }
417
418    }
419
420    else
421
422    {
423
424        if (_currentState == 0)
425
426        {
427
428            _currentState = 1;
429
430        }
431
432        geneSeqs * inLines[l];
433
434        geneSeqs * "\\n";
435
436    }
437
438}
439
440
441
442if (_currentState == 1)
443
444{
445
446    geneSeqs      * 0;
447
448    if (Abs(geneSeqs))
449
450    {
451
452        DataSet       ds            = ReadFromString (geneSeqs);
453
454        _processAGene (_currentGene);
455
456    }
457
458}
459
460
461
462fprintf (resultFile,CLOSE_FILE);
463
464
465
466/*---------------------------------------------------------*/
467
468
469
470function _processAGene (_geneID)
471
472{
473
474    DataSetFilter             filteredData = CreateFilter (ds,1);
475
476    if (_currentGene == 1)
477
478    {
479
480        SelectTemplateModel        (filteredData);
481
482       
483
484        SetDialogPrompt           ("Tree file");
485
486        fscanf                    (PROMPT_FOR_FILE, "Tree",  givenTree);
487
488        fscanf                    (stdin, "String", resultFile);
489
490       
491
492        /* do sequence to branch map */
493
494       
495
496        validNames = {};
497
498        taxonNameMap = {};
499
500       
501
502        for (k=0; k<TipCount(givenTree); k=k+1)
503
504        {
505
506            validNames[TipName(givenTree,k)&&1] = 1;
507
508        }
509
510       
511
512        for (k=0; k<BranchCount(givenTree); k=k+1)
513
514        {
515
516            thisName = BranchName(givenTree,k);
517
518            taxonNameMap[thisName&&1] = thisName;
519
520        }
521
522       
523
524        storeValidNames = validNames;
525
526        fprintf         (resultFile,CLEAR_FILE,KEEP_OPEN,"Block\\tBranch\\tLength\\tLowerBound\\tUpperBound\\n");
527
528    }
529
530    else
531
532    {
533
534        HarvestFrequencies (vectorOfFrequencies, filteredData, 1,1,1);
535
536        validNames = storeValidNames;
537
538    }
539
540   
541
542    for (k=0; k<ds.species; k=k+1)
543
544    {
545
546        GetString (thisName, ds,k);
547
548        shortName = (thisName^{{"\\\\..+",""}})&&1;
549
550        if (validNames[shortName])
551
552        {
553
554            taxonNameMap[shortName] = thisName;
555
556            validNames - (shortName);
557
558            SetParameter (ds,k,shortName);
559
560        }
561
562        else
563
564        {
565
566            fprintf         (resultFile,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree,"\\n");
567
568            return 0;
569
570        }
571
572    }
573
574   
575
576    /* */
577
578   
579
580    LikelihoodFunction lf = (filteredData,givenTree);
581
582    Optimize                 (res,lf);
583
584   
585
586    timer = Time(0)-timer;
587
588   
589
590    branchNames   = BranchName   (givenTree,-1);
591
592    branchLengths = BranchLength (givenTree,-1);
593
594   
595
596   
597
598    for (k=0; k<Columns(branchNames)-1; k=k+1)
599
600    {
601
602        COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t";
603
604        COVARIANCE_PRECISION = 0.95;
605
606        CovarianceMatrix (cmx,lf);
607
608        if (k==0)
609
610        {
611
612            /* compute a scaling factor */
613
614            ExecuteCommands ("givenTree."+branchNames[0]+".t=1");
615
616            scaleFactor = BranchLength (givenTree,0);
617
618            ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]);
619
620        }
621
622        fprintf (resultFile,_geneID,"\\t",taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n");
623
624    }
625
626   
627
628    ttl = (branchLengths*(Transpose(branchLengths["1"])))[0];
629
630    global treeScaler = 1;
631
632    ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree);
633
634    COVARIANCE_PARAMETER = "treeScaler";
635
636    COVARIANCE_PRECISION = 0.95;
637
638    CovarianceMatrix (cmx,lf);
639
640    fprintf (resultFile,_geneID,"\\tTotal Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n");
641
642    ClearConstraints (givenTree);
643
644    return 0;
645
646}
647"""
648
649BranchLengths = """
650DataSet                 ds              = ReadDataFile (PROMPT_FOR_FILE);
651DataSetFilter             filteredData = CreateFilter (ds,1);
652
653SelectTemplateModel        (filteredData);
654
655SetDialogPrompt         ("Tree file");
656fscanf                    (PROMPT_FOR_FILE, "Tree",  givenTree);
657fscanf                    (stdin, "String", resultFile);
658
659/* do sequence to branch map */
660
661validNames = {};
662taxonNameMap = {};
663
664for (k=0; k<TipCount(givenTree); k=k+1)
665{
666    validNames[TipName(givenTree,k)&&1] = 1;
667}
668
669for (k=0; k<BranchCount(givenTree); k=k+1)
670{
671    thisName = BranchName(givenTree,k);
672    taxonNameMap[thisName&&1] = thisName;
673}
674
675for (k=0; k<ds.species; k=k+1)
676{
677    GetString (thisName, ds,k);
678    shortName = (thisName^{{"\\\\..+",""}})&&1;
679    if (validNames[shortName])
680    {
681        taxonNameMap[shortName] = thisName;
682        validNames - (shortName);
683        SetParameter (ds,k,shortName);
684    }
685    else
686    {
687        fprintf         (resultFile,CLEAR_FILE,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree);
688        return 0;
689    }
690}
691
692/* */
693
694LikelihoodFunction lf = (filteredData,givenTree);
695
696Optimize                 (res,lf);
697
698timer = Time(0)-timer;
699
700branchNames   = BranchName   (givenTree,-1);
701branchLengths = BranchLength (givenTree,-1);
702
703fprintf         (resultFile,CLEAR_FILE,KEEP_OPEN,"Branch\\tLength\\tLowerBound\\tUpperBound\\n");
704
705for (k=0; k<Columns(branchNames)-1; k=k+1)
706{
707    COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t";
708    COVARIANCE_PRECISION = 0.95;
709    CovarianceMatrix (cmx,lf);
710    if (k==0)
711    {
712        /* compute a scaling factor */
713        ExecuteCommands ("givenTree."+branchNames[0]+".t=1");
714        scaleFactor = BranchLength (givenTree,0);
715        ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]);
716    }
717    fprintf (resultFile,taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n");
718}
719
720ttl = (branchLengths*(Transpose(branchLengths["1"])))[0];
721global treeScaler = 1;
722ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree);
723COVARIANCE_PARAMETER = "treeScaler";
724COVARIANCE_PRECISION = 0.95;
725CovarianceMatrix (cmx,lf);
726ClearConstraints (givenTree);
727fprintf (resultFile,"Total Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n");
728fprintf (resultFile,CLOSE_FILE);
729"""
730
731SimpleLocalFitter = """
732VERBOSITY_LEVEL               = -1;
733COUNT_GAPS_IN_FREQUENCIES  = 0;
734
735/*---------------------------------------------------------*/
736
737function returnResultHeaders (dummy)
738{
739    _analysisHeaders = {};
740    _analysisHeaders[0] = "BLOCK";
741    _analysisHeaders[1] = "BP";
742    _analysisHeaders[2] = "S_sites";
743    _analysisHeaders[3] = "NS_sites";
744    _analysisHeaders[4] = "Stop_codons";
745    _analysisHeaders[5] = "LogL";
746    _analysisHeaders[6] = "AC";
747    _analysisHeaders[7] = "AT";
748    _analysisHeaders[8] = "CG";
749    _analysisHeaders[9] = "CT";
750    _analysisHeaders[10] = "GT";
751    _analysisHeaders[11] = "Tree";
752
753    for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1)
754    {
755        branchName = treeBranchNames[_biterator];
756       
757        _analysisHeaders [Abs(_analysisHeaders)] = "length("+branchName+")";
758        _analysisHeaders [Abs(_analysisHeaders)] = "dS("+branchName+")";
759        _analysisHeaders [Abs(_analysisHeaders)] = "dN("+branchName+")";
760        _analysisHeaders [Abs(_analysisHeaders)] = "omega("+branchName+")";
761    }
762
763    return _analysisHeaders;
764}
765
766/*---------------------------------------------------------*/
767
768function runAGeneFit (myID)
769{
770    DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
771
772    if (_currentGene==1)
773    {
774        _MG94stdinOverload = {};
775        _MG94stdinOverload ["0"] = "Local";
776        _MG94stdinOverload ["1"] = modelSpecString;
777
778        ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl",
779                                 _MG94stdinOverload);
780       
781        Tree    codonTree           = treeString;
782    }
783    else
784    {
785        HarvestFrequencies               (observedFreq,filteredData,3,1,1);
786        MULTIPLY_BY_FREQS             = PopulateModelMatrix ("MG94custom", observedFreq);
787        vectorOfFrequencies         = BuildCodonFrequencies (observedFreq);
788        Model MG94customModel         = (MG94custom,vectorOfFrequencies,0);
789
790        Tree    codonTree            = treeString;
791    }
792
793    LikelihoodFunction lf     = (filteredData,codonTree);
794
795    Optimize                     (res,lf);
796
797    _snsAVL       =                 _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0);
798    _cL           =                  ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree");
799
800
801    _returnMe = {};
802    _returnMe ["BLOCK"]              = myID;
803    _returnMe ["LogL"]              = res[1][0];
804    _returnMe ["BP"]                 = _snsAVL ["Sites"];
805    _returnMe ["S_sites"]             = _snsAVL ["SSites"];
806    _returnMe ["NS_sites"]             = _snsAVL ["NSSites"];
807    _returnMe ["AC"]                 = AC;
808    _returnMe ["AT"]                 = AT;
809    _returnMe ["CG"]                 = CG;
810    _returnMe ["CT"]                 = CT;
811    _returnMe ["GT"]                 = GT;
812    _returnMe ["Tree"]                 = Format(codonTree,0,1);
813
814    for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1)
815    {
816        branchName = treeBranchNames[_biterator];
817
818        _returnMe ["length("+branchName+")"]         = (_cL["Total"])[_biterator];
819        _returnMe ["dS("+branchName+")"]                 = (_cL["Syn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["S_sites"]);
820        _returnMe ["dN("+branchName+")"]                 = (_cL["NonSyn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["NS_sites"]);
821       
822        ExecuteCommands ("_lom = _standardizeRatio(codonTree."+treeBranchNames[_biterator]+".nonSynRate,codonTree."+treeBranchNames[_biterator]+".synRate);");
823        _returnMe ["omega("+branchName+")"]                 = _lom;
824    }
825   
826    return _returnMe;
827}
828
829"""
830
831SimpleGlobalFitter = """
832VERBOSITY_LEVEL               = -1;
833COUNT_GAPS_IN_FREQUENCIES  = 0;
834
835/*---------------------------------------------------------*/
836
837function returnResultHeaders (dummy)
838{
839    _analysisHeaders = {};
840    _analysisHeaders[0]  = "BLOCK";
841    _analysisHeaders[1]  = "BP";
842    _analysisHeaders[2]  = "S_sites";
843    _analysisHeaders[3]  = "NS_sites";
844    _analysisHeaders[4]  = "Stop_codons";
845    _analysisHeaders[5]  = "LogL";
846    _analysisHeaders[6]  = "omega";
847    _analysisHeaders[7]  = "omega_range";
848    _analysisHeaders[8]  = "AC";
849    _analysisHeaders[9]  = "AT";
850    _analysisHeaders[10] = "CG";
851    _analysisHeaders[11] = "CT";
852    _analysisHeaders[12] = "GT";
853    _analysisHeaders[13] = "Tree";
854
855    return _analysisHeaders;
856}
857
858/*---------------------------------------------------------*/
859
860function runAGeneFit (myID)
861{
862    fprintf (stdout, "[SimpleGlobalFitter.bf on GENE ", myID, "]\\n");
863    taxonNameMap = {};
864   
865    for (k=0; k<ds.species; k=k+1)
866    {
867        GetString         (thisName, ds,k);
868        shortName         = (thisName^{{"\\\\..+",""}})&&1;
869        taxonNameMap[shortName] = thisName;
870        SetParameter (ds,k,shortName);
871    }
872
873    DataSetFilter filteredData = CreateFilter (ds,1);
874    _nucSites                    = filteredData.sites;
875   
876    if (Abs(treeString))
877    {
878        givenTreeString = treeString;
879    }
880    else
881    {
882        if (_currentGene==1)
883        {
884            ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"NJ.bf");
885        }
886        givenTreeString = InferTreeTopology (0);
887        treeString        = "";
888    }
889
890    DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
891
892    if (_currentGene==1)
893    {
894        _MG94stdinOverload = {};
895        _MG94stdinOverload ["0"] = "Global";
896        _MG94stdinOverload ["1"] = modelSpecString;
897
898        ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl",
899                                 _MG94stdinOverload);
900       
901        Tree    codonTree           = givenTreeString;
902    }
903    else
904    {
905        HarvestFrequencies               (observedFreq,filteredData,3,1,1);
906        MULTIPLY_BY_FREQS             = PopulateModelMatrix ("MG94custom", observedFreq);
907        vectorOfFrequencies         = BuildCodonFrequencies (observedFreq);
908        Model MG94customModel         = (MG94custom,vectorOfFrequencies,0);
909
910        Tree    codonTree            = givenTreeString;
911    }
912
913    LikelihoodFunction lf     = (filteredData,codonTree);
914
915    Optimize                     (res,lf);
916
917    _snsAVL       =                 _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0);
918    _cL           =                  ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree");
919
920
921    _returnMe = {};
922    _returnMe ["BLOCK"]              = myID;
923    _returnMe ["LogL"]              = res[1][0];
924    _returnMe ["BP"]                 = _snsAVL ["Sites"];
925    _returnMe ["S_sites"]             = _snsAVL ["SSites"];
926    _returnMe ["NS_sites"]             = _snsAVL ["NSSites"];
927    _returnMe ["Stop_codons"]         = (_nucSites-filteredData.sites*3)$3;
928    _returnMe ["AC"]                 = AC;
929    _returnMe ["AT"]                 = AT;
930    _returnMe ["CG"]                 = CG;
931    _returnMe ["CT"]                 = CT;
932    _returnMe ["GT"]                 = GT;
933    _returnMe ["omega"]             = R;
934    COVARIANCE_PARAMETER             = "R";
935    COVARIANCE_PRECISION             = 0.95;
936    CovarianceMatrix                 (cmx,lf);
937    _returnMe ["omega_range"]         = ""+cmx[0]+"-"+cmx[2];
938    _returnMe ["Tree"]                 = Format(codonTree,0,1);
939
940
941    return _returnMe;
942}
943"""
944
945FastaReader = """
946fscanf            (stdin, "String", _coreAnalysis);
947fscanf            (stdin, "String", _outputDriver);
948
949ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def");
950ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"dSdNTreeTools.ibf");
951ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"CodonTools.bf");
952ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"GrabBag.bf");
953
954SetDialogPrompt ("Tree file");
955fscanf            (PROMPT_FOR_FILE, "Tree",  givenTree);
956
957treeBranchNames               = BranchName (givenTree,-1);
958treeBranchCount               = Columns    (treeBranchNames)-1;
959treeString                    = Format (givenTree,1,1);
960
961SetDialogPrompt ("Multiple gene FASTA file");
962fscanf          (PROMPT_FOR_FILE, "Lines", inLines);
963fscanf            (stdin, "String",  modelSpecString);
964fscanf            (stdin, "String", _outPath);
965 
966ExecuteAFile    (_outputDriver);
967ExecuteAFile    (_coreAnalysis);
968 
969/*---------------------------------------------------------*/
970
971_linesIn     = Columns (inLines);
972_currentGene   = 1;
973 _currentState = 0;
974/* 0 - waiting for a non-empty line */
975/* 1 - reading files */
976
977geneSeqs       = "";
978geneSeqs        * 0;
979
980_prepareFileOutput (_outPath);
981
982for (l=0; l<_linesIn; l=l+1)
983{
984    if (Abs(inLines[l]) == 0)
985    {
986        if (_currentState == 1)
987        {
988            geneSeqs      * 0;
989            DataSet       ds            = ReadFromString (geneSeqs);
990            _processAGene (ds.species == treeBranchCount,_currentGene);
991            geneSeqs * 128;
992            _currentGene = _currentGene + 1;
993        }
994    }
995    else
996    {
997        if (_currentState == 0)
998        {
999            _currentState = 1;
1000        }
1001        geneSeqs * inLines[l];
1002        geneSeqs * "\\n";
1003    }
1004}
1005
1006if (_currentState == 1)
1007{
1008    geneSeqs      * 0;
1009    DataSet       ds            = ReadFromString (geneSeqs);
1010    _processAGene (ds.species == treeBranchCount,_currentGene);
1011}
1012
1013_finishFileOutput (0);
1014"""
1015
1016TabWriter = """
1017/*---------------------------------------------------------*/
1018function _prepareFileOutput (_outPath)
1019{
1020    _outputFilePath = _outPath;
1021   
1022    _returnHeaders     = returnResultHeaders(0);
1023   
1024    fprintf (_outputFilePath, CLEAR_FILE, KEEP_OPEN, _returnHeaders[0]);
1025    for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1)
1026    {
1027        fprintf (_outputFilePath,"\\t",_returnHeaders[_biterator]);
1028    }
1029
1030
1031   
1032    fprintf (_outputFilePath,"\\n");
1033    return 0;
1034}
1035
1036/*---------------------------------------------------------*/
1037
1038function _processAGene (valid, _geneID)
1039{
1040    if (valid)
1041    {
1042        returnValue = runAGeneFit (_geneID);
1043        fprintf (_outputFilePath, returnValue[_returnHeaders[0]]);
1044        for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1)
1045        {
1046            fprintf (_outputFilePath,"\\t",returnValue[_returnHeaders[_biterator]]);
1047        }
1048        fprintf (_outputFilePath, "\\n");
1049    }
1050    /*
1051    else
1052    {
1053        fprintf (_outputFilePath,
1054                _geneID, ", Incorrect number of sequences\\n");
1055    }
1056    */
1057    _currentState = 0;
1058    return 0;
1059}
1060
1061/*---------------------------------------------------------*/
1062function _finishFileOutput (dummy)
1063{
1064    return 0;
1065}
1066"""
1067
1068def get_dnds_config_filename(Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ):
1069    contents = """
1070_genomeScreenOptions = {};
1071
1072/* all paths are either absolute or relative
1073to the DATA READER */
1074
1075_genomeScreenOptions ["0"] = "%s";
1076    /* which analysis to run on each gene; */   
1077_genomeScreenOptions ["1"] = "%s";
1078    /* what output to produce; */   
1079_genomeScreenOptions ["2"] = "%s";
1080    /* genetic code */   
1081_genomeScreenOptions ["3"] = "%s";
1082    /* tree file */
1083_genomeScreenOptions ["4"] = "%s";
1084    /* alignment file */
1085_genomeScreenOptions ["5"] = "%s";
1086    /* nucleotide bias string; can define any of the 203 models */
1087_genomeScreenOptions ["6"] = "%s";
1088    /* output csv file */
1089   
1090ExecuteAFile ("%s", _genomeScreenOptions);   
1091""" % (Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename )
1092    return get_filled_temp_filename(contents)
1093   
1094   
1095def get_branch_lengths_config_filename(input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename):
1096    contents = """
1097_genomeScreenOptions = {};
1098
1099/* all paths are either absolute or relative
1100to the NucDataBranchLengths.bf */
1101
1102_genomeScreenOptions ["0"] = "%s";
1103    /* the file to analyze; */   
1104_genomeScreenOptions ["1"] = "CUSTOM";
1105    /* use an arbitrary nucleotide model */   
1106_genomeScreenOptions ["2"] = "%s";
1107    /* which model to use */
1108_genomeScreenOptions ["3"] = "%s";
1109    /* model options */
1110_genomeScreenOptions ["4"] = "Estimated";
1111    /* rate parameters */
1112_genomeScreenOptions ["5"] = "%s";
1113    /* base frequencies */
1114_genomeScreenOptions ["6"] = "%s";
1115    /* the tree to use; */   
1116_genomeScreenOptions ["7"] = "%s";
1117    /* write .csv output to; */   
1118   
1119ExecuteAFile ("%s", _genomeScreenOptions);   
1120""" % (input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename)
1121    return get_filled_temp_filename(contents)
1122   
1123   
1124def get_nj_tree_config_filename(input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename):
1125    contents = """
1126_genomeScreenOptions = {};
1127
1128/* all paths are either absolute or relative
1129to the BuildNJTree.bf */
1130
1131_genomeScreenOptions ["0"] = "%s";
1132    /* the file to analyze; */   
1133_genomeScreenOptions ["1"] = "%s";
1134    /* pick which distance metric to use; TN93 is a good default */   
1135_genomeScreenOptions ["2"] = "%s";
1136    /* write Newick tree output to; */   
1137_genomeScreenOptions ["3"] = "%s";
1138    /* write a postscript tree file to this file; leave blank to not write a tree */   
1139   
1140ExecuteAFile ("%s", _genomeScreenOptions);   
1141""" % (input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename)
1142    return get_filled_temp_filename(contents)
1143   
1144   
1145def get_nj_treeMF_config_filename(input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename):
1146    contents = """
1147_genomeScreenOptions = {};
1148
1149/* all paths are either absolute or relative
1150to the BuildNJTreeMF.bf */
1151
1152_genomeScreenOptions ["0"] = "%s";
1153    /* the multiple alignment file to analyze; */   
1154_genomeScreenOptions ["1"] = "%s";
1155    /* write Newick tree output to; */   
1156_genomeScreenOptions ["2"] = "%s";
1157    /* write a postscript tree file to this file; leave blank to not write a tree */   
1158_genomeScreenOptions ["3"] = "%s";
1159    /* pick which distance metric to use; TN93 is a good default */   
1160   
1161ExecuteAFile ("%s", _genomeScreenOptions);       
1162""" % (input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename)
1163    return get_filled_temp_filename(contents)
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。