From 2065f1a5365bbf7f826a7b412d11db9a0b14643e Mon Sep 17 00:00:00 2001
From: Sergei
Date: Tue, 23 Apr 2024 16:53:48 -0400
Subject: [PATCH] Convergence tweaks

---
 .../SelectionAnalyses/BUSTED.bf | 13 ++--
 src/core/likefunc.cpp           | 60 ++++++++++---------
 tests/hbltests/libv3/BUSTED.wbf |  6 +-
 3 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
index c65d0917e..e1f513cde 100644
--- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
+++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
@@ -558,8 +558,6 @@ for (busted.partition_index = 0; busted.partition_index < busted.partition_count
     // };
     }

-
-
 busted.initial.test_mean = ((selection.io.extract_global_MLE_re (busted.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+test.+"))["0"])[terms.fit.MLE];

 busted.initial_grid = estimators.LHC (busted.initial_ranges,busted.initial_grid.N);
@@ -600,7 +598,7 @@ io.ReportProgressMessageMD ("BUSTED", "main", "Performing the full (dN/dS > 1 al
 */

-busted.nm.precision = -0.00025*busted.final_partitioned_mg_results[terms.fit.log_likelihood];
+busted.nm.precision = Max (-0.00001*busted.final_partitioned_mg_results[terms.fit.log_likelihood],0.5);

 debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT");
@@ -611,7 +609,8 @@ if (Type (debug.checkpoint) != "String") {

     busted.tmp_fixed = models.FixParameterSetRegExpFromReference (terms.nucleotideRatePrefix,busted.test.bsrel_model, busted.final_partitioned_mg_results[terms.global]);

-
+
+
     busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map, {
         terms.run_options.retain_lf_object: TRUE,
@@ -630,8 +629,12 @@ if (Type (debug.checkpoint) != "String") {
         }
     );

+
+    parameters.RemoveConstraint (busted.tmp_fixed );
+    //console.log (busted.tmp_fixed);
+    PARAMETER_GROUPING + Columns (busted.tmp_fixed);

     //PRODUCE_OPTIMIZATION_LOG = 1;
@@ -639,7 +642,7 @@ if (Type (debug.checkpoint) != "String") {
             "retain-lf-object": TRUE,
             terms.run_options.optimization_settings :
             {
-                "OPTIMIZATION_METHOD" : "coordinate-wise",
+                "OPTIMIZATION_METHOD" : "hybrid",
                 //"OPTIMIZATION_PRECISION" : 1.
             }
diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp
index 636d984ac..69f088c55 100644
--- a/src/core/likefunc.cpp
+++ b/src/core/likefunc.cpp
@@ -4398,7 +4398,10 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)

     precision = get_optimization_setting (kOptimizationPrecision, 0.001);
     maxItersPerVar = get_optimization_setting (kMaximumIterationsPerVariable, 5000.);
-
+    if (CheckEqual (precision, 0.0)) {
+        ReportWarning (kOptimizationPrecision & " was set to 0. Resetting to a default value of 0.001");
+        precision = 0.001;
+    }

     _FString * method_string = get_optimization_setting_string (kOptimizationMethod);
@@ -4547,9 +4550,9 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
             hyFloat grad_precision;

             if (maxSoFar > -INFINITY) {
-                grad_precision = MIN (1000., -maxSoFar / 100.);
+                grad_precision = MIN (1000., -maxSoFar / 500.);
             } else {
-                grad_precision = -0.01;
+                grad_precision = -0.005;
             }

             if (gradientBlocks.nonempty()) {
@@ -4572,7 +4575,7 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
                     hyFloat current_precision = MAX(1., precision);
                     while (current_precision > precision) {
                         ConjugateGradientDescent(current_precision, bestSoFar, true);
-                        current_precision *= 0.1;
+                        current_precision *= sqrt(0.1);
                     }
                     ConjugateGradientDescent(precision, bestSoFar, true);
                 }
@@ -4596,7 +4599,6 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
         currentPrecision = kOptimizationGradientDescent==7?sqrt(precision):intermediateP;
     }

-
     if (optimization_mode == kOptimizationCoordinateWise) {
         bool forward = false;
@@ -4874,7 +4876,7 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
                 _Matrix bestMSoFar;
                 GetAllIndependent (bestMSoFar);
                 hyFloat prec = Minimum (diffs[0], diffs[1]),
-                        grad_precision = diffs[0] + diffs[1];
+                        grad_precision = Maximum(diffs[0],diffs[1]);

                 prec = Minimum (Maximum (prec, precision), 1.);
@@ -6438,11 +6440,11 @@ void _LikelihoodFunction::GetGradientStepBound (_Matrix& gradient,hyFloat& le
 void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradientStep, _Matrix& values,_SimpleList& freeze, long order, bool normalize)
 {
     hyFloat funcValue;
-    static const hyFloat kMaxD = 1.e4;
+    static const hyFloat kMaxD = 1.e8;

     if (order==1) {
         funcValue = Compute();
-        //printf ("\n%ld %20.18g\n", likeFuncEvalCallCount, funcValue);
+        //printf ("\n%ld %20.18g (%g)\n", likeFuncEvalCallCount, funcValue, gradientStep);
         /*
         if (verbosity_level > 100) {
@@ -6460,8 +6462,9 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi
             hyFloat currentValue = GetIthIndependent(index),
                     ub = GetIthIndependentBound(index,false)-currentValue,
                     lb = currentValue-GetIthIndependentBound(index,true),
-                    testStep = currentValue * gradientStep;//MAX(currentValue * gradientStep,gradientStep);
-
+                    testStep = MAX(currentValue * gradientStep,gradientStep);
+
+            //printf ("%ld %s %20.18g\n", index, GetIthIndependentName (index)->get_str(), currentValue);

             hyFloat check_vv = currentValue;
@@ -6478,11 +6481,11 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi
                     }
                 }

-
+
                 if (!CheckEqual (testStep,0.0)) {
-                    /*if (verbosity_level > 100) {
-                        printf ("Gradient step for %s is %.16g @%.16g %\n", GetIthIndependentVar(index)->GetName()->get_str(), testStep, currentValue);
-                    }*/
+                    //if (verbosity_level > 100) {
+                    //printf ("Gradient step for %s is %.16g @%.16g %\n", GetIthIndependentVar(index)->GetName()->get_str(), testStep, currentValue);
+                    //}
                     SetIthIndependent(index,currentValue+testStep);
                     hyFloat dF = Compute();
                     gradient[index]=(dF-funcValue)/testStep;// * DerivativeCorrection (index, currentValue);
@@ -6495,9 +6498,9 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi
                         printf ("Negative value stashed %15.12lg\n", currentValue);
                     }
                     hyFloat check_vv = GetIthIndependent(index);*/
-                    if (verbosity_level > 150) {
-                        printf ("_LikelihoodFunction::ComputeGradient %ld\t%s\t @%20.18g\t f(x) = %20.18g, f(x+h) = %20.18g, h = %20.18g, correction = %20.18g, grad = %20.18g\n", index, GetIthIndependentName(index)->get_str(), currentValue, funcValue, dF, testStep, DerivativeCorrection (index, currentValue), gradient.theData[index]);
-                    }
+                    //if (verbosity_level > 150) {
+                    //printf ("_LikelihoodFunction::ComputeGradient %ld\t%s\t @%20.18g\t f(x) = %20.18g, f(x+h) = %20.18g, delta(F) = %20.18g, h = %20.18g, correction = %20.18g, grad = %20.18g\n", index, GetIthIndependentName(index)->get_str(), currentValue, funcValue, dF, dF-funcValue, testStep, DerivativeCorrection (index, currentValue), gradient.theData[index]);
+                    //}
                     SetIthIndependent(index,currentValue);
                 } else {
                     gradient[index]= 0.;
@@ -6688,8 +6691,8 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision

         if (gradL > 0.0) {

-            current_direction = gradient * (1./gradL);
-            gradient = current_direction;
+            current_direction = gradient;// * (1./gradL);
+            //gradient = current_direction;

             for (long index = 0; index< MAX (dim, 10) && index < iterationLimit; index++, currentPrecision*=0.25) {
@@ -6716,33 +6719,30 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
                 ComputeGradient (gradient, gradientStep, bestVal, freeze, 1, false);
                 gradL = gradient.AbsValue ();

-                if (CheckEqual(gradL,0.0)) {
+                /*if (CheckEqual(gradL,0.0)) {
                     break;
                 } else {
                     gradient *= 1./gradL;
-                }
+                }*/

                 previous_direction = current_direction;

                 hyFloat beta = 0.,
                         scalar_product = 0.;

                 // use Polak–Ribière direction
-                for (unsigned long i = 0UL; i < dim; i++) {
+                /*for (unsigned long i = 0UL; i < dim; i++) {
                     scalar_product += previous_gradient.theData[i] * previous_gradient.theData[i];
                     beta += gradient.theData[i] * ( previous_gradient.theData[i] - gradient.theData[i]);
-                }
+                }*/

-                LoggerAddGradientPhase (line_search_precision, beta, scalar_product);
-                LoggerAllVariables ();
-                LoggerLogL (maxSoFar);

                 // use Dai–Yuan
-                /*
+
                 for (unsigned long i = 0UL; i < dim; i++) {
                     beta += gradient.theData[i] * gradient.theData[i];
                     scalar_product += previous_direction.theData[i] * ( gradient.theData[i] - previous_gradient.theData[i]);
                 }
                 beta = -beta;
-                */
+

                 // Hestenes-Stiefel
                 /*
@@ -6753,6 +6753,10 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
                 beta = -beta;
                 */

+                LoggerAddGradientPhase (line_search_precision, beta, scalar_product);
+                LoggerAllVariables ();
+                LoggerLogL (maxSoFar);
+
                 beta /= scalar_product;
                 beta = MAX (beta, 0.0);
                 previous_direction = current_direction;
diff --git a/tests/hbltests/libv3/BUSTED.wbf b/tests/hbltests/libv3/BUSTED.wbf
index 77b8f3544..d42f6e2a6 100644
--- a/tests/hbltests/libv3/BUSTED.wbf
+++ b/tests/hbltests/libv3/BUSTED.wbf
@@ -15,14 +15,14 @@ LoadFunctionLibrary ("libv3/IOFunctions.bf");

 assert (check_value (
-    ((busted.json["fits"])["Unconstrained model"])["Log Likelihood"], -3435.55, 0.001), "Incorrect log-likelihood for the full model");
+    ((busted.json["fits"])["Unconstrained model"])["Log Likelihood"], -3413.83, 0.001), "Incorrect log-likelihood for the full model");

 assert (check_value (
-    ((busted.json["test results"])["p-value"]),0.18531, 0.01), "p-value for the test is incorrect");
+    ((busted.json["test results"])["p-value"]),0.2264, 0.01), "p-value for the test is incorrect");

 assert (check_value (
-    +(busted.json["Evidence Ratios"])["optimized null"],189.46, 0.01), "Incorrect sum of evidence ratios");
+    +(busted.json["Evidence Ratios"])["optimized null"],188.49, 0.01), "Incorrect sum of evidence ratios");
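
Note on the conjugate-gradient change: the likefunc.cpp hunks above switch
ConjugateGradientDescent from the Polak-Ribiere beta to the Dai-Yuan beta and
stop renormalizing the gradient between iterations. One common motivation for
this kind of switch is that Dai-Yuan yields descent directions under standard
Wolfe line-search conditions, while Polak-Ribiere generally needs the
MAX(beta, 0) restart to behave. For reference, below is a minimal
self-contained sketch of the textbook Dai-Yuan update written for
minimization; the names (dai_yuan_direction, grad_new, grad_old, dir_old) are
illustrative and not HyPhy API. HyPhy maximizes log-likelihood and tracks
ascent directions, which is why the production code flips the sign of beta
("beta = -beta") instead of negating the gradient as done here.

    // Dai-Yuan conjugate-gradient direction update (minimization form):
    //   beta_DY = (g_{k+1} . g_{k+1}) / (d_k . (g_{k+1} - g_k))
    //   d_{k+1} = -g_{k+1} + beta_DY * d_k
    #include <algorithm>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    std::vector<double> dai_yuan_direction(const std::vector<double>& grad_new,
                                           const std::vector<double>& grad_old,
                                           const std::vector<double>& dir_old) {
        // Numerator: squared norm of the new gradient.
        double num = std::inner_product(grad_new.begin(), grad_new.end(),
                                        grad_new.begin(), 0.0);
        // Denominator: old direction dotted with the gradient change.
        double den = 0.0;
        for (size_t i = 0; i < dir_old.size(); ++i) {
            den += dir_old[i] * (grad_new[i] - grad_old[i]);
        }
        double beta = (den != 0.0) ? num / den : 0.0;
        // Clamp at zero: a negative beta restarts with plain steepest
        // descent, mirroring "beta = MAX (beta, 0.0);" in the patched code.
        beta = std::max(beta, 0.0);
        std::vector<double> dir_new(grad_new.size());
        for (size_t i = 0; i < grad_new.size(); ++i) {
            dir_new[i] = -grad_new[i] + beta * dir_old[i];
        }
        return dir_new;
    }

    int main() {
        // Toy two-parameter example: one update step.
        std::vector<double> g1 = {0.4, -0.2}, g0 = {1.0, 0.5}, d0 = {-1.0, -0.5};
        std::vector<double> d1 = dai_yuan_direction(g1, g0, d0);
        std::printf("new direction: %g %g\n", d1[0], d1[1]);  // -0.610526 0.0947368
    }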