From 2065f1a5365bbf7f826a7b412d11db9a0b14643e Mon Sep 17 00:00:00 2001
From: Sergei
Date: Tue, 23 Apr 2024 16:53:48 -0400
Subject: [PATCH] Convergence tweaks

---
 .../SelectionAnalyses/BUSTED.bf | 13 ++--
 src/core/likefunc.cpp           | 60 ++++++++++---------
 tests/hbltests/libv3/BUSTED.wbf |  6 +-
 3 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
index c65d0917e..e1f513cde 100644
--- a/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
+++ b/res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
@@ -558,8 +558,6 @@ for (busted.partition_index = 0; busted.partition_index < busted.partition_count
     // };
     }

-
-
 busted.initial.test_mean = ((selection.io.extract_global_MLE_re (busted.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+test.+"))["0"])[terms.fit.MLE];

 busted.initial_grid = estimators.LHC (busted.initial_ranges,busted.initial_grid.N);
@@ -600,7 +598,7 @@ io.ReportProgressMessageMD ("BUSTED", "main", "Performing the full (dN/dS > 1 al
 */

-busted.nm.precision = -0.00025*busted.final_partitioned_mg_results[terms.fit.log_likelihood];
+busted.nm.precision = Max (-0.00001*busted.final_partitioned_mg_results[terms.fit.log_likelihood],0.5);

 debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT");
@@ -611,7 +609,8 @@ if (Type (debug.checkpoint) != "String") {

     busted.tmp_fixed = models.FixParameterSetRegExpFromReference (terms.nucleotideRatePrefix,busted.test.bsrel_model, busted.final_partitioned_mg_results[terms.global]);

-
+
+
     busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map, {
         terms.run_options.retain_lf_object: TRUE,
@@ -630,8 +629,12 @@ if (Type (debug.checkpoint) != "String") {
         }
     );

+
+    parameters.RemoveConstraint (busted.tmp_fixed );
+    //console.log (busted.tmp_fixed);
+    PARAMETER_GROUPING + Columns (busted.tmp_fixed);

     //PRODUCE_OPTIMIZATION_LOG = 1;
@@ -639,7 +642,7 @@ if (Type (debug.checkpoint) != "String") {
             "retain-lf-object": TRUE,
             terms.run_options.optimization_settings :
             {
-                "OPTIMIZATION_METHOD" : "coordinate-wise",
+                "OPTIMIZATION_METHOD" : "hybrid",
                 //"OPTIMIZATION_PRECISION" : 1.
             }
diff --git a/src/core/likefunc.cpp b/src/core/likefunc.cpp
index 636d984ac..69f088c55 100644
--- a/src/core/likefunc.cpp
+++ b/src/core/likefunc.cpp
@@ -4398,7 +4398,10 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)

     precision = get_optimization_setting (kOptimizationPrecision, 0.001);
     maxItersPerVar = get_optimization_setting (kMaximumIterationsPerVariable, 5000.);
-
+    if (CheckEqual (precision, 0.0)) {
+        ReportWarning (kOptimizationPrecision & " was set to 0. Resetting to a default value of 0.001");
+        precision = 0.001;
+    }

     _FString * method_string = get_optimization_setting_string (kOptimizationMethod);
@@ -4547,9 +4550,9 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
             hyFloat grad_precision;

             if (maxSoFar > -INFINITY) {
-                grad_precision = MIN (1000., -maxSoFar / 100.);
+                grad_precision = MIN (1000., -maxSoFar / 500.);
             } else {
-                grad_precision = -0.01;
+                grad_precision = -0.005;
             }

             if (gradientBlocks.nonempty()) {
@@ -4572,7 +4575,7 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
                     hyFloat current_precision = MAX(1., precision);
                     while (current_precision > precision) {
                         ConjugateGradientDescent(current_precision, bestSoFar, true);
-                        current_precision *= 0.1;
+                        current_precision *= sqrt(0.1);
                     }
                     ConjugateGradientDescent(precision, bestSoFar, true);
                 }
@@ -4596,7 +4599,6 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
         currentPrecision = kOptimizationGradientDescent==7?sqrt(precision):intermediateP;
     }

-
     if (optimization_mode == kOptimizationCoordinateWise) {
         bool forward = false;
@@ -4874,7 +4876,7 @@ _Matrix* _LikelihoodFunction::Optimize (_AssociativeList const * options)
                 _Matrix bestMSoFar;
                 GetAllIndependent (bestMSoFar);
                 hyFloat prec = Minimum (diffs[0], diffs[1]),
-                        grad_precision = diffs[0] + diffs[1];
+                        grad_precision = Maximum(diffs[0],diffs[1]);

                 prec = Minimum (Maximum (prec, precision), 1.);
@@ -6438,11 +6440,11 @@ void _LikelihoodFunction::GetGradientStepBound (_Matrix& gradient,hyFloat& le
 void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradientStep, _Matrix& values,_SimpleList& freeze, long order, bool normalize)
 {
     hyFloat funcValue;
-    static const hyFloat kMaxD = 1.e4;
+    static const hyFloat kMaxD = 1.e8;

     if (order==1) {
         funcValue = Compute();
-        //printf ("\n%ld %20.18g\n", likeFuncEvalCallCount, funcValue);
+        //printf ("\n%ld %20.18g (%g)\n", likeFuncEvalCallCount, funcValue, gradientStep);
         /*
         if (verbosity_level > 100) {
@@ -6460,8 +6462,9 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi
             hyFloat currentValue = GetIthIndependent(index),
                     ub = GetIthIndependentBound(index,false)-currentValue,
                     lb = currentValue-GetIthIndependentBound(index,true),
-                    testStep = currentValue * gradientStep;//MAX(currentValue * gradientStep,gradientStep);
-
+                    testStep = MAX(currentValue * gradientStep,gradientStep);
+
+            //printf ("%ld %s %20.18g\n", index, GetIthIndependentName (index)->get_str(), currentValue);

             hyFloat check_vv = currentValue;
@@ -6478,11 +6481,11 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi
                     }
                 }

-
+
                 if (!CheckEqual (testStep,0.0)) {
-                    /*if (verbosity_level > 100) {
-                        printf ("Gradient step for %s is %.16g @%.16g %\n", GetIthIndependentVar(index)->GetName()->get_str(), testStep, currentValue);
-                    }*/
+                    //if (verbosity_level > 100) {
+                    //printf ("Gradient step for %s is %.16g @%.16g %\n", GetIthIndependentVar(index)->GetName()->get_str(), testStep, currentValue);
+                    //}
                     SetIthIndependent(index,currentValue+testStep);
                     hyFloat dF = Compute();
                     gradient[index]=(dF-funcValue)/testStep;// * DerivativeCorrection (index, currentValue);
@@ -6495,9 +6498,9 @@ void _LikelihoodFunction::ComputeGradient (_Matrix& gradient, hyFloat& gradi
                         printf ("Negative value stashed %15.12lg\n", currentValue);
                     }
                     hyFloat check_vv = GetIthIndependent(index);*/
-                    if (verbosity_level > 150) {
-                        printf ("_LikelihoodFunction::ComputeGradient %ld\t%s\t @%20.18g\t f(x) = %20.18g, f(x+h) = %20.18g, h = %20.18g, correction = %20.18g, grad = %20.18g\n", index, GetIthIndependentName(index)->get_str(), currentValue, funcValue, dF, testStep, DerivativeCorrection (index, currentValue), gradient.theData[index]);
-                    }
+                    //if (verbosity_level > 150) {
+                    //printf ("_LikelihoodFunction::ComputeGradient %ld\t%s\t @%20.18g\t f(x) = %20.18g, f(x+h) = %20.18g, delta(F) = %20.18g, h = %20.18g, correction = %20.18g, grad = %20.18g\n", index, GetIthIndependentName(index)->get_str(), currentValue, funcValue, dF, dF-funcValue, testStep, DerivativeCorrection (index, currentValue), gradient.theData[index]);
+                    //}
                     SetIthIndependent(index,currentValue);
                 } else {
                     gradient[index]= 0.;
@@ -6688,8 +6691,8 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision

         if (gradL > 0.0) {

-            current_direction = gradient * (1./gradL);
-            gradient = current_direction;
+            current_direction = gradient;// * (1./gradL);
+            //gradient = current_direction;

             for (long index = 0; index< MAX (dim, 10) && index < iterationLimit; index++, currentPrecision*=0.25) {
@@ -6716,33 +6719,30 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
                 ComputeGradient (gradient, gradientStep, bestVal, freeze, 1, false);
                 gradL = gradient.AbsValue ();

-                if (CheckEqual(gradL,0.0)) {
+                /*if (CheckEqual(gradL,0.0)) {
                     break;
                 } else {
                     gradient *= 1./gradL;
-                }
+                }*/

                 previous_direction = current_direction;

                 hyFloat beta = 0.,
                         scalar_product = 0.;

                 // use Polak–Ribière direction
-                for (unsigned long i = 0UL; i < dim; i++) {
+                /*for (unsigned long i = 0UL; i < dim; i++) {
                     scalar_product += previous_gradient.theData[i] * previous_gradient.theData[i];
                     beta += gradient.theData[i] * ( previous_gradient.theData[i] - gradient.theData[i]);
-                }
+                }*/

-                LoggerAddGradientPhase (line_search_precision, beta, scalar_product);
-                LoggerAllVariables ();
-                LoggerLogL (maxSoFar);

                 // use Dai–Yuan
-                /*
+
                 for (unsigned long i = 0UL; i < dim; i++) {
                     beta += gradient.theData[i] * gradient.theData[i];
                     scalar_product += previous_direction.theData[i] * ( gradient.theData[i] - previous_gradient.theData[i]);
                 }
                 beta = -beta;
-                */
+

                 // Hestenes-Stiefel
                 /*
@@ -6753,6 +6753,10 @@ hyFloat _LikelihoodFunction::ConjugateGradientDescent (hyFloat step_precision
                 beta = -beta;
                 */

+                LoggerAddGradientPhase (line_search_precision, beta, scalar_product);
+                LoggerAllVariables ();
+                LoggerLogL (maxSoFar);
+
                 beta /= scalar_product;
                 beta = MAX (beta, 0.0);
                 previous_direction = current_direction;
diff --git a/tests/hbltests/libv3/BUSTED.wbf b/tests/hbltests/libv3/BUSTED.wbf
index 77b8f3544..d42f6e2a6 100644
--- a/tests/hbltests/libv3/BUSTED.wbf
+++ b/tests/hbltests/libv3/BUSTED.wbf
@@ -15,14 +15,14 @@ LoadFunctionLibrary ("libv3/IOFunctions.bf");

 assert (check_value (
-    ((busted.json["fits"])["Unconstrained model"])["Log Likelihood"], -3435.55, 0.001), "Incorrect log-likelihood for the full model");
+    ((busted.json["fits"])["Unconstrained model"])["Log Likelihood"], -3413.83, 0.001), "Incorrect log-likelihood for the full model");

 assert (check_value (
-    ((busted.json["test results"])["p-value"]),0.18531, 0.01), "p-value for the test is incorrect");
+    ((busted.json["test results"])["p-value"]),0.2264, 0.01), "p-value for the test is incorrect");

 assert (check_value (
-    +(busted.json["Evidence Ratios"])["optimized null"],189.46, 0.01), "Incorrect sum of evidence ratios");
+    +(busted.json["Evidence Ratios"])["optimized null"],188.49, 0.01), "Incorrect sum of evidence ratios");
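
Note on the conjugate-gradient change: the likefunc.cpp hunks above switch
ConjugateGradientDescent from the Polak-Ribiere beta to the Dai-Yuan beta and
stop renormalizing the gradient between iterations. One common motivation for
this kind of switch is that Dai-Yuan yields descent directions under standard
Wolfe line-search conditions, while Polak-Ribiere generally needs the
MAX(beta, 0) restart to behave. For reference, below is a minimal
self-contained sketch of the textbook Dai-Yuan update written for
minimization; the names (dai_yuan_direction, grad_new, grad_old, dir_old) are
illustrative and not HyPhy API. HyPhy maximizes log-likelihood and tracks
ascent directions, which is why the production code flips the sign of beta
("beta = -beta") instead of negating the gradient as done here.

    // Dai-Yuan conjugate-gradient direction update (minimization form):
    //   beta_DY = (g_{k+1} . g_{k+1}) / (d_k . (g_{k+1} - g_k))
    //   d_{k+1} = -g_{k+1} + beta_DY * d_k
    #include <algorithm>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    std::vector<double> dai_yuan_direction(const std::vector<double>& grad_new,
                                           const std::vector<double>& grad_old,
                                           const std::vector<double>& dir_old) {
        // Numerator: squared norm of the new gradient.
        double num = std::inner_product(grad_new.begin(), grad_new.end(),
                                        grad_new.begin(), 0.0);
        // Denominator: old direction dotted with the gradient change.
        double den = 0.0;
        for (size_t i = 0; i < dir_old.size(); ++i) {
            den += dir_old[i] * (grad_new[i] - grad_old[i]);
        }
        double beta = (den != 0.0) ? num / den : 0.0;
        // Clamp at zero: a negative beta restarts with plain steepest
        // descent, mirroring "beta = MAX (beta, 0.0);" in the patched code.
        beta = std::max(beta, 0.0);
        std::vector<double> dir_new(grad_new.size());
        for (size_t i = 0; i < grad_new.size(); ++i) {
            dir_new[i] = -grad_new[i] + beta * dir_old[i];
        }
        return dir_new;
    }

    int main() {
        // Toy two-parameter example: one update step.
        std::vector<double> g1 = {0.4, -0.2}, g0 = {1.0, 0.5}, d0 = {-1.0, -0.5};
        std::vector<double> d1 = dai_yuan_direction(g1, g0, d0);
        std::printf("new direction: %g %g\n", d1[0], d1[1]);  // -0.610526 0.0947368
    }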