Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.5.61 #1704

Merged
merged 28 commits into from
May 1, 2024
Merged

2.5.61 #1704

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0d4e363
HBL updates
spond Mar 7, 2024
917c2a9
RELAX init conditions when unclassified is present
spond Mar 7, 2024
b2e918c
MSS related scaling updates and support functions
spond Mar 8, 2024
51344f3
Merge remote-tracking branch 'refs/remotes/origin/develop' into develop
spond Mar 8, 2024
114318e
Fix for 0 weights
spond Mar 8, 2024
12d007a
Merge remote-tracking branch 'refs/remotes/origin/develop' into develop
spond Mar 8, 2024
8dc8dd5
Working on improving large set constraint performance
spond Mar 8, 2024
8a1166d
MSS-joint
spond Mar 8, 2024
68fcd05
Merge remote-tracking branch 'refs/remotes/origin/develop' into develop
spond Mar 8, 2024
7cbfc6f
HBL tweaks
spond Mar 11, 2024
a1c7b33
Alignment fixes
spond Mar 12, 2024
51d3e8e
Adding ORF detection
spond Mar 12, 2024
7e19993
Alignment.cpp fixes
spond Mar 13, 2024
9f82638
MSS Syn REV Codon rescale
spond Mar 13, 2024
b18f305
Fixes for auto-renaming issues skipping through checks
spond Mar 19, 2024
a153a0d
Merge remote-tracking branch 'refs/remotes/origin/develop' into develop
spond Mar 19, 2024
4735225
fixing iterating over numbers is shared_load_file
spond Mar 19, 2024
69e197a
MSS joint fit
spond Mar 21, 2024
7c27b0c
Fixing the zero mean edge case for initial guessing in BUSTED and RELAX
spond Apr 3, 2024
9485534
Merge remote-tracking branch 'refs/remotes/origin/develop' into develop
spond Apr 3, 2024
f0ccc43
Merge remote-tracking branch 'origin' into develop
spond Apr 8, 2024
2e65940
Fixing BUSTED-E initial conditions
spond Apr 12, 2024
c963eab
BUSTED IC tweaks
spond Apr 19, 2024
2065f1a
Convergence tweaks
spond Apr 23, 2024
d294a9f
Commenting out some developmental features
spond May 1, 2024
dc41521
Commenting out some developmental features
spond May 1, 2024
fc23875
Commenting out some developmental features
spond May 1, 2024
b4f0263
Merge branch 'develop' of https://github.com/veg/hyphy into develop
spond May 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ option(NOAVX OFF)
option(NOSSE4 OFF)
option(NONEON OFF)
option(NOZLIB OFF)
option(NOBLAS OFF)

if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(NOAVX ON)
Expand Down Expand Up @@ -427,7 +428,7 @@ include_directories(
contrib/SQLite-3.8.2 # SQLite
)

if (${BLAS_FOUND})
if (${BLAS_FOUND} AND NOT NOBLAS)
add_definitions (-D_SLKP_USE_APPLE_BLAS)
set(DEFAULT_LIBRARIES "${DEFAULT_LIBRARIES}")
endif()
Expand All @@ -451,7 +452,7 @@ else(${OPENMP_FOUND})
target_link_libraries(hyphy PRIVATE ${DEFAULT_LIBRARIES})
endif(${OPENMP_FOUND})

if(${BLAS_FOUND})
if(${BLAS_FOUND} AND NOT NOBLAS)
target_link_libraries(hyphy PRIVATE ${BLAS_LIBRARIES})
endif ()

Expand Down Expand Up @@ -490,7 +491,7 @@ if(${MPI_FOUND})
target_link_libraries(HYPHYMPI ${DEFAULT_LIBRARIES} ${MPI_LIBRARIES})
endif(${OPENMP_FOUND})

if(${BLAS_FOUND})
if(${BLAS_FOUND} AND NOT NOBLAS)
target_link_libraries(HYPHYMPI ${BLAS_LIBRARIES})
endif ()

Expand Down Expand Up @@ -598,6 +599,9 @@ target_link_libraries(HYPHYDEBUG ${DEFAULT_LIBRARIES})

add_custom_target(DEBUG DEPENDS HYPHYDEBUG)

if(${BLAS_FOUND})
target_link_libraries(HYPHYDEBUG ${BLAS_LIBRARIES})
endif ()

set_target_properties(
HYPHYDEBUG
Expand Down
52 changes: 44 additions & 8 deletions res/TemplateBatchFiles/MSS-joint-fitter.bf
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ io.ReportProgressMessageMD("mss", "data" , "* Loaded a list with **" + mss_selec
KeywordArgument ("code", "Which genetic code should be used", "Universal");
mss.genetic_code = alignments.LoadGeneticCode (None);

KeywordArgument ("omega", "How should alignment-level omega be treated?", "Fix");

mss.omega_option = io.SelectAnOption ({
{"Fix", "[Default] Fix omega estimates at those obtained with the standard MG94xREV model"}
{"Fit", "Fit omega (per alignment) together with the MSS model"}
}, "How should alignment-level omega be treated?");


mss.file_records = {};
mss.file_info = {};
Expand Down Expand Up @@ -160,10 +167,18 @@ mss_selector.header = {
{"TreeLength"},
{"Log(L)"}
};

KeywordArgument ("model", "Substitution model to use","SynREV");

mss.model_option = io.SelectAnOption ({
{"SynREV", "[Default] SynREV model (one rate per aa)"}
{"SynREVCodon", "SynREVCodon (one rate per codon pair)"}
}, "Which model?");



ExecuteCommands ( "mss.codon_classes = model.codon.MSS.prompt_and_define (terms.global, mss.genetic_code[terms.code])",
{"--mss-type" : "SynREV"}
{"--mss-type" : mss.model_option}
);

io.ReportProgressMessageMD("mss", "fit0" , "Individual file statistics and simple model fits\n");
Expand Down Expand Up @@ -219,6 +234,7 @@ io.ReportProgressMessageMD("mss", "fit0done", "**LogL = " + mss.baselineLL + "**
mss.json ["baseline"] = {terms.json.log_likelihood : mss.baselineLL, terms.json.AICc : mss.baseline_AIC};

function mss.MSS_generator (type) {
mss.codon_classes [utility.getGlobalValue("terms.model.MSS.normalize")] = TRUE;
model = Call ("models.codon.MSS.ModelDescription",type, mss.genetic_code[terms.code], mss.codon_classes);
return model;
}
Expand All @@ -234,10 +250,13 @@ mss.lf_id = "MSS.COMPOSITE.LF";
mss.model_map_overall = {};


//#profile START;
mss.constraints = {};

for (mss.counter, mss_selector.path; in; mss_selector.file_list) {
mss.prefix = mss.file_prefix [mss_selector.path];

console.log (">" + mss.counter + " / " + mss_selector.path);

mss.model_name = "`mss.prefix`.model";
mss.tree_name = "`mss.prefix`.tree";
Expand Down Expand Up @@ -271,14 +290,17 @@ for (mss.counter, mss_selector.path; in; mss_selector.file_list) {
(initial_values [terms.branch_length])[0],
terms.model.branch_length_constrain,
TRUE);

models.FixParameterSetRegExp (terms.parameters.omega_ratio, ^(mss.model_name));

if (mss.omega_option == "Fix") {
models.FixParameterSetRegExp (terms.parameters.omega_ratio, ^(mss.model_name));
mss.constraint_count += 1;
}
models.FixParameterSetRegExp (terms.nucleotideRatePrefix, ^(mss.model_name));


if (mss.counter == 0) {
mss.reference_set = ^(mss.model_name);
mss.mss_rate_list = model.GetParameters_RegExp( ^(mss.model_name),"^" + terms.parameters.synonymous_rate + "");
mss.mss_rate_list = model.GetParameters_RegExp( ^(mss.model_name),"^" + terms.MeanScaler(""));
mss.model_dimension = utility.Array1D (mss.mss_rate_list);
mss.scaler_prefix = 'mss.scaler_parameter_';
mss.scaler_mapping = {};
Expand All @@ -290,15 +312,18 @@ for (mss.counter, mss_selector.path; in; mss_selector.file_list) {
for (mss.i2 = 0; mss.i2 < mss.model_dimension; mss.i2 += 1) {
parameters.DeclareGlobal (mss.scaler_prefix + mss.i2, None);
}

mss.json ["mapping"] = mss.scaler_index;
} else {
//utility.getGlobalValue ("terms.parameters.synonymous_rate");
models.BindGlobalParameters ({"0" : mss.reference_set , "1" : ^(mss.model_name)}, terms.parameters.synonymous_rate + terms.model.MSS.syn_rate_between);
models.BindGlobalParameters ({"0" : mss.reference_set , "1" : ^(mss.model_name)}, terms.parameters.synonymous_rate + terms.model.MSS.syn_rate_within);
}
mss.constraints * models.BindGlobalParametersDeferred ({"0" : mss.reference_set , "1" : ^(mss.model_name)}, "^" + terms.MeanScaler(""));
}
}

parameters.BatchApplyConstraints (mss.constraints);

//#profile _hyphy_profile_dump;
//utility.FinishAndPrintProfile (_hyphy_profile_dump);
utility.SetEnvVariable ("AUTO_PARALLELIZE_OPTIMIZE", 1);

utility.ExecuteInGlobalNamespace ("LikelihoodFunction `mss.lf_id` = (`&mss.lf_components`)");

Expand All @@ -317,6 +342,17 @@ io.ReportProgressMessageMD("mss", "fitAlldone", "**LogL = " + res[1][0] + "**"
mss.json["joint-model"] = estimators.ExtractMLEsOptions (mss.lf_id, mss.model_map_overall, {terms.globals_only : TRUE});
//estimators.RemoveBranchLengthConstraints (mss.json["joint-model"]);

function pfilter (key, value) {
if (key[0] != "[" || (key $ "ratio")[0] >= 0) {
mss.filtered [key] = value;
}
}

mss.filtered = {};
((mss.json["joint-model"])[terms.global])["pfilter"][""];
(mss.json ["joint-model"])[terms.global] = mss.filtered;


io.SpoolJSON (mss.json, mss.json[terms.data.file]);

KeywordArgument ("save-fit", "Write the resulting model fit file to this (large!) file", "/dev/null");
Expand Down
85 changes: 51 additions & 34 deletions res/TemplateBatchFiles/SelectionAnalyses/BUSTED.bf
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ Version 4.5 adds an 'error absorption' component [experimental]
};


console.log (busted.analysis_description);
io.DisplayAnalysisBanner (busted.analysis_description);

busted.FG = "Test";
Expand Down Expand Up @@ -536,8 +535,7 @@ if (busted.do_srv) {
//PARAMETER_GROUPING + busted.srv_distribution["weights"];
PARAMETER_GROUPING + utility.Concat (busted.srv_distribution["rates"],busted.srv_distribution["weights"]);

busted.init_grid_setup (busted.srv_distribution, FALSE);

busted.init_grid_setup (busted.srv_distribution, FALSE);
}

if (busted.do_srv_hmm) {
Expand All @@ -553,33 +551,29 @@ busted.global_scaler_list = {};

for (busted.partition_index = 0; busted.partition_index < busted.partition_count; busted.partition_index += 1) {
busted.global_scaler_list [busted.partition_index] = "busted.bl.scaler_" + busted.partition_index;
parameters.DeclareGlobalWithRanges (busted.global_scaler_list [busted.partition_index], 3, 0, 1000);
busted.initial_ranges [busted.global_scaler_list [busted.partition_index]] = {
terms.lower_bound : 2.0,
terms.upper_bound : 4.0
};
parameters.DeclareGlobalWithRanges (busted.global_scaler_list [busted.partition_index], 1, 0, 1000);
// busted.initial_ranges [busted.global_scaler_list [busted.partition_index]] = {
// terms.lower_bound : 1.0,
// terms.upper_bound : 3.0
// };
}




busted.initial.test_mean = ((selection.io.extract_global_MLE_re (busted.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+test.+"))["0"])[terms.fit.MLE];
busted.initial_grid = estimators.LHC (busted.initial_ranges,busted.initial_grid.N);

//estimators.CreateInitialGrid (busted.initial_grid, busted.initial_grid.N, busted.initial_grid_presets);

busted.initial_grid = utility.Map (busted.initial_grid, "_v_",
'busted._renormalize_with_weights (_v_, "busted.distribution", busted.initial.test_mean, busted.error_sink)'
);

if (busted.has_background) { //GDD rate category
if (busted.has_background) { //has background
busted.initial.background_mean = ((selection.io.extract_global_MLE_re (busted.final_partitioned_mg_results, "^" + terms.parameters.omega_ratio + ".+background.+"))["0"])[terms.fit.MLE];

busted.initial_grid = utility.Map (busted.initial_grid, "_v_",
'busted._renormalize (_v_, "busted.background_distribution", busted.initial.background_mean, busted.error_sink)'
'busted._renormalize_with_weights (_v_, "busted.background_distribution", busted.initial.background_mean, busted.error_sink)'
);
}


busted.model_map = {};

for (busted.partition_index = 0; busted.partition_index < busted.partition_count; busted.partition_index += 1) {
Expand All @@ -604,7 +598,7 @@ io.ReportProgressMessageMD ("BUSTED", "main", "Performing the full (dN/dS > 1 al
*/


busted.nm.precision = -0.00025*busted.final_partitioned_mg_results[terms.fit.log_likelihood];
busted.nm.precision = Max (-0.00001*busted.final_partitioned_mg_results[terms.fit.log_likelihood],0.5);

debug.checkpoint = utility.GetEnvVariable ("DEBUG_CHECKPOINT");

Expand All @@ -615,7 +609,8 @@ if (Type (debug.checkpoint) != "String") {


busted.tmp_fixed = models.FixParameterSetRegExpFromReference (terms.nucleotideRatePrefix,busted.test.bsrel_model, busted.final_partitioned_mg_results[terms.global]);



busted.grid_search.results = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.final_partitioned_mg_results, busted.model_object_map,
{
terms.run_options.retain_lf_object: TRUE,
Expand All @@ -634,16 +629,20 @@ if (Type (debug.checkpoint) != "String") {
}
);




parameters.RemoveConstraint (busted.tmp_fixed );
//console.log (busted.tmp_fixed);
PARAMETER_GROUPING + Columns (busted.tmp_fixed);

//PRODUCE_OPTIMIZATION_LOG = 1;

busted.full_model = estimators.FitLF (busted.filter_names, busted.trees, busted.model_map, busted.grid_search.results, busted.model_object_map, {
"retain-lf-object": TRUE,
terms.run_options.optimization_settings :
{
"OPTIMIZATION_METHOD" : "coordinate-wise",
"OPTIMIZATION_METHOD" : "hybrid",
//"OPTIMIZATION_PRECISION" : 1.
}

Expand Down Expand Up @@ -1260,57 +1259,75 @@ lfunction busted._renormalize (v, distro, mean, skip_first) {
//------------------------------------------------------------------------------

lfunction busted._renormalize_with_weights (v, distro, mean, skip_first) {

parameters.SetValues (v);
m = parameters.GetStickBreakingDistribution (^distro);
d = Rows (m);


mean = Max (mean, 1e-3);

if (skip_first) {
m0 = m[0][0]*m[0][1];
} else {
m0 = 0;
}


over_one = m[d-1][0] * m[d-1][1];

if (over_one >= mean*0.95) {
//console.log ("OVERAGE");
new_weight = mean * Random (0.9, 0.95) / m[d-1][0];
diff = (m[d-1][1] - new_weight)/(d-1);
diff = (m[d-1][1] - new_weight)/(d-1-(skip_first != 0));
for (k = (skip_first != 0); k < d-1; k += 1) {
m[k][1] += diff;
}
m[d-1][1] = new_weight;
}


over_one = m[d-1][0] * m[d-1][1];
under_one = (+(m[-1][0] $ m[-1][1])) / (1-m[d-1][1]); // current mean

under_one = (+(m[-1][0] $ m[-1][1]) - m0) / (1-m[d-1][1]); // current mean

for (i = (skip_first != 0); i < d-1; i+=1) {
m[i][0] = m[i][0] * mean / under_one;
}

under_one = +(m[-1][0] $ m[-1][1]);

if (skip_first) {
m_rest = m [{{1,0}}][{{d-1,1}}];
under_one = +(m_rest[-1][0] $ m_rest[-1][1]);
} else {
under_one = +(m[-1][0] $ m[-1][1]);
}


for (i = (skip_first != 0); i < d; i+=1) {
m[i][0] = m[i][0] * mean / under_one;
}

m = m%0;
wts = parameters.SetStickBreakingDistributionWeigths (m[-1][1]);


if (skip_first) {
m_rest = m [{{1,0}}][{{d-1,1}}];
m_rest = m_rest % 0;
for (i = 1; i < d; i+=1) {
m[i][0] = m_rest[i-1][0];
m[i][1] = m_rest[i-1][1];
}
} else {
m = m%0;
}

//console.log (v);

wts = parameters.SetStickBreakingDistributionWeigths (m[-1][1]);


for (i = (skip_first != 0); i < d; i+=1) {
(v[((^distro)["rates"])[i]])[^"terms.fit.MLE"] = m[i][0];
}
for (i = (skip_first != 0); i < d-1; i+=1) {
(v[((^distro)["weights"])[i]])[^"terms.fit.MLE"] = wts[i];
}

//console.log (v);

//assert (0);
return v;

}
Expand Down Expand Up @@ -1383,7 +1400,7 @@ function busted.init_grid_setup (omega_distro, error_sink) {
busted.initial_grid [_name_] = {{100,500,1000,5000}};
busted.initial_ranges [_name_] = {
terms.lower_bound : 100,
terms.upper_bound : 10000
terms.upper_bound : 1000
};
} else {
busted.initial_ranges [_name_] = {
Expand Down Expand Up @@ -1416,12 +1433,12 @@ function busted.init_grid_setup (omega_distro, error_sink) {
if (error_sink && _index_ == 0) {
busted.initial_grid [_name_] = {
{
0, 0.001, 0.005, 0.1
0, 0.001, 0.0025, 0.025
}
};
busted.initial_ranges [_name_] = {
terms.lower_bound : 0,
terms.upper_bound : 0.01,
terms.upper_bound : 0.005,
};
} else {
busted.initial_ranges [_name_] = {
Expand Down
Loading
Loading