Skip to content

Commit

Permalink
LBFGS back to DNM, add EM-LBFGS as default
Browse files Browse the repository at this point in the history
  • Loading branch information
helske committed Nov 23, 2024
1 parent eedd993 commit e927dd6
Show file tree
Hide file tree
Showing 16 changed files with 567 additions and 118 deletions.
9 changes: 4 additions & 5 deletions R/bootstrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,8 @@ permute_clusters <- function(model, pcp_mle) {
#' nonparametric or parametric bootstrap should be used. The former samples
#' sequences with replacement, whereas the latter simulates new datasets based
#' on the model.
#' @param method Estimation method used in bootstrapping. Defaults to `"LBFGS"`
#' as it is typically faster that `"EM"`, although `"EM"` can be more robust to
#' converge to global optimum at each bootstrap sample.
#' @param method Estimation method used in bootstrapping. Defaults to
#' `"EM-LBFGS"`.
#' @param ... Additional arguments to [estimate_nhmm()] or [estimate_mnhmm()].
#' @return The original model with additional element `model$boot`, which
#' contains A n * B matrix where n is the number of coefficients. The order of
Expand All @@ -91,7 +90,7 @@ bootstrap_coefs <- function(model, ...) {
#' @export
bootstrap_coefs.nhmm <- function(model, B = 1000,
type = c("nonparametric", "parametric"),
method = "LBFGS", ...) {
method = "EM-LBFGS", ...) {
type <- match.arg(type)
stopifnot_(
checkmate::test_int(x = B, lower = 0L),
Expand Down Expand Up @@ -148,7 +147,7 @@ bootstrap_coefs.nhmm <- function(model, B = 1000,
#' @export
bootstrap_coefs.mnhmm <- function(model, B = 1000,
type = c("nonparametric", "parametric"),
method = "LBFGS", ...) {
method = "EM-LBFGS", ...) {
type <- match.arg(type)
stopifnot_(
checkmate::test_int(x = B, lower = 0L),
Expand Down
5 changes: 3 additions & 2 deletions R/lbfgs_mnhmm.R → R/dnm_mnhmm.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
lbfgs_nhmm <- function(model, inits, init_sd, restarts, lambda, control,
dnm_nhmm <- function(model, inits, init_sd, restarts, lambda, control,
control_restart, save_all_solutions) {
M <- model$n_symbols
S <- model$n_states
Expand Down Expand Up @@ -202,7 +202,8 @@ lbfgs_nhmm <- function(model, inits, init_sd, restarts, lambda, control,
time = end_time - start_time,
lambda = lambda,
pseudocount = 0,
method = "LBFGS"
method = "DNM",
algorithm = control$algorithm
)
model
}
5 changes: 3 additions & 2 deletions R/lbfgs_nhmm.R → R/dnm_nhmm.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
lbfgs_nhmm <- function(model, inits, init_sd, restarts, lambda, control,
dnm_nhmm <- function(model, inits, init_sd, restarts, lambda, control,
control_restart, save_all_solutions) {
M <- model$n_symbols
S <- model$n_states
Expand Down Expand Up @@ -162,7 +162,8 @@ lbfgs_nhmm <- function(model, inits, init_sd, restarts, lambda, control,
time = end_time - start_time,
lambda = lambda,
pseudocount = 0,
method = "LBFGS"
method = "DNM",
algorithm = control$algorithm
)
model
}
292 changes: 292 additions & 0 deletions R/em_lbfgs_mnhmm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
em_lbfgs_nhmm <- function(model, inits, init_sd, restarts, lambda, pseudocount,
control, control_restart, control_mstep,
save_all_solutions) {
M <- model$n_symbols
S <- model$n_states
T_ <- model$length_of_sequences
C <- model$n_channels
D <- model$n_clusters
n_i <- attr(model, "np_pi")
n_s <- attr(model, "np_A")
n_o <- attr(model, "np_B")
n_d <- attr(model, "np_omega")
icpt_only_omega <- attr(model$X_omega, "icpt_only")
icpt_only_pi <- attr(model$X_pi, "icpt_only")
icpt_only_A <- attr(model$X_A, "icpt_only")
icpt_only_B <- attr(model$X_B, "icpt_only")
iv_A <- attr(model$X_A, "iv")
iv_B <- attr(model$X_B, "iv")
tv_A <- attr(model$X_A, "tv")
tv_B <- attr(model$X_B, "tv")
X_omega <- model$X_omega
X_pi <- model$X_pi
X_A <- model$X_A
X_B <- model$X_B
K_omega <- nrow(X_omega)
K_pi <- nrow(X_pi)
K_A <- nrow(X_A)
K_B <- nrow(X_B)
Ti <- model$sequence_lengths
n_obs <- nobs(model)
need_grad <- grepl("NLOPT_LD_", control$algorithm)
obs <- create_obsArray(model)
if (C == 1) {
obs <- array(obs, dim(obs)[2:3])
}
all_solutions <- NULL
if (C == 1L) {
if (need_grad) {
objectivef <- function(pars) {
eta_pi <- create_eta_pi_mnhmm(pars[seq_len(n_i)], S, K_pi, D)
eta_A <- create_eta_A_mnhmm(pars[n_i + seq_len(n_s)], S, K_A, D)
eta_B <- create_eta_B_mnhmm(
pars[n_i + n_s + seq_len(n_o)], S, M, K_B, D
)
eta_omega <- create_eta_omega_mnhmm(
pars[n_i + n_s + n_o + seq_len(n_d)], D, K_omega
)
out <- log_objective_mnhmm_singlechannel(
eta_omega, X_omega, eta_pi, X_pi, eta_A, X_A, eta_B, X_B,
obs, Ti, icpt_only_omega, icpt_only_pi, icpt_only_A, icpt_only_B,
iv_A, iv_B, tv_A, tv_B
)
list(
objective = - (out$loglik - 0.5 * lambda * sum(pars^2)) / n_obs,
gradient = - (unlist(out[-1]) - lambda * pars) / n_obs
)
}
} else {
objectivef <- function(pars) {
eta_pi <- create_eta_pi_mnhmm(pars[seq_len(n_i)], S, K_pi, D)
eta_A <- create_eta_A_mnhmm(pars[n_i + seq_len(n_s)], S, K_A, D)
eta_B <- create_eta_B_mnhmm(
pars[n_i + n_s + seq_len(n_o)], S, M, K_B, D
)
eta_omega <- create_eta_omega_mnhmm(
pars[n_i + n_s + n_o + seq_len(n_d)], D, K_omega
)
out <- forward_mnhmm_singlechannel(
eta_omega, X_omega, eta_pi, X_pi, eta_A, X_A, eta_B, X_B,
obs, Ti, icpt_only_omega, icpt_only_pi, icpt_only_A, icpt_only_B,
iv_A, iv_B, tv_A, tv_B
)

- (sum(apply(out[, T_, ], 2, logSumExp)) - 0.5 * lambda * sum(pars^2)) / n_obs
}
}
} else {
if (need_grad) {
objectivef <- function(pars) {
eta_pi <- create_eta_pi_mnhmm(pars[seq_len(n_i)], S, K_pi, D)
eta_A <- create_eta_A_mnhmm(
pars[n_i + seq_len(n_s)],
S, K_A, D
)
eta_B <- unlist(
create_eta_multichannel_B_mnhmm(
pars[n_i + n_s + seq_len(n_o)], S, M, K_B, D
),
recursive = FALSE
)
eta_omega <- create_eta_omega_mnhmm(
pars[n_i + n_s + n_o + seq_len(n_d)], D, K_omega
)
out <- log_objective_mnhmm_multichannel(
eta_omega, X_omega, eta_pi, X_pi, eta_A, X_A, eta_B, X_B,
obs, Ti, icpt_only_omega, icpt_only_pi, icpt_only_A, icpt_only_B,
iv_A, iv_B, tv_A, tv_B
)
list(
objective = - (out$loglik - 0.5 * lambda * sum(pars^2)) / n_obs,
gradient = - (unlist(out[-1]) - lambda * pars) / n_obs
)
}
} else {
objectivef <- function(pars) {
eta_pi <- create_eta_pi_mnhmm(pars[seq_len(n_i)], S, K_pi, D)
eta_A <- create_eta_A_mnhmm(
pars[n_i + seq_len(n_s)], S, K_A, D
)
eta_B <- unlist(
create_eta_multichannel_B_mnhmm(
pars[n_i + n_s + seq_len(n_o)], S, M, K_B, D
),
recursive = FALSE
)
eta_omega <- create_eta_omega_mnhmm(
pars[n_i + n_s + n_o + seq_len(n_d)], D, K_omega
)
out <- forward_mnhmm_multichannel(
eta_omega, X_omega, eta_pi, X_pi, eta_A, X_A, eta_B, X_B,
obs, Ti, icpt_only_omega, icpt_only_pi, icpt_only_A, icpt_only_B,
iv_A, iv_B, tv_A, tv_B
)

- (sum(apply(out[, T_, ], 2, logSumExp)) - 0.5 * lambda * sum(pars^2)) / n_obs
}
}
}

start_time <- proc.time()
if (restarts > 0L) {
p <- progressr::progressor(along = seq_len(restarts))
out <- future.apply::future_lapply(seq_len(restarts), function(i) {
init <- create_initial_values(inits, model, init_sd)
if (C == 1) {
fit <- EM_LBFGS_mnhmm_singlechannel(
init$omega, model$X_omega, init$pi, model$X_pi, init$A, model$X_A,
init$B, model$X_B, obs, Ti, icpt_only_omega, icpt_only_pi,
icpt_only_A, icpt_only_B, iv_A, iv_B, tv_A, tv_B,
n_obs, control_restart$maxeval,
control_restart$ftol_abs, control_restart$ftol_rel,
control_restart$xtol_abs, control_restart$xtol_rel,
control_restart$print_level, control_mstep$maxeval,
control_mstep$ftol_abs, control_mstep$ftol_rel,
control_mstep$xtol_abs, control_mstep$xtol_rel,
control_mstep$print_level, lambda, pseudocount)
} else {
eta_B <- unlist(init$B, recursive = FALSE)
fit <- EM_LBFGS_mnhmm_multichannel(
init$omega, model$X_omega, init$pi, model$X_pi, init$A, model$X_A,
eta_B, model$X_B, obs, Ti, icpt_only_omega, icpt_only_pi,
icpt_only_A, icpt_only_B, iv_A, iv_B, tv_A, tv_B,
n_obs, control_restart$maxeval,
control_restart$ftol_abs, control_restart$ftol_rel,
control_restart$xtol_abs, control_restart$xtol_rel,
control_restart$print_level, control_mstep$maxeval,
control_mstep$ftol_abs, control_mstep$ftol_rel,
control_mstep$xtol_abs, control_mstep$xtol_rel,
control_mstep$print_level, lambda, pseudocount)
}
if (fit$return_code == 0) {
init <- unlist(
create_initial_values(
stats::setNames(
fit[c("eta_omega", "eta_pi", "eta_A", "eta_B")],
c("omega", "pi", "A", "B")
),
model,
init_sd = 0
)
)
fit <- nloptr(
x0 = init, eval_f = objectivef,
opts = control_restart
)
p()
fit
} else {
list(status = fit$return_code, objective = Inf)
}
},
future.seed = TRUE)

logliks <- -unlist(lapply(out, "[[", "objective")) * n_obs
return_codes <- unlist(lapply(out, "[[", "status"))
successful <- which(return_codes > 0)
if (length(successful) == 0) {
warning_(
c("All optimizations terminated due to error.",
"Error of first restart: ", error_msg(return_codes[1]))
)
}
optimum <- successful[which.max(logliks[successful])]
init <- out[[optimum]]$solution
if (save_all_solutions) {
all_solutions <- out
}
} else {
init <- create_initial_values(inits, model, init_sd)
}
if (C == 1) {
out <- EM_LBFGS_mnhmm_singlechannel(
init$omega, model$X_omega, init$pi, model$X_pi, init$A, model$X_A,
init$B, model$X_B, obs, Ti, icpt_only_omega, icpt_only_pi,
icpt_only_A, icpt_only_B, iv_A, iv_B, tv_A, tv_B,
n_obs, control$maxeval,
control$ftol_abs, control$ftol_rel,
control$xtol_abs, control$xtol_rel,
control$print_level, control_mstep$maxeval,
control_mstep$ftol_abs, control_mstep$ftol_rel,
control_mstep$xtol_abs, control_mstep$xtol_rel,
control_mstep$print_level, lambda, pseudocount)
} else {
eta_B <- unlist(init$B, recursive = FALSE)
out <- EM_LBFGS_mnhmm_multichannel(
init$omega, model$X_omega, init$pi, model$X_pi, init$A, model$X_A,
eta_B, model$X_B, obs, Ti, icpt_only_omega, icpt_only_pi,
icpt_only_A, icpt_only_B, iv_A, iv_B, tv_A, tv_B,
n_obs, control$maxeval,
control$ftol_abs, control$ftol_rel,
control$xtol_abs, control$xtol_rel,
control$print_level, control_mstep$maxeval,
control_mstep$ftol_abs, control_mstep$ftol_rel,
control_mstep$xtol_abs, control_mstep$xtol_rel,
control_mstep$print_level, lambda, pseudocount)
}
if (out$return_code == 0) {
init <- unlist(
create_initial_values(
stats::setNames(
fit[c("eta_omega", "eta_pi", "eta_A", "eta_B")],
c("omega", "pi", "A", "B")
),
model,
init_sd = 0
)
)
} else {
warning_(
paste("EM-step terminated due to error:", error_msg(out$return_code),
"Running L-BFGS using initial values for EM.")
)
init <- unlist(create_initial_values(inits, model, init_sd))
}
out <- nloptr(
x0 = init, eval_f = objectivef,
opts = control
)
end_time <- proc.time()
if (out$status < 0) {
warning_(
paste("Optimization terminated due to error:", error_msg(out$status))
)
}

pars <- out$solution
model$etas$pi <- create_eta_pi_mnhmm(pars[seq_len(n_i)], S, K_pi, D)
model$gammas$pi <- c(eta_to_gamma_mat_field(model$etas$pi))
model$etas$A <- create_eta_A_mnhmm(pars[n_i + seq_len(n_s)], S, K_A, D)
model$gammas$A <- c(eta_to_gamma_cube_field(model$etas$A))
if (C == 1L) {
model$etas$B <- create_eta_B_mnhmm(
pars[n_i + n_s + seq_len(n_o)], S, M, K_B, D
)
model$gammas$B <- c(eta_to_gamma_cube_field(model$etas$B))
} else {
model$etas$B <- create_eta_multichannel_B_mnhmm(
pars[n_i + n_s + seq_len(n_o)], S, M, K_B, D
)
l <- lengths(model$etas$B)
gamma_B <- c(eta_to_gamma_cube_field(unlist(model$etas$B, recursive = FALSE)))
model$gammas$B <- unname(split(gamma_B, rep(seq_along(l), l)))
}
model$etas$omega <- create_eta_omega_mnhmm(
pars[n_i + n_s + n_o + seq_len(n_d)], D, K_omega
)
model$gammas$omega <- eta_to_gamma_mat(model$etas$omega)
model$estimation_results <- list(
loglik = -out$objective * n_obs,
return_code = out$status,
message = out$message,
iterations = out$iterations,
logliks_of_restarts = if(restarts > 0L) logliks else NULL,
return_codes_of_restarts = if(restarts > 0L) return_codes else NULL,
all_solutions = all_solutions,
time = end_time - start_time,
lambda = lambda,
pseudocount = 0,
method = "EM-LBFGS"
)
model
}
Loading

0 comments on commit e927dd6

Please sign in to comment.