From a14aef55ec145af282acb233c7524c8a97be9a2c Mon Sep 17 00:00:00 2001 From: Jouni Helske Date: Wed, 4 Sep 2024 23:02:14 +0300 Subject: [PATCH] fix docs and imports etc --- NAMESPACE | 6 ++++ R/average_marginal_prediction.R | 5 +++- R/estimate_nhmm.R | 1 + R/forwardBackward.R | 1 + R/get_coefs.R | 1 + R/get_probs.R | 3 ++ R/hidden_paths.R | 3 +- R/most_probable_cluster.R | 7 +++-- R/plot.amp.R | 12 ++++++-- R/posterior_probs.R | 1 + R/print.R | 2 +- R/seqHMM-deprecated.R | 25 ---------------- R/seqHMM-package.R | 5 +++- R/utilities.R | 2 +- man/coef.Rd | 2 ++ man/estimate_mnhmm.Rd | 2 ++ man/estimate_nhmm.Rd | 2 ++ man/forward_backward.Rd | 2 ++ man/get_probs.Rd | 5 ++++ man/hidden_paths.Rd | 4 ++- man/plot.amp.Rd | 5 +++- man/posterior_probs.Rd | 2 ++ man/seqHMM-defunct.Rd | 51 --------------------------------- 23 files changed, 61 insertions(+), 88 deletions(-) delete mode 100644 man/seqHMM-defunct.Rd diff --git a/NAMESPACE b/NAMESPACE index 4987f334..756ab05b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -103,6 +103,8 @@ importFrom(TraMineR,seqlegend) importFrom(TraMineR,seqlength) importFrom(TraMineR,seqplot) importFrom(TraMineR,seqstatf) +importFrom(dplyr,across) +importFrom(dplyr,all_of) importFrom(future,multisession) importFrom(future,plan) importFrom(future,sequential) @@ -124,15 +126,19 @@ importFrom(graphics,strwidth) importFrom(graphics,text) importFrom(methods,hasArg) importFrom(numDeriv,jacobian) +importFrom(rlang,":=") importFrom(rstan,optimizing) importFrom(rstantools,rstan_config) importFrom(stats,BIC) importFrom(stats,cmdscale) +importFrom(stats,coef) importFrom(stats,complete.cases) importFrom(stats,logLik) importFrom(stats,model.matrix) +importFrom(stats,predict) importFrom(stats,rnorm) importFrom(stats,runif) +importFrom(stats,update) importFrom(stats,vcov) importFrom(utils,menu) useDynLib(seqHMM, .registration = TRUE) diff --git a/R/average_marginal_prediction.R b/R/average_marginal_prediction.R index d9a0b66f..44dc21bd 100644 --- a/R/average_marginal_prediction.R +++ b/R/average_marginal_prediction.R @@ -16,6 +16,9 @@ average_marginal_prediction <- function( model, variable, values, marginalize_B_over = "sequences", newdata = NULL, nsim = 0, probs = c(0.025, 0.5, 0.975)) { + # avoid warnings of NSEs + cluster <- state <- estimate <- state_from <- state_to <- time_var <- + channel <- observation <- NULL stopifnot_( inherits(model, "nhmm") || inherits(model, "mnhmm"), "Argument {.arg model} must be a {.cls nhmm} or {.cls mnhmm} object." @@ -106,7 +109,7 @@ average_marginal_prediction <- function( estimate = unlist(pred$pi) ) |> dplyr::group_by(cluster, state) |> - summarise(estimate = mean(estimate)) + dplyr::summarise(estimate = mean(estimate)) A <- data.frame( cluster = rep(model$cluster_names, each = S^2 * T * N), diff --git a/R/estimate_nhmm.R b/R/estimate_nhmm.R index a9c6e51f..9577a83a 100644 --- a/R/estimate_nhmm.R +++ b/R/estimate_nhmm.R @@ -9,6 +9,7 @@ #' an `stslist` object (see [TraMineR::seqdef()]) containing the #' sequences. In case of multichannel data, `observations` should be a vector #' of response variable names in `data`, or a list of `stslist` objects. +#' @param n_states A positive integer defining the number of hidden states. #' @param initial_formula of class [formula()] for the #' initial state probabilities. #' @param transition_formula of class [formula()] for the diff --git a/R/forwardBackward.R b/R/forwardBackward.R index 6e0f8108..a08f8f2b 100644 --- a/R/forwardBackward.R +++ b/R/forwardBackward.R @@ -16,6 +16,7 @@ #' @param as_data_frame If `TRUE` (default), the output is returned as a #' data.frame. Otherwise, a list of array(s) is returned. Ignored if #' `log_space` is `FALSE`, in which case list of arrays is always returned. +#' @param ... Ignored. #' @return If `as_data_frame` is `TRUE` a `data.frame` with #' log-values of forward and backward probabilities. If `FALSE` or #' `log_space = FALSE`, a list with components diff --git a/R/get_coefs.R b/R/get_coefs.R index a7a783b8..7ef0d9ea 100644 --- a/R/get_coefs.R +++ b/R/get_coefs.R @@ -8,6 +8,7 @@ #' estimates are returned. #' @param probs Vector defining the quantiles of interest. Default is #' `c(0.025, 0.5, 0.975)`. +#' @param ... Ignored. #' @rdname coef #' @export coef.nhmm <- function(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...) { diff --git a/R/get_probs.R b/R/get_probs.R index 4c4ae4eb..eb54930e 100644 --- a/R/get_probs.R +++ b/R/get_probs.R @@ -2,12 +2,15 @@ #' or MNHMM #' #' @param model An object of class `nhmm` or `mnhmm`. +#' @param newdata An optional data frame containing the new data to be used in +#' computing the probabilities. #' @param nsim Non-negative integer defining the number of samples from the #' normal approximation of the model parameters used in #' computing the approximate quantiles of the estimates. If `0`, only point #' estimates are returned. #' @param probs Vector defining the quantiles of interest. Default is #' `c(0.025, 0.5, 0.975)`. +#' @param ... Ignored. #' @rdname get_probs #' @export get_probs <- function(model, ...) { diff --git a/R/hidden_paths.R b/R/hidden_paths.R index d2c9bffd..21ed5060 100644 --- a/R/hidden_paths.R +++ b/R/hidden_paths.R @@ -7,7 +7,8 @@ #' @param model A hidden Markov model. #' @param respect_void If `TRUE` (default), states at the time points #' corresponding to `TraMineR`'s void in the observed sequences are set to void -#' in the hidden state sequences as well.#' +#' in the hidden state sequences as well. +#' @param ... Ignored. #' @return The most probable paths of hidden states as an `stslist` object #' (see [seqdef()]). The log-probability is included as an attribute #' `log_prop`. diff --git a/R/most_probable_cluster.R b/R/most_probable_cluster.R index 877ca4d6..0ce3d062 100644 --- a/R/most_probable_cluster.R +++ b/R/most_probable_cluster.R @@ -22,11 +22,14 @@ most_probable_cluster <- function(x, type = "viterbi", hp) { posterior_cluster_probabilities <- function(x) { pp <- posterior_probs(x, as_data_frame = FALSE) posterior_cluster_probabilities <- matrix(0, x$n_sequences, x$n_clusters) - n_states <- rep(x$n_states, length.out = x$n_channels) + n_states <- rep(x$n_states, length.out = x$n_clusters) p <- 0 for (i in seq_len(x$n_clusters)) { posterior_cluster_probabilities[, i] <- - colSums(pp[(p + 1):(p + n_states[i]), 1, ]) + colSums(array( + pp[(p + 1):(p + n_states[i]), 1, ], + dim = c(n_states[i], x$n_sequences) + )) p <- p + n_states[i] } dimnames(posterior_cluster_probabilities) <- list( diff --git a/R/plot.amp.R b/R/plot.amp.R index a3fb9390..906522a1 100644 --- a/R/plot.amp.R +++ b/R/plot.amp.R @@ -1,12 +1,14 @@ #' Visualize Average Marginal Effects #' -#' @importFrom ggplot2 ggplot aes geom_pointrange geom_ribbon geom_line facet_wrap -#' @param x Output from [amp()]. +#' @param x Output from [average_marginal_prediction()]. +#' @param type Type of plot to create. One of `"initial"`, `"transition"`, +#' `"emission"`, or `"cluster"`. #' @param alpha Transparency level for [ggplot2::geom_ribbon()]. plot.amp <- function(x, type, probs = c(0.025, 0.975), alpha = 0.25) { type <- match.arg(type, c("initial", "transition", "emission", "cluster")) - cluster <- time <- state <- state_from <- state_to <- observation <- NULL + cluster <- time <- state <- state_from <- state_to <- observation <- + estimate <- NULL stopifnot_( checkmate::test_numeric( x = probs, lower = 0, upper = 1, any.missing = FALSE, min.len = 2L, @@ -17,6 +19,10 @@ plot.amp <- function(x, type, probs = c(0.025, 0.975), alpha = 0.25) { ) lwr <- paste0("q", 100 * probs[1]) upr <- paste0("q", 100 * probs[2]) + stopifnot_( + all(c(lwr, upr) %in% names(x$initial)), + "The probabilities in {.arg probs} are not available in the {.arg x}." + ) if (type == "initial") { p <- ggplot(x$initial, aes(estimate, state)) + geom_pointrange(aes(ymin = .data[[lwr]], ymax = .data[[upr]])) diff --git a/R/posterior_probs.R b/R/posterior_probs.R index da8620e4..49f2b743 100644 --- a/R/posterior_probs.R +++ b/R/posterior_probs.R @@ -10,6 +10,7 @@ #' non-homogenous models. #' @param as_data_frame If `TRUE` (default), the output is returned as a #' data.frame. Otherwise, a 3d array is returned. +#' @param ... Ignored. #' @return Posterior probabilities. In case of multiple observations, #' these are computed independently for each sequence. #' @examples diff --git a/R/print.R b/R/print.R index 1ceb0c1f..26c019c8 100644 --- a/R/print.R +++ b/R/print.R @@ -77,7 +77,7 @@ print.mhmm <- function(x, digits = 3, ...) { cat("\nNumber of clusters:", x$n_clusters) } } - cat("Coefficients :\n") + cat("\nCoefficients :\n") print(x$coefficients, digits = digits, ...) if (attr(x, "type") != "lcm") { diff --git a/R/seqHMM-deprecated.R b/R/seqHMM-deprecated.R index a0ee5e63..e2807a9b 100644 --- a/R/seqHMM-deprecated.R +++ b/R/seqHMM-deprecated.R @@ -8,28 +8,3 @@ #' #' @name seqHMM-deprecated NULL - -#' Defunct function(s) in the seqHMM package -#' -#' These functions are no longer available in the seqHMM package. -#' -#' @name seqHMM-defunct -fit_hmm <- function( - model, em_step = TRUE, global_step = FALSE, local_step = FALSE, - control_em = list(), control_global = list(), control_local = list(), - lb, ub, threads = 1, log_space = FALSE, ...) { - .Defunct("fit_model", package = "seqHMM") -} -#' @name seqHMM-defunct -fit_mhmm <- function( - model, em_step = TRUE, global_step = FALSE, local_step = FALSE, - control_em = list(), control_global = list(), control_local = list(), - lb, ub, threads = 1, log_space = FALSE, ...) { - .Defunct("fit_model", package = "seqHMM") -} -#' @name seqHMM-defunct -trim_hmm <- function( - model, maxit = 0, return_loglik = FALSE, zerotol = 1e-8, - verbose = TRUE, ...) { - .Defunct("trim_model", package = "seqHMM") -} diff --git a/R/seqHMM-package.R b/R/seqHMM-package.R index 4142e6de..f7bc8bdf 100644 --- a/R/seqHMM-package.R +++ b/R/seqHMM-package.R @@ -30,12 +30,15 @@ #' @importFrom RcppParallel RcppParallelLibs CxxFlags #' @importFrom Rcpp loadModule evalCpp #' @importFrom Matrix .bdiag -#' @importFrom stats logLik cmdscale complete.cases model.matrix BIC rnorm runif vcov +#' @importFrom stats logLik cmdscale complete.cases model.matrix BIC rnorm runif vcov predict update coef #' @importFrom TraMineR alphabet seqstatf seqdef seqlegend seqdist seqdistmc seqplot seqlength is.stslist #' @importFrom grDevices col2rgb rainbow #' @importFrom graphics barplot par plot plot.new polygon strwidth text #' @importFrom methods hasArg #' @importFrom utils menu +#' @importFrom ggplot2 ggplot aes geom_pointrange geom_ribbon geom_line facet_wrap +#' @importFrom rlang := +#' @importFrom dplyr across all_of #' @references Helske S. and Helske J. (2019). Mixture Hidden Markov Models for Sequence Data: The seqHMM Package in R, #' Journal of Statistical Software, 88(3), 1-32. doi:10.18637/jss.v088.i03 "_PACKAGE" diff --git a/R/utilities.R b/R/utilities.R index 1a2f1349..665627fb 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -185,7 +185,7 @@ stop_ <- function(message, ..., call = rlang::caller_env()) { #' @param f A formula object. #' @noRd intercept_only <- function(f) { - identical(deparse(stats::update(f, 0 ~ .)), "0 ~ 1") + identical(deparse(update(f, 0 ~ .)), "0 ~ 1") } #' Create obsArray for Various C++ functions #' diff --git a/man/coef.Rd b/man/coef.Rd index 2ef115a4..2a9b7d58 100644 --- a/man/coef.Rd +++ b/man/coef.Rd @@ -20,6 +20,8 @@ estimates are returned.} \item{probs}{Vector defining the quantiles of interest. Default is \code{c(0.025, 0.5, 0.975)}.} + +\item{...}{Ignored.} } \description{ Get the Estimated Regression Coefficients of Non-Homogeneous Hidden Markov diff --git a/man/estimate_mnhmm.Rd b/man/estimate_mnhmm.Rd index b8b40f26..200db8ae 100644 --- a/man/estimate_mnhmm.Rd +++ b/man/estimate_mnhmm.Rd @@ -33,6 +33,8 @@ an \code{stslist} object (see \code{\link[TraMineR:seqdef]{TraMineR::seqdef()}}) sequences. In case of multichannel data, \code{observations} should be a vector of response variable names in \code{data}, or a list of \code{stslist} objects.} +\item{n_states}{A positive integer defining the number of hidden states.} + \item{n_clusters}{A positive integer defining the number of clusters (mixtures).} diff --git a/man/estimate_nhmm.Rd b/man/estimate_nhmm.Rd index d9d9bddd..fe89c343 100644 --- a/man/estimate_nhmm.Rd +++ b/man/estimate_nhmm.Rd @@ -30,6 +30,8 @@ an \code{stslist} object (see \code{\link[TraMineR:seqdef]{TraMineR::seqdef()}}) sequences. In case of multichannel data, \code{observations} should be a vector of response variable names in \code{data}, or a list of \code{stslist} objects.} +\item{n_states}{A positive integer defining the number of hidden states.} + \item{initial_formula}{of class \code{\link[=formula]{formula()}} for the initial state probabilities.} diff --git a/man/forward_backward.Rd b/man/forward_backward.Rd index 5aaa1584..e9be258b 100644 --- a/man/forward_backward.Rd +++ b/man/forward_backward.Rd @@ -35,6 +35,8 @@ forward_backward(model, ...) \arguments{ \item{model}{A hidden Markov model.} +\item{...}{Ignored.} + \item{forward_only}{If \code{TRUE}, only forward probabilities are computed. The default is \code{FALSE}.} diff --git a/man/get_probs.Rd b/man/get_probs.Rd index 2f44bbdb..d316ee31 100644 --- a/man/get_probs.Rd +++ b/man/get_probs.Rd @@ -16,6 +16,11 @@ get_probs(model, ...) \arguments{ \item{model}{An object of class \code{nhmm} or \code{mnhmm}.} +\item{...}{Ignored.} + +\item{newdata}{An optional data frame containing the new data to be used in +computing the probabilities.} + \item{nsim}{Non-negative integer defining the number of samples from the normal approximation of the model parameters used in computing the approximate quantiles of the estimates. If \code{0}, only point diff --git a/man/hidden_paths.Rd b/man/hidden_paths.Rd index f2273713..d9a75b13 100644 --- a/man/hidden_paths.Rd +++ b/man/hidden_paths.Rd @@ -21,9 +21,11 @@ hidden_paths(model, ...) \arguments{ \item{model}{A hidden Markov model.} +\item{...}{Ignored.} + \item{respect_void}{If \code{TRUE} (default), states at the time points corresponding to \code{TraMineR}'s void in the observed sequences are set to void -in the hidden state sequences as well.#'} +in the hidden state sequences as well.} } \value{ The most probable paths of hidden states as an \code{stslist} object diff --git a/man/plot.amp.Rd b/man/plot.amp.Rd index 7d847f07..4a97341a 100644 --- a/man/plot.amp.Rd +++ b/man/plot.amp.Rd @@ -7,7 +7,10 @@ \method{plot}{amp}(x, type, probs = c(0.025, 0.975), alpha = 0.25) } \arguments{ -\item{x}{Output from \code{\link[=amp]{amp()}}.} +\item{x}{Output from \code{\link[=average_marginal_prediction]{average_marginal_prediction()}}.} + +\item{type}{Type of plot to create. One of \code{"initial"}, \code{"transition"}, +\code{"emission"}, or \code{"cluster"}.} \item{alpha}{Transparency level for \code{\link[ggplot2:geom_ribbon]{ggplot2::geom_ribbon()}}.} } diff --git a/man/posterior_probs.Rd b/man/posterior_probs.Rd index b18e900a..e95e67ac 100644 --- a/man/posterior_probs.Rd +++ b/man/posterior_probs.Rd @@ -21,6 +21,8 @@ posterior_probs(model, ...) \arguments{ \item{model}{A hidden Markov model.} +\item{...}{Ignored.} + \item{log_space}{Internally compute posterior probabilities in logarithmic scale. The default is \code{TRUE}, which is also only option for non-homogenous models.} diff --git a/man/seqHMM-defunct.Rd b/man/seqHMM-defunct.Rd deleted file mode 100644 index 7d0f18ee..00000000 --- a/man/seqHMM-defunct.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seqHMM-deprecated.R -\name{seqHMM-defunct} -\alias{seqHMM-defunct} -\alias{fit_hmm} -\alias{fit_mhmm} -\alias{trim_hmm} -\title{Defunct function(s) in the seqHMM package} -\usage{ -fit_hmm( - model, - em_step = TRUE, - global_step = FALSE, - local_step = FALSE, - control_em = list(), - control_global = list(), - control_local = list(), - lb, - ub, - threads = 1, - log_space = FALSE, - ... -) - -fit_mhmm( - model, - em_step = TRUE, - global_step = FALSE, - local_step = FALSE, - control_em = list(), - control_global = list(), - control_local = list(), - lb, - ub, - threads = 1, - log_space = FALSE, - ... -) - -trim_hmm( - model, - maxit = 0, - return_loglik = FALSE, - zerotol = 1e-08, - verbose = TRUE, - ... -) -} -\description{ -These functions are no longer available in the seqHMM package. -}