docs: correct usage of default / initial value (#291)
* docs: correct usage of default / initial value

* fix workflow

* fix gh actions

* typo

* ...

* try and fix workflow

* typo

* try to fix ci

* don't use pip

* ...

* fix rfsrc learners

* typo

* fix typos

* Update R/learner_lightgbm_regr_lightgbm.R

* document

* fix parameter tests

* style
sebffischer authored Sep 11, 2023
1 parent 32cf522 commit 7ff1264
Showing 40 changed files with 155 additions and 275 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/rcmdcheck.yml
@@ -56,11 +56,12 @@ jobs:

- name: Install Python
run: |
pak::pkg_install('rstudio/reticulate')
pak::pkg_install('reticulate')
reticulate::install_miniconda()
reticulate::use_condaenv('r-reticulate')
install.packages('keras')
keras::install_keras(extra_packages = c('IPython', 'requests', 'certifi', 'urllib3', 'tensorflow-hub', 'tabnet'))
reticulate::py_install(c('torch', 'pycox'), pip = TRUE)
keras::install_keras(extra_packages = c('IPython', 'requests', 'certifi', 'urllib3', 'tensorflow-hub', 'tabnet'), method = "conda")
reticulate::py_install(c('pytorch', 'pycox'), method = "conda")
shell: Rscript {0}

- uses: r-lib/actions/check-r-package@v2
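The step above switches the Python dependencies from pip to conda installs. A minimal sketch of how one might verify from an R session that the modules ended up importable, assuming the default "r-reticulate" conda environment created by reticulate::install_miniconda():

library(reticulate)

use_condaenv("r-reticulate", required = TRUE)

# py_module_available() returns TRUE if the Python module can be imported.
for (mod in c("keras", "torch", "pycox")) {
  message(sprintf("%s available: %s", mod, py_module_available(mod)))
}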
4 changes: 2 additions & 2 deletions R/learner_LiblineaR_regr_liblinear.R
@@ -15,10 +15,10 @@
#' * `type = 12` – L2-regularized L2-loss support vector regression (dual)
#' * `type = 13` – L2-regularized L1-loss support vector regression (dual)
#'
#' @section Custom mlr3 defaults:
#' @section Initial parameter values:
#' - `svr_eps`:
#' - Actual default: `NULL`
#' - Adjusted default: 0.001
#' - Initial value: 0.001
#' - Reason for change: `svr_eps` is type dependent and the "type" is handled
#' by the mlr3learner. The default value is set to the default of the respective
#' "type".
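What this wording change amounts to for users: 0.001 is not the upstream LiblineaR default for svr_eps but a value the mlr3 learner writes into its parameter values on construction, which is why it is now documented as an initial value rather than an adjusted default. A minimal sketch, assuming the learner id is "regr.liblinear" and mlr3extralearners is installed:

library(mlr3)
library(mlr3extralearners)

learner = lrn("regr.liblinear")

# The initial value lives in the learner's active parameter values ...
learner$param_set$values$svr_eps        # 0.001, as documented above

# ... and can be overwritten like any other hyperparameter.
learner$param_set$values$svr_eps = 0.01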
11 changes: 4 additions & 7 deletions R/learner_dbarts_regr_bart.R
@@ -9,19 +9,16 @@
#' @template learner
#' @templateVar id regr.bart
#'
#' @section Initial parameter values:
#' @section Custom mlr3 parameters:
#' * Parameter: offset
#' * The parameter is removed, because only `dbarts::bart2` allows an offset during training,
#' and therefore the offset parameter in `dbarts:::predict.bart` is irrelevant for
#' `dbarts::dbart`.
#' * Parameter: nchain, combineChains, combinechains
#' * The parameters are removed as parallelization of multiple models is handled by future.
#'
#' @section Custom mlr3 defaults:
#' * Parameter: keeptrees
#' * Original: FALSE
#' * New: TRUE
#' * Reason: Required for prediction
#' @section Initial parameter values:
#' * `keeptrees` is initialized to `TRUE` because it is required for prediction.
#'
#' @references
#' `r format_bib("sparapani2021nonparametric", "chipman2010bart")`
@@ -71,7 +68,7 @@ LearnerRegrBart = R6Class("LearnerRegrBart",
packages = c("mlr3extralearners", "dbarts"),
feature_types = c("integer", "numeric", "factor", "ordered"),
# TODO: add "se" to the list of predict types.
predict_types = c("response"),
predict_types = "response",
param_set = ps,
properties = c("weights"),
man = "mlr3extralearners::mlr_learners_regr.bart",
4 changes: 2 additions & 2 deletions R/learner_flexsurv_surv_flexible.R
@@ -21,10 +21,10 @@
#' and covariates \eqn{X^T = (X_0,...,X_P)^T}{X^T = (X0,...,XP)^T}, where \eqn{X_0}{X0} is a column
#' of \eqn{1}s: \eqn{lp = \beta X}{lp = \betaX}.
#'
#' @section Custom mlr3 defaults:
#' @section Initial parameter values:
#' - `k`:
#' - Actual default: `0`
#' - Adjusted default: `1`
#' - Initial value: `1`
#' - Reason for change: The default value of `0` is equivalent to, and a much less efficient
#' implementation of, [LearnerSurvParametric].
#'
13 changes: 3 additions & 10 deletions R/learner_gbm_classif_gbm.R
@@ -9,16 +9,9 @@
#' @template learner
#' @templateVar id classif.gbm
#'
#' @section Custom mlr3 defaults:
#' - `keep.data`:
#' - Actual default: TRUE
#' - Adjusted default: FALSE
#' - Reason for change: `keep.data = FALSE` saves memory during model fitting.
#' - `n.cores`:
#' - Actual default: NULL
#' - Adjusted default: 1
#' - Reason for change: Suppressing the automatic internal parallelization if
#' `cv.folds` > 0.
#' @section Initial parameter values:
#' - `keep.data` is initialized to `FALSE` to save memory.
#' - `n.cores` is initialized to 1 to avoid conflicts with parallelization through future.
#'
#' @references
#' `r format_bib("friedman2002stochastic")`
4 changes: 2 additions & 2 deletions R/learner_glmnet_surv_cv_glmnet.R
@@ -6,8 +6,8 @@
#' Generalized linear models with elastic net regularization.
#' Calls [glmnet::cv.glmnet()] from package \CRANpkg{glmnet}.
#'
#' @section Custom mlr3 defaults:
#' - `family` The default is set to `"cox"`.
#' @section Custom mlr3 parameters:
#' - `family` is set to `"cox"` and cannot be changed.
#'
#' @templateVar id surv.cv_glmnet
#' @template learner
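The rewording above clarifies that family is not a default the user may override: the learner always fits a Cox model. A hedged sketch of typical use, assuming mlr3proba supplies the survival task infrastructure and the learner id is "surv.cv_glmnet":

library(mlr3)
library(mlr3proba)           # survival tasks and measures
library(mlr3extralearners)

task = tsk("rats")           # example right-censored survival task
learner = lrn("surv.cv_glmnet")

learner$train(task)          # internally calls glmnet::cv.glmnet(..., family = "cox")
learner$predict(task)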
4 changes: 2 additions & 2 deletions R/learner_glmnet_surv_glmnet.R
@@ -6,8 +6,8 @@
#' Generalized linear models with elastic net regularization.
#' Calls [glmnet::glmnet()] from package \CRANpkg{glmnet}.
#'
#' @section Custom mlr3 defaults:
#' - `family` The default is set to `"cox"`.
#' @section Custom mlr3 parameters:
#' - `family` is set to `"cox"` and cannot be changed.
#'
#' @details
#' Caution: This learner is different to learners calling [glmnet::cv.glmnet()]
20 changes: 10 additions & 10 deletions R/learner_lightgbm_classif_lightgbm.R
@@ -14,27 +14,27 @@
#' @templateVar id classif.lightgbm
#'
#' @section Initial parameter values:
#' * `convert_categorical`:
#' Additional parameter. If this parameter is set to `TRUE` (default), all factor and logical
#' columns are converted to integers and the parameter categorical_feature of lightgbm is set to
#' those columns.
#' * `num_class`:
#' This parameter is automatically inferred for multiclass tasks and does not have to be set.
#' @section Custom mlr3 defaults:
#' * `num_threads`:
#' * Actual default: 0L
#' * Adjusted default: 1L
#' * Initial value: 1L
#' * Reason for change: Prevents accidental conflicts with `future`.
#' * `verbose`:
#' * Actual default: 1L
#' * Adjusted default: -1L
#' * Initial value: -1L
#' * Reason for change: Prevents accidental conflicts with mlr messaging system.
#' @section Custom mlr3 defaults:
#' * `objective`:
#' Depending if the task is binary / multiclass, the default is set to `"binary"` or
#' Depending if the task is binary / multiclass, the default is `"binary"` or
#' `"multiclasss"`.
#' @section Custom mlr3 parameters:
#' * `early_stopping`
#' Whether to use the test set for early stopping. Default is `FALSE`.
#' * `convert_categorical`:
#' Additional parameter. If this parameter is set to `TRUE` (default), all factor and logical
#' columns are converted to integers and the parameter categorical_feature of lightgbm is set to
#' those columns.
#' * `num_class`:
#' This parameter is automatically inferred for multiclass tasks and does not have to be set.
#'
#' @references
#' `r format_bib("ke2017lightgbm")`
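convert_categorical, num_class, and early_stopping are mlr3-side additions rather than native lightgbm arguments, which is why they are grouped under "Custom mlr3 parameters". A minimal sketch of how the documented parameters surface in practice, assuming the learner id is "classif.lightgbm":

library(mlr3)
library(mlr3extralearners)

# convert_categorical = TRUE (the default) passes factor/logical columns
# to lightgbm via categorical_feature after integer conversion.
learner = lrn("classif.lightgbm", convert_categorical = TRUE)

# num_threads and verbose carry initial values (1L and -1L) but stay user-settable.
learner$param_set$values$num_threads
learner$param_set$values$num_threads = 4

learner$train(tsk("sonar"))   # binary task, so objective defaults to "binary"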
15 changes: 7 additions & 8 deletions R/learner_lightgbm_regr_lightgbm.R
@@ -14,23 +14,22 @@
#' @templateVar id regr.lightgbm
#'
#' @section Initial parameter values:
#' * `convert_categorical`:
#' Additional parameter. If this parameter is set to `TRUE` (default), all factor and logical
#' columns are converted to integers and the parameter categorical_feature of lightgbm is set to
#' those columns.
#' @section Custom mlr3 defaults:
#' * `num_threads`:
#' * Actual default: 0L
#' * Adjusted default: 1L
#' * Initial value: 1L
#' * Reason for change: Prevents accidental conflicts with `future`.
#' * `verbose`:
#' * Actual default: 1L
#' * Adjusted default: -1L
#' * Initial value: -1L
#' * Reason for change: Prevents accidental conflicts with mlr messaging system.
#'
#' @section Custom mlr3 parameters:
#' * `early_stopping`
#' Whether to use the test set for early stopping. Default is `FALSE`.
#' * `convert_categorical`:
#' Additional parameter. If this parameter is set to `TRUE` (default), all factor and logical
#' columns are converted to integers and the parameter categorical_feature of lightgbm is set to
#' those columns.
#'
#' @references
#' `r format_bib("ke2017lightgbm")`
@@ -199,7 +198,7 @@ LearnerRegrLightGBM = R6Class("LearnerRegrLightGBM",
id = "regr.lightgbm",
packages = c("mlr3extralearners", "lightgbm"),
feature_types = c("numeric", "integer", "logical", "factor"),
predict_types = c("response"),
predict_types = "response",
param_set = ps,
properties = c("weights", "missings", "importance", "hotstart_forward"),
man = "mlr3extralearners::mlr_learners_regr.lightgbm",
2 changes: 1 addition & 1 deletion R/learner_mboost_regr_gamboost.R
@@ -52,7 +52,7 @@ LearnerRegrGAMBoost = R6Class("LearnerRegrGAMBoost",
id = "regr.gamboost",
packages = c("mlr3extralearners", "mboost"),
feature_types = c("integer", "numeric", "factor", "ordered"),
predict_types = c("response"),
predict_types = "response",
param_set = ps,
properties = "weights",
man = "mlr3extralearners::mlr_learners_regr.gamboost",
2 changes: 1 addition & 1 deletion R/learner_mboost_regr_glmboost.R
@@ -50,7 +50,7 @@ LearnerRegrGLMBoost = R6Class("LearnerRegrGLMBoost",
id = "regr.glmboost",
packages = c("mlr3extralearners", "mboost"),
feature_types = c("integer", "numeric", "factor", "ordered"),
predict_types = c("response"),
predict_types = "response",
param_set = ps,
properties = "weights",
man = "mlr3extralearners::mlr_learners_regr.glmboost",
9 changes: 3 additions & 6 deletions R/learner_obliqueRSF_surv_obliqueRSF.R
@@ -10,17 +10,14 @@
#' @template learner
#' @templateVar id surv.obliqueRSF
#'
#' @section Initial parameter values:
#' @section Custom mlr3 parameters:
#' - `mtry`:
#' - This hyperparameter can alternatively be set via the added hyperparameter `mtry_ratio`
#' as `mtry = max(ceiling(mtry_ratio * n_features), 1)`.
#' Note that `mtry` and `mtry_ratio` are mutually exclusive.
#'
#' @section Custom mlr3 defaults:
#' - `verbose`:
#' - Actual default: `TRUE`
#' - Adjusted default: `FALSE`
#' - Reason for change: mlr3 already has it's own verbose set to `TRUE` by default
#' @section Initial parameter values:
#' - `verbose` is initialized to `FALSE`.
#'
#' @references
#' `r format_bib("jaeger_2019")`
29 changes: 9 additions & 20 deletions R/learner_randomForestSRC_classif_imbalanced_rfsrc.R
@@ -6,21 +6,8 @@
#' Imbalanced Random forest for classification between two classes.
#' Calls [randomForestSRC::imbalanced.rfsrc()] from \CRANpkg{randomForestSRC}.
#'
#' @section Custom mlr3 parameters:
#' - `mtry`:
#' - This hyperparameter can alternatively be set via the added hyperparameter `mtry.ratio`
#' as `mtry = max(ceiling(mtry.ratio * n_features), 1)`.
#' Note that `mtry` and `mtry.ratio` are mutually exclusive.
#' - `sampsize`:
#' - This hyperparameter can alternatively be set via the added hyperparameter `sampsize.ratio`
#' as `sampsize = max(ceiling(sampsize.ratio * n_obs), 1)`.
#' Note that `sampsize` and `sampsize.ratio` are mutually exclusive.
#'
#' @section Custom mlr3 defaults:
#' - `cores`:
#' - Actual default: Auto-detecting the number of cores
#' - Adjusted default: 1
#' - Reason for change: Threading conflicts with explicit parallelization via \CRANpkg{future}.
#' @inheritSection mlr_learners_classif.rfsrc Custom mlr3 parameters
#'
#' @templateVar id classif.imbalanced_rfsrc
#' @template learner
@@ -101,12 +88,12 @@ LearnerClassifImbalancedRandomForestSRC = R6Class("LearnerClassifImbalancedRando
do.trace = p_lgl(default = FALSE, tags = c("train", "predict")),
statistics = p_lgl(default = FALSE, tags = c("train", "predict")),
get.tree = p_uty(tags = "predict"),
outcome = p_fct(
default = "train", levels = c("train", "test"),
tags = "predict"),
ptn.count = p_int(default = 0L, lower = 0L, tags = "predict"),
cores = p_int(default = 1L, lower = 1L, tags = c("train", "predict", "threads")),
save.memory = p_lgl(default = FALSE, tags = "train"),
outcome = p_fct(
default = "train", levels = c("train", "test"),
tags = "predict"),
ptn.count = p_int(default = 0L, lower = 0L, tags = "predict"),
cores = p_int(default = 1L, lower = 1L, tags = c("train", "predict", "threads")),
save.memory = p_lgl(default = FALSE, tags = "train"),
perf.type = p_fct(levels = c("gmean", "misclass", "brier", "none"), tags = "train") # nolint
)

Expand Down Expand Up @@ -155,6 +142,7 @@ LearnerClassifImbalancedRandomForestSRC = R6Class("LearnerClassifImbalancedRando
pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names))
pv = convert_ratio(pv, "sampsize", "sampsize.ratio", task$nrow)
cores = pv$cores %??% 1L
pv$cores = NULL

if ("weights" %in% task$properties) {
pv$case.wt = as.numeric(task$weights$weight) # nolint
@@ -168,6 +156,7 @@ LearnerClassifImbalancedRandomForestSRC = R6Class("LearnerClassifImbalancedRando
newdata = data.table::setDF(ordered_features(task, self))
pars = self$param_set$get_values(tags = "predict")
cores = pars$cores %??% 1L
pars$cores = NULL
pred = invoke(predict,
object = self$model,
newdata = newdata,
11 changes: 5 additions & 6 deletions R/learner_randomForestSRC_classif_rfsrc.R
@@ -9,7 +9,7 @@
#' @template learner
#' @templateVar id classif.rfsrc
#'
#' @section Initial parameter values:
#' @section Custom mlr3 parameters:
#' - `mtry`:
#' - This hyperparameter can alternatively be set via the added hyperparameter `mtry.ratio`
#' as `mtry = max(ceiling(mtry.ratio * n_features), 1)`.
@@ -18,12 +18,8 @@
#' - This hyperparameter can alternatively be set via the added hyperparameter `sampsize.ratio`
#' as `sampsize = max(ceiling(sampsize.ratio * n_obs), 1)`.
#' Note that `sampsize` and `sampsize.ratio` are mutually exclusive.
#'
#' @section Custom mlr3 defaults:
#' - `cores`:
#' - Actual default: Auto-detecting the number of cores
#' - Adjusted default: 1
#' - Reason for change: Threading conflicts with explicit parallelization via \CRANpkg{future}.
#' This value is set as the option `rf.cores` during training and defaults to 1.
#'
#' @references
#' `r format_bib("breiman_2001")`
@@ -157,6 +153,7 @@ LearnerClassifRandomForestSRC = R6Class("LearnerClassifRandomForestSRC",
pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names))
pv = convert_ratio(pv, "sampsize", "sampsize.ratio", task$nrow)
cores = pv$cores %??% 1L
pv$cores = NULL

if ("weights" %in% task$properties) {
pv$case.wt = as.numeric(task$weights$weight) # nolint
@@ -171,6 +168,8 @@ LearnerClassifRandomForestSRC = R6Class("LearnerClassifRandomForestSRC",
newdata = data.table::setDF(ordered_features(task, self))
pars = self$param_set$get_values(tags = "predict")
cores = pars$cores %??% 1L
pars$cores = NULL

pred = invoke(predict,
object = self$model,
newdata = newdata,
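The ratio parameters documented above are translated into absolute counts before randomForestSRC is called (the training code uses a convert_ratio() helper for this). A standalone sketch of the same arithmetic with made-up numbers, just to make the formula concrete:

# Sketch of the documented conversion, not the package's internal helper:
#   mtry     = max(ceiling(mtry.ratio * n_features), 1)
#   sampsize = max(ceiling(sampsize.ratio * n_obs), 1)
ratio_to_count = function(ratio, n) max(ceiling(ratio * n), 1L)

n_features = 10
n_obs = 150

ratio_to_count(0.3, n_features)   # mtry.ratio = 0.3       -> mtry = 3
ratio_to_count(0.632, n_obs)      # sampsize.ratio = 0.632 -> sampsize = 95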
4 changes: 3 additions & 1 deletion R/learner_randomForestSRC_regr_rfsrc.R
@@ -9,7 +9,7 @@
#' @template learner
#' @templateVar id regr.rfsrc
#'
#' @inheritSection mlr_learners_classif.rfsrc Initial parameter values
#' @inheritSection mlr_learners_classif.rfsrc Custom mlr3 parameters
#'
#' @references
#' `r format_bib("breiman_2001")`
@@ -140,6 +140,7 @@ LearnerRegrRandomForestSRC = R6Class("LearnerRegrRandomForestSRC",
pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names))
pv = convert_ratio(pv, "sampsize", "sampsize.ratio", task$nrow)
cores = pv$cores %??% 1L
pv$cores = NULL

if ("weights" %in% task$properties) {
pv$case.wt = as.numeric(task$weights$weight) # nolint
@@ -154,6 +155,7 @@ LearnerRegrRandomForestSRC = R6Class("LearnerRegrRandomForestSRC",
newdata = ordered_features(task, self)
pars = self$param_set$get_values(tags = "predict")
cores = pars$cores %??% 1L
pars$cores = NULL

list(
response = invoke(predict,
4 changes: 3 additions & 1 deletion R/learner_randomForestSRC_surv_rfsrc.R
@@ -9,7 +9,7 @@
#' @template learner
#' @templateVar id surv.rfsrc
#'
#' @inheritSection mlr_learners_classif.rfsrc Initial parameter values
#' @inheritSection mlr_learners_classif.rfsrc Custom mlr3 parameters
#'
#' @details
#' [randomForestSRC::predict.rfsrc()] returns both cumulative hazard function (chf) and
@@ -150,6 +150,7 @@ delayedAssign(
pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names))
pv = convert_ratio(pv, "sampsize", "sampsize.ratio", task$nrow)
cores = pv$cores %??% 1L
pv$cores = NULL

if ("weights" %in% task$properties) {
pv$case.wt = as.numeric(task$weights$weight) # nolint
@@ -168,6 +169,7 @@ delayedAssign(
pars_predict$estimator = NULL
pars_predict$var.used = "FALSE"
cores = pars_predict$cores %??% 1L # additionally implemented by author
pars_predict$cores = NULL

p = invoke(predict, object = self$model, newdata = newdata, .args = pars_predict,
.opts = list(rf.cores = cores))
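The lines added across the rfsrc learners follow one pattern: read cores from the parameter values, fall back to 1, strip it from the list so it is not forwarded to randomForestSRC, and pass it instead as the rf.cores option of the call. A simplified sketch of that pattern, using a plain is.null() check in place of mlr3misc's %??% operator and hypothetical variable names:

extract_cores = function(pars) {
  cores = if (is.null(pars$cores)) 1L else pars$cores   # same effect as pars$cores %??% 1L
  pars$cores = NULL                                      # drop it before the rfsrc call
  list(cores = cores, pars = pars)
}

pv = list(ntree = 500L, cores = 2L)
x = extract_cores(pv)
x$cores   # 2
x$pars    # list(ntree = 500L); cores has been removed

# The extracted value is then supplied as the rf.cores option, as in the diff:
# invoke(predict, object = model, newdata = newdata, .args = x$pars,
#        .opts = list(rf.cores = x$cores))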
2 changes: 1 addition & 1 deletion R/learner_randomForest_regr_randomForest.R
@@ -52,7 +52,7 @@ LearnerRegrRandomForest = R6Class("LearnerRegrRandomForest",
id = "regr.randomForest",
packages = c("mlr3extralearners", "randomForest"),
feature_types = c("integer", "numeric", "factor", "ordered", "logical"),
predict_types = c("response"),
predict_types = "response",
param_set = ps,
properties = c("weights", "importance", "oob_error"),
man = "mlr3extralearners::mlr_learners_regr.randomForest",