From 457a23d1fd58ef415950ec31122e17303f19ab0e Mon Sep 17 00:00:00 2001 From: john Date: Fri, 26 Jul 2024 15:31:12 +0200 Subject: [PATCH 01/30] Reapply "remove auxiliary code" This reverts commit f74d5cf26510fad7dd7f7683d64c2642a8093aa4. --- R/PipeOpPredClassifSurvDiscTime.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R index 1ec099a0b..ad31a7f13 100644 --- a/R/PipeOpPredClassifSurvDiscTime.R +++ b/R/PipeOpPredClassifSurvDiscTime.R @@ -68,7 +68,6 @@ PipeOpPredClassifSurvDiscTime = R6Class( cumprod(1 - data[data$id == unique_id, ][["dt_hazard"]]) }, numeric(rows_per_id))) - pred_list = list() unique_end_times = sort(unique(data$tend)) # coerce to distribution and crank pred_list = .surv_return(times = unique_end_times, surv = surv) @@ -77,8 +76,7 @@ PipeOpPredClassifSurvDiscTime = R6Class( # basically a slightly more complex unique() real_tend = data$time2[seq_len(nrow(data)) %% rows_per_id == 0] - # select last row for every id - data = as.data.table(data) + # select last row for every id => observed times id = ped_status = NULL # to fix note data = data[, .SD[.N, list(ped_status)], by = id] From 6753d2e550facc6c4f189e8c14e99ea84fc07b0d Mon Sep 17 00:00:00 2001 From: john Date: Fri, 26 Jul 2024 15:31:14 +0200 Subject: [PATCH 02/30] Reapply "add dictionary sections in docs" This reverts commit 6c502462422500cd8c0315294e9b2e59728217aa. --- R/PipeOpPredClassifSurvDiscTime.R | 10 ++++++++++ R/PipeOpTaskSurvClassifDiscTime.R | 12 +++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R index ad31a7f13..5fcf3f1f3 100644 --- a/R/PipeOpPredClassifSurvDiscTime.R +++ b/R/PipeOpPredClassifSurvDiscTime.R @@ -16,6 +16,16 @@ #' conditional probability for an event in the \eqn{k}-interval. 
#' - \eqn{p_k = 1 - h_k = P(T \ge t_k | T \ge t_{k-1})} #' +#' @section Dictionary: +#' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the +#' [dictionary][mlr3misc::Dictionary] [mlr3pipelines::mlr_pipeops] +#' or with the associated sugar function [mlr3pipelines::po()]: +#' ``` +#' PipeOpPredClassifSurvDiscTime$new() +#' mlr_pipeops$get("trafopred_classifsurv_disctime") +#' po("trafopred_classifsurv_disctime") +#' ``` +#' #' @section Input and Output Channels: #' The input is a [PredictionClassif] and a [data.table][data.table::data.table] #' with the transformed data both generated by [PipeOpTaskSurvClassifDiscTime]. diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index f89b72055..ae452b45a 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -10,6 +10,16 @@ #' This approach facilitates survival analysis within a classification framework #' using discrete time intervals (Tutz et al. 2016). #' +#' @section Dictionary: +#' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the +#' [dictionary][mlr3misc::Dictionary] [mlr3pipelines::mlr_pipeops] +#' or with the associated sugar function [mlr3pipelines::po()]: +#' ``` +#' PipeOpTaskSurvClassifDiscTime$new() +#' mlr_pipeops$get("trafotask_survclassif_disctime") +#' po("trafotask_survclassif_disctime") +#' ``` +#' #' @section Input and Output Channels: #' [PipeOpTaskSurvClassifDiscTime] has one input channel named "input", and two #' output channels, one named "output" and the other "transformed_data". @@ -27,7 +37,7 @@ #' feature included. #' The "transformed_data" is a [data.table] which has all the features of the #' "output" task, including an additional column `time2` containing the -#' original times. +#' original observed times. #' This "transformed_data" is only meant to be used with the [PipeOpPredClassifSurvDiscTime]. 
#' #' @section State: From dbbad12644e3b16cca23c083c0b7536898918506 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 26 Jul 2024 15:31:14 +0200 Subject: [PATCH 03/30] Reapply "updocs" This reverts commit 06b0e8b97d2f3acc08da18e9d8544a2f4e1d02dc. --- man/mlr_pipeops_trafopred_classifsurv_disctime.Rd | 12 ++++++++++++ man/mlr_pipeops_trafotask_survclassif_disctime.Rd | 14 +++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd b/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd index b6091af21..96f50868f 100644 --- a/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd +++ b/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd @@ -21,6 +21,18 @@ conditional probability for an event in the \eqn{k}-interval. \item \eqn{p_k = 1 - h_k = P(T \ge t_k | T \ge t_{k-1})} } } +\section{Dictionary}{ + +This \link[mlr3pipelines:PipeOp]{PipeOp} can be instantiated via the +\link[mlr3misc:Dictionary]{dictionary} \link[mlr3pipelines:mlr_pipeops]{mlr3pipelines::mlr_pipeops} +or with the associated sugar function \code{\link[mlr3pipelines:po]{mlr3pipelines::po()}}: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOpPredClassifSurvDiscTime$new() +mlr_pipeops$get("trafopred_classifsurv_disctime") +po("trafopred_classifsurv_disctime") +}\if{html}{\out{</div>
}} +} + \section{Input and Output Channels}{ The input is a \link{PredictionClassif} and a \link[data.table:data.table]{data.table} diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd index 5d71a5491..8e940f13d 100644 --- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd @@ -12,6 +12,18 @@ whether an event occurred within each time interval. This approach facilitates survival analysis within a classification framework using discrete time intervals (Tutz et al. 2016). } +\section{Dictionary}{ + +This \link[mlr3pipelines:PipeOp]{PipeOp} can be instantiated via the +\link[mlr3misc:Dictionary]{dictionary} \link[mlr3pipelines:mlr_pipeops]{mlr3pipelines::mlr_pipeops} +or with the associated sugar function \code{\link[mlr3pipelines:po]{mlr3pipelines::po()}}: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{PipeOpTaskSurvClassifDiscTime$new() +mlr_pipeops$get("trafotask_survclassif_disctime") +po("trafotask_survclassif_disctime") +}\if{html}{\out{</div>
}} +} + \section{Input and Output Channels}{ \link{PipeOpTaskSurvClassifDiscTime} has one input channel named "input", and two @@ -30,7 +42,7 @@ During prediction, the "input" \link{TaskSurv} is transformed to the "output" feature included. The "transformed_data" is a \link{data.table} which has all the features of the "output" task, including an additional column \code{time2} containing the -original times. +original observed times. This "transformed_data" is only meant to be used with the \link{PipeOpPredClassifSurvDiscTime}. } From 0e323a6b6a30e323de2d3f2e0df8e27e16e71a93 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 26 Jul 2024 15:31:14 +0200 Subject: [PATCH 04/30] Reapply "test for same row ids in the test set" This reverts commit aa5a59100ed7b52f29dd0e98998804a54cce079f. --- tests/testthat/test_pipelines.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index a4030a4ce..c7ff532f4 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -127,6 +127,7 @@ test_that("survtoclassif_disctime", { suppressWarnings(cox$train(task)) p2 = cox$predict(task) + expect_equal(p$row_ids, p2$row_ids) expect_equal(p$truth, p2$truth) expect_equal(p$score(), p2$score(), tolerance = 0.01) From 402a74df01d8c358ce18939a2c1728cfbec91566 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 26 Jul 2024 15:31:15 +0200 Subject: [PATCH 05/30] Reapply "test file renaming" This reverts commit cb64a3a4eb076d2b81fccff0fc3b2a4e214e99b8. 
--- .../{test_pipeop_crankcompositor.R => test_pipeop_crankcompose.R} | 0 .../{test_pipeop_distrcompositor.R => test_pipeop_distrcompose.R} | 0 .../{test_pipeop_probregrcompositor.R => test_probregr.R} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/testthat/{test_pipeop_crankcompositor.R => test_pipeop_crankcompose.R} (100%) rename tests/testthat/{test_pipeop_distrcompositor.R => test_pipeop_distrcompose.R} (100%) rename tests/testthat/{test_pipeop_probregrcompositor.R => test_probregr.R} (100%) diff --git a/tests/testthat/test_pipeop_crankcompositor.R b/tests/testthat/test_pipeop_crankcompose.R similarity index 100% rename from tests/testthat/test_pipeop_crankcompositor.R rename to tests/testthat/test_pipeop_crankcompose.R diff --git a/tests/testthat/test_pipeop_distrcompositor.R b/tests/testthat/test_pipeop_distrcompose.R similarity index 100% rename from tests/testthat/test_pipeop_distrcompositor.R rename to tests/testthat/test_pipeop_distrcompose.R diff --git a/tests/testthat/test_pipeop_probregrcompositor.R b/tests/testthat/test_probregr.R similarity index 100% rename from tests/testthat/test_pipeop_probregrcompositor.R rename to tests/testthat/test_probregr.R From 4a07647856c7a20aca6d345e1a66cdd06551de33 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 26 Jul 2024 15:31:15 +0200 Subject: [PATCH 06/30] Reapply "add test for discrete time task reduction" This reverts commit 4be98ce54dbbc4d96f7cb811aae594d038060297. 
--- ...st_pipeop_trafotask_survclassif_disctime.R | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/testthat/test_pipeop_trafotask_survclassif_disctime.R diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R new file mode 100644 index 000000000..6b2670737 --- /dev/null +++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R @@ -0,0 +1,57 @@ +test_that("PipeOpTaskSurvClassifDiscTime", { + task = tsk("lung") + test_ids = c(2, 10, 107) + test_task = task$clone()$filter(rows = test_ids) + expect_equal(test_ids, test_task$row_ids) + + po_disc = mlr3pipelines::po("trafotask_survclassif_disctime", cut = 4) + expect_class(po_disc, c("PipeOp", "PipeOpTaskSurvClassifDiscTime")) + + res = po_disc$train(list(task)) + + # 0 is added + time_cuts = po_disc$state$cut + expect_numeric(time_cuts, len = 5, lower = 0) + # no transformed data during training + expect_data_table(res[["transformed_data"]], nrows = 0, ncols = 0) + # classification task + output_task = res[[1L]] + expect_task_classif(output_task) + expect_equal(output_task$positive, "1") + expect_equal(output_task$target_names, "ped_status") + # new column added to the task + expect_equal("tend", setdiff(output_task$feature_names, task$feature_names)) + # not all observations have events on the last (4th) interval + expect_lt(output_task$nrow, task$nrow * 4) + + res = po_disc$predict(list(test_task)) + pred_task = res[[1L]] + + expect_task_classif(pred_task) + # every test observation will have one row per interval for prediction + expect_equal(pred_task$nrow, test_task$nrow * 4) + # `tend` matches the cut time points (excluding 0 time point) + tends = pred_task$data(cols = "tend")[[1L]] + expect_equal(sort(unique(tends)), time_cuts[2:5]) + # test row ids are correct + expect_equal(pred_task$row_names$row_name, rep(test_ids, each = 4)) + + transformed_data = res[["transformed_data"]] + # test rows ids 
are correct + expect_equal(transformed_data$id, rep(test_ids, each = 4)) + # check columns in the transformed data.table + expect_equal(sort(c("id", "ped_status", "time2", pred_task$feature_names)), + sort(colnames(transformed_data))) + + # `ped_status` per interval and per observation is correct + # before observed time ("time2"), "ped_status" = 0 + expect_equal(as.character(unique(transformed_data[tend < time2, ped_status])), + "0") + times = test_task$times() # observed times + status = as.character(test_task$status()) + # after observed time, "ped_status" must be the same as "status" + td = transformed_data[tend > time2] + expect_equal(as.character(unique(td[id == test_ids[1], ped_status])), status[1]) + expect_equal(as.character(unique(td[id == test_ids[2], ped_status])), status[2]) + expect_equal(as.character(unique(td[id == test_ids[3], ped_status])), status[3]) +}) From 8c42eb290453d84c2da8e1e42f5a802930a0dd31 Mon Sep 17 00:00:00 2001 From: studener Date: Sat, 27 Jul 2024 10:13:55 +0200 Subject: [PATCH 07/30] update pipe to pass tests --- R/PipeOpPredClassifSurvDiscTime.R | 3 ++- R/PipeOpTaskSurvClassifDiscTime.R | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R index 5fcf3f1f3..db894dbfb 100644 --- a/R/PipeOpPredClassifSurvDiscTime.R +++ b/R/PipeOpPredClassifSurvDiscTime.R @@ -86,13 +86,14 @@ PipeOpPredClassifSurvDiscTime = R6Class( # basically a slightly more complex unique() real_tend = data$time2[seq_len(nrow(data)) %% rows_per_id == 0] + ids = unique(data$id) # select last row for every id => observed times id = ped_status = NULL # to fix note data = data[, .SD[.N, list(ped_status)], by = id] # create prediction object p = PredictionSurv$new( - row_ids = seq_row(data), + row_ids = ids, crank = pred_list$crank, distr = pred_list$distr, truth = Surv(real_tend, as.integer(as.character(data$ped_status)))) diff --git 
a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index ae452b45a..62c9a331e 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -143,6 +143,9 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", # remove offset, tstart, interval for dataframe long_data long_data[, c("offset", "tstart", "interval") := NULL] + reps = table(long_data$id) + ids = rep(task$row_ids, times = reps) + long_data[, id := ids] task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data, target = "ped_status", positive = "1") @@ -176,16 +179,26 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", ped_status = id = NULL # fixing global binding notes of data.table new_data[, ped_status := 0] - new_data[new_data[, .I[.N], by = id]$V1, ped_status := status] + + rows_per_id = nrow(new_data) / length(unique(new_data$id)) + new_data$time2 = rep(time, each = rows_per_id) + ids = rep(task$row_ids, each = rows_per_id) + new_data[, id := ids] + + # Set correct ped_status + reps = new_data[, .(count = sum(tend >= time2)), by = id]$count + status = rep(status, times = reps) + new_data[new_data[, .I[tend >= time2], by = id]$V1, ped_status := status] new_data$ped_status = factor(new_data$ped_status, levels = c("0", "1")) # remove offset, tstart, interval for dataframe long_data - new_data[, c("offset", "tstart", "interval") := NULL] + new_data[, c("offset", "tstart", "interval", "time2") := NULL] task_disc = TaskClassif$new(paste0(task$id, "_disc"), new_data, target = "ped_status", positive = "1") task_disc$set_col_roles("id", roles = "name") - new_data$time2 = rep(time, each = sum(new_data$id == 1)) + reps = table(new_data$id) + new_data$time2 = rep(time, each = rows_per_id) list(task_disc, new_data) } ) From 331963c7527929a7c51fa6b621b6fe2d43e5c9a6 Mon Sep 17 00:00:00 2001 From: studener Date: Sat, 27 Jul 2024 10:21:30 +0200 Subject: [PATCH 08/30] fix data.table global bindings --- 
R/PipeOpTaskSurvClassifDiscTime.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index 62c9a331e..5304fc523 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -177,15 +177,16 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", new_data = pammtools::as_ped(data, formula = form, cut = cut) new_data = as.data.table(new_data) - ped_status = id = NULL # fixing global binding notes of data.table + ped_status = id = tend = time2 = NULL # fixing global binding notes of data.table new_data[, ped_status := 0] + # set correct id rows_per_id = nrow(new_data) / length(unique(new_data$id)) new_data$time2 = rep(time, each = rows_per_id) ids = rep(task$row_ids, each = rows_per_id) new_data[, id := ids] - # Set correct ped_status + # set correct ped_status reps = new_data[, .(count = sum(tend >= time2)), by = id]$count status = rep(status, times = reps) new_data[new_data[, .I[tend >= time2], by = id]$V1, ped_status := status] From 3c554d87d0ec6bb1a0511f9adc3fd7cd2c22829c Mon Sep 17 00:00:00 2001 From: studener Date: Sat, 27 Jul 2024 10:30:41 +0200 Subject: [PATCH 09/30] fix global bindings --- R/PipeOpTaskSurvClassifDiscTime.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index 5304fc523..e5d7a2451 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -145,6 +145,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", long_data[, c("offset", "tstart", "interval") := NULL] reps = table(long_data$id) ids = rep(task$row_ids, times = reps) + id = NULL long_data[, id := ids] task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data, @@ -187,7 +188,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", new_data[, id := ids] # set correct ped_status - reps = 
new_data[, .(count = sum(tend >= time2)), by = id]$count + reps = new_data[, data.table(count = sum(tend >= time2)), by = id]$count status = rep(status, times = reps) new_data[new_data[, .I[tend >= time2], by = id]$V1, ped_status := status] new_data$ped_status = factor(new_data$ped_status, levels = c("0", "1")) From b220585c1a7ca86696702ad2e796c5f4abb055e4 Mon Sep 17 00:00:00 2001 From: studener Date: Sat, 27 Jul 2024 10:35:52 +0200 Subject: [PATCH 10/30] delete file --- test.R | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 test.R diff --git a/test.R b/test.R deleted file mode 100644 index cf3820a3e..000000000 --- a/test.R +++ /dev/null @@ -1,5 +0,0 @@ -keys = as.data.table(mlr_tasks)[task_type == "surv"][["key"]] - -tasks = lapply(keys, function(key) { - tsk(key) -}) From 1b917b8d89f0323cfb582b9018249a1bd32c4d35 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 29 Jul 2024 18:42:34 +0200 Subject: [PATCH 11/30] add "original_ids" col role --- R/aaa.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/aaa.R b/R/aaa.R index 5fc3c6f42..20925bec5 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -51,6 +51,7 @@ register_reflections = function() { x$task_col_roles$surv = x$task_col_roles$regr x$task_col_roles$dens = c("feature", "target", "label", "order", "group", "weight", "stratum") + x$task_col_roles$classif = unique(c(x$task_col_roles$classif, "original_ids")) # for discrete time x$task_properties$surv = x$task_properties$regr x$task_properties$dens = x$task_properties$regr From 8e35065be1d28d98f99222b3a9f2571bcd6af2fb Mon Sep 17 00:00:00 2001 From: john Date: Mon, 29 Jul 2024 18:43:06 +0200 Subject: [PATCH 12/30] unregister reflections --- R/zzz.R | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/R/zzz.R b/R/zzz.R index ac879cf8c..4d95d603c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -63,6 +63,7 @@ utils::globalVariables(c( setHook(event, hooks[pkgname != "mlr3proba"], action = "replace") # unregister + unregister_reflections() 
walk(names(mlr3proba_learners), function(nm) mlr_learners$remove(nm)) walk(names(mlr3proba_tasks), function(nm) mlr_tasks$remove(nm)) walk(names(mlr3proba_measures), function(nm) mlr_measures$remove(nm)) @@ -75,4 +76,28 @@ utils::globalVariables(c( library.dynam.unload("mlr3proba", libpath) } +unregister_reflections = function() { + x = utils::getFromNamespace("mlr_reflections", ns = "mlr3") + + # task + x$task_types[package != "mlr3proba"] + x$task_col_roles$surv = NULL + x$task_col_roles$dens = NULL + x$task_col_roles$classif = setdiff(x$task_col_roles$classif, "original_ids") + x$task_properties$surv = NULL + x$task_properties$dens = NULL + + # learner + x$learner_properties$surv = NULL + x$learner_properties$dens = NULL + x$learner_predict_types$surv = NULL + x$learner_predict_types$dens = NULL + + # measure + x$measure_properties$surv = NULL + x$measure_properties$dens = NULL + x$default_measures$surv = NULL + x$default_measures$dens = NULL +} + leanify_package() From bef1f90e9ff6e93caf615230ff597238b839a9f9 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 29 Jul 2024 23:09:32 +0200 Subject: [PATCH 13/30] refactoring * better doc * refactoring var names (new_data => lond_data, time2 => obs_times) * add ids to "original_ids" role --- R/PipeOpPredClassifSurvDiscTime.R | 2 +- R/PipeOpTaskSurvClassifDiscTime.R | 51 ++++++++++++++++--------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R index db894dbfb..ab86a299b 100644 --- a/R/PipeOpPredClassifSurvDiscTime.R +++ b/R/PipeOpPredClassifSurvDiscTime.R @@ -84,7 +84,7 @@ PipeOpPredClassifSurvDiscTime = R6Class( # select the real tend values by only selecting the last row of each id # basically a slightly more complex unique() - real_tend = data$time2[seq_len(nrow(data)) %% rows_per_id == 0] + real_tend = data$obs_times[seq_len(nrow(data)) %% rows_per_id == 0] ids = unique(data$id) # select last row for every id => observed times 
diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index e5d7a2451..a09b572c8 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -26,18 +26,19 @@ #' #' During training, the "output" is the "input" [TaskSurv] transformed to a #' [TaskClassif][mlr3::TaskClassif]. -#' The target column is named `ped_status` and indicates whether an event occurred +#' The target column is named `"ped_status"` and indicates whether an event occurred #' in each time interval. -#' An additional feature named `tend` is added to the ouput task, containing the -#' end time of each interval. +#' An additional feature named `"tend"` contains the end time point of each interval. +#' Lastly, the "output" task has a column with the original observation ids, +#' under the role `"original_ids"`. #' The "transformed_data" is an empty [data.table][data.table::data.table]. #' #' During prediction, the "input" [TaskSurv] is transformed to the "output" -#' [TaskClassif][mlr3::TaskClassif] with `ped_status` as target and the `tend` +#' [TaskClassif][mlr3::TaskClassif] with `"ped_status"` as target and the `"tend"` #' feature included. -#' The "transformed_data" is a [data.table] which has all the features of the -#' "output" task, including an additional column `time2` containing the -#' original observed times. +#' The "transformed_data" is a [data.table] which has as columns all the features +#' of the "output" task and in addition the columns `"id"` (original observation ids), +#' `"obs_times"` (observed times per `"id"`) and `"tend"` (end time of each interval). #' This "transformed_data" is only meant to be used with the [PipeOpPredClassifSurvDiscTime]. 
#' #' @section State: @@ -143,6 +144,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", # remove offset, tstart, interval for dataframe long_data long_data[, c("offset", "tstart", "interval") := NULL] + # keep id mapping reps = table(long_data$id) ids = rep(task$row_ids, times = reps) id = NULL @@ -150,7 +152,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data, target = "ped_status", positive = "1") - task_disc$set_col_roles("id", roles = "name") + task_disc$set_col_roles("id", roles = "original_ids") list(task_disc, data.table()) }, @@ -175,33 +177,34 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", # update form form = formulate(sprintf("Surv(%s, %s)", time_var, event_var), ".") - new_data = pammtools::as_ped(data, formula = form, cut = cut) - new_data = as.data.table(new_data) + long_data = as.data.table(pammtools::as_ped(data, formula = form, cut = cut)) - ped_status = id = tend = time2 = NULL # fixing global binding notes of data.table - new_data[, ped_status := 0] + ped_status = id = tend = obs_times = NULL # fixing global binding notes of data.table + long_data[, ped_status := 0] # set correct id - rows_per_id = nrow(new_data) / length(unique(new_data$id)) - new_data$time2 = rep(time, each = rows_per_id) + rows_per_id = nrow(long_data) / length(unique(long_data$id)) + long_data$obs_times = rep(time, each = rows_per_id) ids = rep(task$row_ids, each = rows_per_id) - new_data[, id := ids] + long_data[, id := ids] # set correct ped_status - reps = new_data[, data.table(count = sum(tend >= time2)), by = id]$count + reps = long_data[, data.table(count = sum(tend >= obs_times)), by = id]$count status = rep(status, times = reps) - new_data[new_data[, .I[tend >= time2], by = id]$V1, ped_status := status] - new_data$ped_status = factor(new_data$ped_status, levels = c("0", "1")) + long_data[long_data[, .I[tend >= obs_times], by 
= id]$V1, ped_status := status] + long_data$ped_status = factor(long_data$ped_status, levels = c("0", "1")) # remove offset, tstart, interval for dataframe long_data - new_data[, c("offset", "tstart", "interval", "time2") := NULL] - task_disc = TaskClassif$new(paste0(task$id, "_disc"), new_data, + long_data[, c("offset", "tstart", "interval", "obs_times") := NULL] + task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data, target = "ped_status", positive = "1") - task_disc$set_col_roles("id", roles = "name") + task_disc$set_col_roles("id", roles = "original_ids") + + # map observed times back + reps = table(long_data$id) + long_data$obs_times = rep(time, each = rows_per_id) - reps = table(new_data$id) - new_data$time2 = rep(time, each = rows_per_id) - list(task_disc, new_data) + list(task_disc, long_data) } ) ) From a01cdfc4656600b791e81045f7d1db47d1561a45 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 29 Jul 2024 23:11:46 +0200 Subject: [PATCH 14/30] updocs --- man/mlr_pipeops_trafotask_survclassif_disctime.Rd | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd index 8e940f13d..6a633767b 100644 --- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd @@ -31,18 +31,19 @@ output channels, one named "output" and the other "transformed_data". During training, the "output" is the "input" \link{TaskSurv} transformed to a \link[mlr3:TaskClassif]{TaskClassif}. -The target column is named \code{ped_status} and indicates whether an event occurred +The target column is named \code{"ped_status"} and indicates whether an event occurred in each time interval. -An additional feature named \code{tend} is added to the ouput task, containing the -end time of each interval. +An additional feature named \code{"tend"} contains the end time point of each interval. 
+Lastly, the "output" task has a column with the original observation ids, +under the role \code{"original_ids"}. The "transformed_data" is an empty \link[data.table:data.table]{data.table}. During prediction, the "input" \link{TaskSurv} is transformed to the "output" -\link[mlr3:TaskClassif]{TaskClassif} with \code{ped_status} as target and the \code{tend} +\link[mlr3:TaskClassif]{TaskClassif} with \code{"ped_status"} as target and the \code{"tend"} feature included. -The "transformed_data" is a \link{data.table} which has all the features of the -"output" task, including an additional column \code{time2} containing the -original observed times. +The "transformed_data" is a \link{data.table} which has as columns all the features +of the "output" task and in addition the columns \code{"id"} (original observation ids), +\code{"obs_times"} (observed times per \code{"id"}) and \code{"tend"} (end time of each interval). This "transformed_data" is only meant to be used with the \link{PipeOpPredClassifSurvDiscTime}. 
} From e4859a920729e1733e24b640e155e110cb0982f4 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 29 Jul 2024 23:13:01 +0200 Subject: [PATCH 15/30] add package name --- tests/testthat/test_pipelines.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index c7ff532f4..fae131dd9 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -153,12 +153,13 @@ test_that("survtoclassif_disctime", { expect_prediction_surv(p) # Test with rhs - grlrn = ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), rhs = "1", - graph_learner = TRUE) + grlrn = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), + rhs = "1", graph_learner = TRUE) grlrn$train(task) pred = suppressWarnings(grlrn$predict(task)) - grlrn2 = ppl("survtoclassif_disctime", learner = lrn("classif.featureless"), graph_learner = TRUE) + grlrn2 = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.featureless"), + graph_learner = TRUE) grlrn2$train(task) pred2 = grlrn2$predict(task) @@ -167,16 +168,16 @@ test_that("survtoclassif_disctime", { expect_equal(unname(pred2$score()), 0.5) expect_equal(pred$data$distr, pred2$data$distr) - grlrn = ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), rhs = "rx + litter", - graph_learner = TRUE) + grlrn = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), + rhs = "rx + litter", graph_learner = TRUE) grlrn$train(task) pred = suppressWarnings(grlrn$predict(task)) - grlrn2 = ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), rhs = ".", - graph_learner = TRUE) + grlrn2 = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), + rhs = ".", graph_learner = TRUE) grlrn2$train(task) pred2 = suppressWarnings(grlrn2$predict(task)) - # model with more covariates should have better c-index + # model with more covariates should have better 
C-index expect_gt(pred2$score(), pred$score()) }) From e2b471c63b920e8c35226f7822032ed44d99759b Mon Sep 17 00:00:00 2001 From: john Date: Mon, 29 Jul 2024 23:14:07 +0200 Subject: [PATCH 16/30] update test, split train/test --- ...st_pipeop_trafotask_survclassif_disctime.R | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R index 6b2670737..617959851 100644 --- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R +++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R @@ -1,13 +1,18 @@ test_that("PipeOpTaskSurvClassifDiscTime", { task = tsk("lung") + + # imitate train/test split manually test_ids = c(2, 10, 107) + train_ids = setdiff(task$row_ids, test_ids) test_task = task$clone()$filter(rows = test_ids) - expect_equal(test_ids, test_task$row_ids) + train_task = task$clone()$filter(rows = train_ids) + expect_equal(test_task$row_ids, test_ids) + expect_equal(train_task$row_ids, train_ids) po_disc = mlr3pipelines::po("trafotask_survclassif_disctime", cut = 4) expect_class(po_disc, c("PipeOp", "PipeOpTaskSurvClassifDiscTime")) - res = po_disc$train(list(task)) + res = po_disc$train(list(train_task)) # 0 is added time_cuts = po_disc$state$cut @@ -17,6 +22,7 @@ test_that("PipeOpTaskSurvClassifDiscTime", { # classification task output_task = res[[1L]] expect_task_classif(output_task) + expect_equal(output_task$col_roles$original_ids, "id") expect_equal(output_task$positive, "1") expect_equal(output_task$target_names, "ped_status") # new column added to the task @@ -34,23 +40,26 @@ test_that("PipeOpTaskSurvClassifDiscTime", { tends = pred_task$data(cols = "tend")[[1L]] expect_equal(sort(unique(tends)), time_cuts[2:5]) # test row ids are correct - expect_equal(pred_task$row_names$row_name, rep(test_ids, each = 4)) + expect_equal(pred_task$col_roles$original_ids, "id") + original_ids = 
pred_task$data(cols = "id")[[1L]] + correct_ids = rep(test_ids, each = 4) + expect_equal(original_ids, correct_ids) transformed_data = res[["transformed_data"]] # test rows ids are correct - expect_equal(transformed_data$id, rep(test_ids, each = 4)) + expect_equal(transformed_data$id, correct_ids) # check columns in the transformed data.table - expect_equal(sort(c("id", "ped_status", "time2", pred_task$feature_names)), + expect_equal(sort(c("id", "ped_status", "obs_times", pred_task$feature_names)), sort(colnames(transformed_data))) # `ped_status` per interval and per observation is correct - # before observed time ("time2"), "ped_status" = 0 - expect_equal(as.character(unique(transformed_data[tend < time2, ped_status])), + # before observed time ("obs_times"), "ped_status" = 0 + expect_equal(as.character(unique(transformed_data[tend < obs_times, ped_status])), "0") times = test_task$times() # observed times status = as.character(test_task$status()) # after observed time, "ped_status" must be the same as "status" - td = transformed_data[tend > time2] + td = transformed_data[tend > obs_times] expect_equal(as.character(unique(td[id == test_ids[1], ped_status])), status[1]) expect_equal(as.character(unique(td[id == test_ids[2], ped_status])), status[2]) expect_equal(as.character(unique(td[id == test_ids[3], ped_status])), status[3]) From 3bc57fdf3b7c45c069790053f3fcad2942de3e9b Mon Sep 17 00:00:00 2001 From: Philip Studener Date: Tue, 30 Jul 2024 11:18:48 +0200 Subject: [PATCH 17/30] edit transformed_data --- R/PipeOpTaskSurvClassifDiscTime.R | 1 + tests/testthat/test_pipeop_trafotask_survclassif_disctime.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index a09b572c8..738bcb178 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -203,6 +203,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", # map observed times 
back reps = table(long_data$id) long_data$obs_times = rep(time, each = rows_per_id) + long_data = long_data[, .(id, obs_times, tend, ped_status)] list(task_disc, long_data) } diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R index 617959851..1e21ebfb7 100644 --- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R +++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R @@ -49,7 +49,7 @@ test_that("PipeOpTaskSurvClassifDiscTime", { # test rows ids are correct expect_equal(transformed_data$id, correct_ids) # check columns in the transformed data.table - expect_equal(sort(c("id", "ped_status", "obs_times", pred_task$feature_names)), + expect_equal(sort(c("id", "ped_status", "obs_times", "tend")), sort(colnames(transformed_data))) # `ped_status` per interval and per observation is correct From a472af63f22c80c4a586d5ec30c18684ee0241a5 Mon Sep 17 00:00:00 2001 From: Philip Studener Date: Tue, 30 Jul 2024 11:41:27 +0200 Subject: [PATCH 18/30] rename ped_status -> disc_status --- R/PipeOpPredClassifSurvDiscTime.R | 6 ++-- R/PipeOpTaskSurvClassifDiscTime.R | 28 ++++++++++--------- ...st_pipeop_trafotask_survclassif_disctime.R | 18 ++++++------ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R index ab86a299b..5671041cd 100644 --- a/R/PipeOpPredClassifSurvDiscTime.R +++ b/R/PipeOpPredClassifSurvDiscTime.R @@ -88,14 +88,14 @@ PipeOpPredClassifSurvDiscTime = R6Class( ids = unique(data$id) # select last row for every id => observed times - id = ped_status = NULL # to fix note - data = data[, .SD[.N, list(ped_status)], by = id] + id = disc_status = NULL # to fix note + data = data[, .SD[.N, list(disc_status)], by = id] # create prediction object p = PredictionSurv$new( row_ids = ids, crank = pred_list$crank, distr = pred_list$distr, - truth = Surv(real_tend, 
as.integer(as.character(data$ped_status)))) + truth = Surv(real_tend, as.integer(as.character(data$disc_status)))) list(p) }, diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index 738bcb178..f794ca993 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -5,7 +5,7 @@ #' @description #' Transform [TaskSurv] to [TaskClassif][mlr3::TaskClassif] by dividing continuous #' time into multiple time intervals for each observation. -#' This transformation creates a new target variable `ped_status` that indicates +#' This transformation creates a new target variable `disc_status` that indicates #' whether an event occurred within each time interval. #' This approach facilitates survival analysis within a classification framework #' using discrete time intervals (Tutz et al. 2016). @@ -26,7 +26,7 @@ #' #' During training, the "output" is the "input" [TaskSurv] transformed to a #' [TaskClassif][mlr3::TaskClassif]. -#' The target column is named `"ped_status"` and indicates whether an event occurred +#' The target column is named `"disc_status"` and indicates whether an event occurred #' in each time interval. #' An additional feature named `"tend"` contains the end time point of each interval. #' Lastly, the "output" task has a column with the original observation ids, @@ -34,7 +34,7 @@ #' The "transformed_data" is an empty [data.table][data.table::data.table]. #' #' During prediction, the "input" [TaskSurv] is transformed to the "output" -#' [TaskClassif][mlr3::TaskClassif] with `"ped_status"` as target and the `"tend"` +#' [TaskClassif][mlr3::TaskClassif] with `"disc_status"` as target and the `"tend"` #' feature included. 
#' The "transformed_data" is a [data.table] which has as columns all the features #' of the "output" task and in addition the columns `"id"` (original observation ids), @@ -140,10 +140,10 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", long_data = pammtools::as_ped(data = data, formula = form, cut = cut, max_time = max_time) self$state$cut = attributes(long_data)$trafo_args$cut long_data = as.data.table(long_data) - long_data$ped_status = factor(long_data$ped_status, levels = c("0", "1")) + long_data$disc_status = factor(long_data$ped_status, levels = c("0", "1")) # remove offset, tstart, interval for dataframe long_data - long_data[, c("offset", "tstart", "interval") := NULL] + long_data[, c("offset", "tstart", "interval", "ped_status") := NULL] # keep id mapping reps = table(long_data$id) ids = rep(task$row_ids, times = reps) @@ -151,7 +151,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", long_data[, id := ids] task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data, - target = "ped_status", positive = "1") + target = "disc_status", positive = "1") task_disc$set_col_roles("id", roles = "original_ids") list(task_disc, data.table()) @@ -179,31 +179,33 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", long_data = as.data.table(pammtools::as_ped(data, formula = form, cut = cut)) - ped_status = id = tend = obs_times = NULL # fixing global binding notes of data.table - long_data[, ped_status := 0] + long_data$disc_status = long_data$ped_status + long_data[, "ped_status" := NULL] + disc_status = id = tend = obs_times = NULL # fixing global binding notes of data.table + long_data[, disc_status := 0] # set correct id rows_per_id = nrow(long_data) / length(unique(long_data$id)) long_data$obs_times = rep(time, each = rows_per_id) ids = rep(task$row_ids, each = rows_per_id) long_data[, id := ids] - # set correct ped_status + # set correct disc_status reps = long_data[, data.table(count 
= sum(tend >= obs_times)), by = id]$count status = rep(status, times = reps) - long_data[long_data[, .I[tend >= obs_times], by = id]$V1, ped_status := status] - long_data$ped_status = factor(long_data$ped_status, levels = c("0", "1")) + long_data[long_data[, .I[tend >= obs_times], by = id]$V1, disc_status := status] + long_data$disc_status = factor(long_data$disc_status, levels = c("0", "1")) # remove offset, tstart, interval for dataframe long_data long_data[, c("offset", "tstart", "interval", "obs_times") := NULL] task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data, - target = "ped_status", positive = "1") + target = "disc_status", positive = "1") task_disc$set_col_roles("id", roles = "original_ids") # map observed times back reps = table(long_data$id) long_data$obs_times = rep(time, each = rows_per_id) - long_data = long_data[, .(id, obs_times, tend, ped_status)] + long_data = long_data[, data.table(id, obs_times, tend, disc_status)] list(task_disc, long_data) } diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R index 1e21ebfb7..d60770bff 100644 --- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R +++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R @@ -24,7 +24,7 @@ test_that("PipeOpTaskSurvClassifDiscTime", { expect_task_classif(output_task) expect_equal(output_task$col_roles$original_ids, "id") expect_equal(output_task$positive, "1") - expect_equal(output_task$target_names, "ped_status") + expect_equal(output_task$target_names, "disc_status") # new column added to the task expect_equal("tend", setdiff(output_task$feature_names, task$feature_names)) # not all observations have events on the last (4th) interval @@ -49,18 +49,18 @@ test_that("PipeOpTaskSurvClassifDiscTime", { # test rows ids are correct expect_equal(transformed_data$id, correct_ids) # check columns in the transformed data.table - expect_equal(sort(c("id", "ped_status", 
"obs_times", "tend")), + expect_equal(sort(c("id", "disc_status", "obs_times", "tend")), sort(colnames(transformed_data))) - # `ped_status` per interval and per observation is correct - # before observed time ("obs_times"), "ped_status" = 0 - expect_equal(as.character(unique(transformed_data[tend < obs_times, ped_status])), + # `disc_status` per interval and per observation is correct + # before observed time ("obs_times"), "disc_status" = 0 + expect_equal(as.character(unique(transformed_data[tend < obs_times, disc_status])), "0") times = test_task$times() # observed times status = as.character(test_task$status()) - # after observed time, "ped_status" must be the same as "status" + # after observed time, "disc_status" must be the same as "status" td = transformed_data[tend > obs_times] - expect_equal(as.character(unique(td[id == test_ids[1], ped_status])), status[1]) - expect_equal(as.character(unique(td[id == test_ids[2], ped_status])), status[2]) - expect_equal(as.character(unique(td[id == test_ids[3], ped_status])), status[3]) + expect_equal(as.character(unique(td[id == test_ids[1], disc_status])), status[1]) + expect_equal(as.character(unique(td[id == test_ids[2], disc_status])), status[2]) + expect_equal(as.character(unique(td[id == test_ids[3], disc_status])), status[3]) }) From 3fefd12ccd16058e16ebb6c77ce9ba3a4d2d16c3 Mon Sep 17 00:00:00 2001 From: Philip Studener Date: Tue, 30 Jul 2024 11:42:58 +0200 Subject: [PATCH 19/30] updocs --- man/mlr_pipeops_trafotask_survclassif_disctime.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd index 6a633767b..37396414f 100644 --- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd @@ -7,7 +7,7 @@ \description{ Transform \link{TaskSurv} to \link[mlr3:TaskClassif]{TaskClassif} by dividing continuous time into multiple time intervals for each 
observation. -This transformation creates a new target variable \code{ped_status} that indicates +This transformation creates a new target variable \code{disc_status} that indicates whether an event occurred within each time interval. This approach facilitates survival analysis within a classification framework using discrete time intervals (Tutz et al. 2016). @@ -31,7 +31,7 @@ output channels, one named "output" and the other "transformed_data". During training, the "output" is the "input" \link{TaskSurv} transformed to a \link[mlr3:TaskClassif]{TaskClassif}. -The target column is named \code{"ped_status"} and indicates whether an event occurred +The target column is named \code{"disc_status"} and indicates whether an event occurred in each time interval. An additional feature named \code{"tend"} contains the end time point of each interval. Lastly, the "output" task has a column with the original observation ids, @@ -39,7 +39,7 @@ under the role \code{"original_ids"}. The "transformed_data" is an empty \link[data.table:data.table]{data.table}. During prediction, the "input" \link{TaskSurv} is transformed to the "output" -\link[mlr3:TaskClassif]{TaskClassif} with \code{"ped_status"} as target and the \code{"tend"} +\link[mlr3:TaskClassif]{TaskClassif} with \code{"disc_status"} as target and the \code{"tend"} feature included. 
The "transformed_data" is a \link{data.table} which has as columns all the features of the "output" task and in addition the columns \code{"id"} (original observation ids), From 1d0350763c9e2333e3889216d1a7c00f85b2c251 Mon Sep 17 00:00:00 2001 From: Philip Studener Date: Tue, 30 Jul 2024 11:52:16 +0200 Subject: [PATCH 20/30] make sure that disc_status not in colnames --- R/PipeOpTaskSurvClassifDiscTime.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index f794ca993..c0c2aa07c 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -121,6 +121,10 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", assert_true(task$censtype == "right") data = task$data() + if ("disc_status" %in% colnames(task$data())) { + stop("\"disc_status\" can not be a column in the input data.") + } + cut = assert_numeric(self$param_set$values$cut, null.ok = TRUE, lower = 0) max_time = self$param_set$values$max_time From 8ea2c80aada8d0be641a2d027d21073aaed9ba54 Mon Sep 17 00:00:00 2001 From: john Date: Tue, 30 Jul 2024 14:46:57 +0200 Subject: [PATCH 21/30] refactor test + add some more checks --- ...st_pipeop_trafotask_survclassif_disctime.R | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R index d60770bff..efb8ab7f5 100644 --- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R +++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R @@ -38,27 +38,33 @@ test_that("PipeOpTaskSurvClassifDiscTime", { expect_equal(pred_task$nrow, test_task$nrow * 4) # `tend` matches the cut time points (excluding 0 time point) tends = pred_task$data(cols = "tend")[[1L]] - expect_equal(sort(unique(tends)), time_cuts[2:5]) - # test row ids are correct + expect_setequal(unique(tends), time_cuts[2:5]) + # 
original row ids are correct expect_equal(pred_task$col_roles$original_ids, "id") original_ids = pred_task$data(cols = "id")[[1L]] correct_ids = rep(test_ids, each = 4) expect_equal(original_ids, correct_ids) transformed_data = res[["transformed_data"]] - # test rows ids are correct - expect_equal(transformed_data$id, correct_ids) # check columns in the transformed data.table - expect_equal(sort(c("id", "disc_status", "obs_times", "tend")), - sort(colnames(transformed_data))) + expect_set_equal(colnames(transformed_data), + c("id", "disc_status", "obs_times", "tend")) + # `id`s are correct + expect_equal(transformed_data$id, correct_ids) + # `disc_status` is the same + expect_equal(transformed_data$disc_status, pred_task$truth()) + # `obs_times` are correct + times = test_task$times() # observed times + expect_setequal(unique(transformed_data$obs_times), times) + # `tends` are correct + expect_setequal(unique(transformed_data$tend), time_cuts[2:5]) # `disc_status` per interval and per observation is correct # before observed time ("obs_times"), "disc_status" = 0 - expect_equal(as.character(unique(transformed_data[tend < obs_times, disc_status])), - "0") - times = test_task$times() # observed times - status = as.character(test_task$status()) + expect_equal(as.character(unique(transformed_data[tend < obs_times, disc_status])), "0") + # after observed time, "disc_status" must be the same as "status" + status = as.character(test_task$status()) td = transformed_data[tend > obs_times] expect_equal(as.character(unique(td[id == test_ids[1], disc_status])), status[1]) expect_equal(as.character(unique(td[id == test_ids[2], disc_status])), status[2]) From 1fea434457f41b874ba6352af606b8aaf5ec28eb Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 12:22:24 +0200 Subject: [PATCH 22/30] rename file --- .github/workflows/{r-cmd-check.yml => R-CMD-check.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{r-cmd-check.yml => R-CMD-check.yml} 
(100%) diff --git a/.github/workflows/r-cmd-check.yml b/.github/workflows/R-CMD-check.yml similarity index 100% rename from .github/workflows/r-cmd-check.yml rename to .github/workflows/R-CMD-check.yml From c4cfba567f4c124a21b8a0d78397ddb695c81678 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 12:22:53 +0200 Subject: [PATCH 23/30] add R-universe badge --- README.Rmd | 11 +++++------ README.md | 12 ++++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/README.Rmd b/README.Rmd index 963f316b1..546ed4863 100644 --- a/README.Rmd +++ b/README.Rmd @@ -12,16 +12,15 @@ knitr::opts_chunk$set( # mlr3proba -Package website: [release](https://mlr3proba.mlr-org.com/) - -Probabilistic Supervised Learning for **[mlr3](https://github.com/mlr-org/mlr3/)**. +Probabilistic Supervised Learning for **[mlr3](https://github.com/mlr-org/mlr3/)** ([website](https://mlr3proba.mlr-org.com/)). -[![r-cmd-check](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml) +[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml) +[![runiverse](https://mlr-org.r-universe.dev/badges/mlr3proba)](https://mlr-org.r-universe.dev/mlr3proba) [![GitHub Discussions](https://img.shields.io/github/discussions/mlr-org/mlr3proba?logo=github&label=Discussions%20Q%26A&color=FFE600)](https://github.com/mlr-org/mlr3proba/discussions) [![Article](https://img.shields.io/badge/Article-10.1093%2Fbioinformatics%2Fbtab039-brightgreen)](https://doi.org/10.1093/bioinformatics/btab039) -[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3) -[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) 
+[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg?color=pink)](https://stackoverflow.com/questions/tagged/mlr3) +[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg?color=pink)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) ## What is mlr3proba? diff --git a/README.md b/README.md index bd0ca5edc..1c0aa0e82 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,19 @@ # mlr3proba -Package website: [release](https://mlr3proba.mlr-org.com/) - Probabilistic Supervised Learning for -**[mlr3](https://github.com/mlr-org/mlr3/)**. +**[mlr3](https://github.com/mlr-org/mlr3/)** +([website](https://mlr3proba.mlr-org.com/)). -[![r-cmd-check](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml) +[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml) +[![runiverse](https://mlr-org.r-universe.dev/badges/mlr3proba)](https://mlr-org.r-universe.dev/mlr3proba) [![GitHub Discussions](https://img.shields.io/github/discussions/mlr-org/mlr3proba?logo=github&label=Discussions%20Q%26A&color=FFE600)](https://github.com/mlr-org/mlr3proba/discussions) [![Article](https://img.shields.io/badge/Article-10.1093%2Fbioinformatics%2Fbtab039-brightgreen)](https://doi.org/10.1093/bioinformatics/btab039) -[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3) -[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) +[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg?color=pink)](https://stackoverflow.com/questions/tagged/mlr3) +[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg?color=pink)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) ## 
What is mlr3proba? From e23e29fbe27a5a990b38d69695df3b96e2d54064 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 12:26:55 +0200 Subject: [PATCH 24/30] fix note --- R/zzz.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/zzz.R b/R/zzz.R index 4d95d603c..20a0b3e83 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -80,6 +80,7 @@ unregister_reflections = function() { x = utils::getFromNamespace("mlr_reflections", ns = "mlr3") # task + package = NULL # silence data.table notes x$task_types[package != "mlr3proba"] x$task_col_roles$surv = NULL x$task_col_roles$dens = NULL From 3217512e7250cd3271f267861e361f1ef02b1461 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 12:34:08 +0200 Subject: [PATCH 25/30] use testthat expectations for testing --- R/TaskSurv.R | 2 +- R/pipelines.R | 2 +- inst/testthat/helper_expectations.R | 2 +- tests/testthat/test_PredictionSurv.R | 8 ++++---- tests/testthat/test_TaskSurv.R | 2 +- .../testthat/test_pipeop_trafotask_survclassif_disctime.R | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/TaskSurv.R b/R/TaskSurv.R index 80ba4f63a..9dd9f4326 100644 --- a/R/TaskSurv.R +++ b/R/TaskSurv.R @@ -378,7 +378,7 @@ TaskSurv = R6::R6Class("TaskSurv", #' @description #' Checks if the data satisfy the *proportional hazards (PH)* assumption using - #' the Grambsch-Therneau test, `r mlr3misc::cite_bib("grambsch_1994")`. + #' the Grambsch-Therneau test, `r cite_bib("grambsch_1994")`. #' Uses [cox.zph][survival::cox.zph()]. 
#' This method should be used only for **low-dimensional datasets** where #' the number of features is relatively small compared to the number of diff --git a/R/pipelines.R b/R/pipelines.R index 307d90b4e..638c89d97 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -598,7 +598,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, if (!is.null(rhs)) { gr$edges = gr$edges[-1, ] - gr$add_pipeop(mlr3pipelines::po("modelmatrix", formula = mlr3misc::formulate(rhs = rhs, quote = "left"))) + gr$add_pipeop(mlr3pipelines::po("modelmatrix", formula = formulate(rhs = rhs, quote = "left"))) gr$add_edge(src_id = "trafotask_survclassif_disctime", dst_id = "modelmatrix", src_channel = "output") gr$add_edge(src_id = "modelmatrix", dst_id = learner$id, src_channel = "output", dst_channel = "input") } diff --git a/inst/testthat/helper_expectations.R b/inst/testthat/helper_expectations.R index 5f0726ac1..b5a6ebba9 100644 --- a/inst/testthat/helper_expectations.R +++ b/inst/testthat/helper_expectations.R @@ -21,7 +21,7 @@ expect_task_surv = function(task) { f = task$formula() expect_formula(f) - expect_set_equal(mlr3misc::extract_vars(f)$lhs, task$target_names) + expect_setequal(extract_vars(f)$lhs, task$target_names) expect_class(task$kaplan(), "survfit") } diff --git a/tests/testthat/test_PredictionSurv.R b/tests/testthat/test_PredictionSurv.R index 2cfad7573..51770a540 100644 --- a/tests/testthat/test_PredictionSurv.R +++ b/tests/testthat/test_PredictionSurv.R @@ -192,10 +192,10 @@ test_that("filtering", { expect_prediction_surv(p3) expect_prediction_surv(p4) - expect_set_equal(p$data$row_ids, c(20, 37, 42)) - expect_set_equal(p2$data$row_ids, c(20, 37, 42)) - expect_set_equal(p3$data$row_ids, c(20, 37, 42)) - expect_set_equal(p4$data$row_ids, c(20, 37, 42)) + expect_setequal(p$data$row_ids, c(20, 37, 42)) + expect_setequal(p2$data$row_ids, c(20, 37, 42)) + expect_setequal(p3$data$row_ids, c(20, 37, 42)) + expect_setequal(p4$data$row_ids, c(20, 37, 
42)) expect_numeric(p$data$crank, any.missing = FALSE, len = 3L) expect_numeric(p2$data$crank, any.missing = FALSE, len = 3L) expect_numeric(p3$data$crank, any.missing = FALSE, len = 3L) diff --git a/tests/testthat/test_TaskSurv.R b/tests/testthat/test_TaskSurv.R index 78ed0c095..b1867a4e2 100644 --- a/tests/testthat/test_TaskSurv.R +++ b/tests/testthat/test_TaskSurv.R @@ -1,7 +1,7 @@ test_that("Task duplicates rows", { task = tsk("lung") expect_task_surv(task) - expect_set_equal(extract_vars(task$formula())$rhs, ".") + expect_setequal(extract_vars(task$formula())$rhs, ".") }) test_that("right censoring", { diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R index efb8ab7f5..32e668c0d 100644 --- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R +++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R @@ -47,8 +47,8 @@ test_that("PipeOpTaskSurvClassifDiscTime", { transformed_data = res[["transformed_data"]] # check columns in the transformed data.table - expect_set_equal(colnames(transformed_data), - c("id", "disc_status", "obs_times", "tend")) + expect_setequal(colnames(transformed_data), + c("id", "disc_status", "obs_times", "tend")) # `id`s are correct expect_equal(transformed_data$id, correct_ids) # `disc_status` is the same From 0d7ba22db4238b0878594bf9815925034db44e28 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 12:35:52 +0200 Subject: [PATCH 26/30] fix test --- tests/testthat/test_pipeop_trafotask_survclassif_disctime.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R index 32e668c0d..9723bd53d 100644 --- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R +++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R @@ -52,7 +52,8 @@ test_that("PipeOpTaskSurvClassifDiscTime", { # `id`s are 
correct expect_equal(transformed_data$id, correct_ids) # `disc_status` is the same - expect_equal(transformed_data$disc_status, pred_task$truth()) + expect_equal(as.character(transformed_data$disc_status), + as.character(pred_task$truth())) # `obs_times` are correct times = test_task$times() # observed times expect_setequal(unique(transformed_data$obs_times), times) From 36044d9885984859129610d8b4de0bc47fd83128 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 13:09:32 +0200 Subject: [PATCH 27/30] update docs --- R/PipeOpTaskSurvClassifDiscTime.R | 4 ++-- man/mlr_pipeops_trafotask_survclassif_disctime.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index c0c2aa07c..22fd944ec 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -36,8 +36,8 @@ #' During prediction, the "input" [TaskSurv] is transformed to the "output" #' [TaskClassif][mlr3::TaskClassif] with `"disc_status"` as target and the `"tend"` #' feature included. -#' The "transformed_data" is a [data.table] which has as columns all the features -#' of the "output" task and in addition the columns `"id"` (original observation ids), +#' The "transformed_data" is a [data.table] with columns the `"disc_status"` +#' target of the "output" task, the `"id"` (original observation ids), #' `"obs_times"` (observed times per `"id"`) and `"tend"` (end time of each interval). #' This "transformed_data" is only meant to be used with the [PipeOpPredClassifSurvDiscTime]. #' diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd index 37396414f..873dfb5ab 100644 --- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd @@ -41,8 +41,8 @@ The "transformed_data" is an empty \link[data.table:data.table]{data.table}. 
During prediction, the "input" \link{TaskSurv} is transformed to the "output" \link[mlr3:TaskClassif]{TaskClassif} with \code{"disc_status"} as target and the \code{"tend"} feature included. -The "transformed_data" is a \link{data.table} which has as columns all the features -of the "output" task and in addition the columns \code{"id"} (original observation ids), +The "transformed_data" is a \link{data.table} with columns the \code{"disc_status"} +target of the "output" task, the \code{"id"} (original observation ids), \code{"obs_times"} (observed times per \code{"id"}) and \code{"tend"} (end time of each interval). This "transformed_data" is only meant to be used with the \link{PipeOpPredClassifSurvDiscTime}. } From 3aeb622ebbfbd4174a6e21d74eb626f325e34488 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 13:11:23 +0200 Subject: [PATCH 28/30] refactoring --- R/PipeOpTaskSurvClassifDiscTime.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R index 22fd944ec..89f606516 100644 --- a/R/PipeOpTaskSurvClassifDiscTime.R +++ b/R/PipeOpTaskSurvClassifDiscTime.R @@ -144,10 +144,11 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", long_data = pammtools::as_ped(data = data, formula = form, cut = cut, max_time = max_time) self$state$cut = attributes(long_data)$trafo_args$cut long_data = as.data.table(long_data) - long_data$disc_status = factor(long_data$ped_status, levels = c("0", "1")) + setnames(long_data, old = "ped_status", new = "disc_status") + long_data$disc_status = factor(long_data$disc_status, levels = c("0", "1")) - # remove offset, tstart, interval for dataframe long_data - long_data[, c("offset", "tstart", "interval", "ped_status") := NULL] + # remove some columns from `long_data` + long_data[, c("offset", "tstart", "interval") := NULL] # keep id mapping reps = table(long_data$id) ids = rep(task$row_ids, times = reps) @@ -182,9 
+183,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", form = formulate(sprintf("Surv(%s, %s)", time_var, event_var), ".") long_data = as.data.table(pammtools::as_ped(data, formula = form, cut = cut)) - - long_data$disc_status = long_data$ped_status - long_data[, "ped_status" := NULL] + setnames(long_data, old = "ped_status", new = "disc_status") disc_status = id = tend = obs_times = NULL # fixing global binding notes of data.table long_data[, disc_status := 0] @@ -200,7 +199,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", long_data[long_data[, .I[tend >= obs_times], by = id]$V1, disc_status := status] long_data$disc_status = factor(long_data$disc_status, levels = c("0", "1")) - # remove offset, tstart, interval for dataframe long_data + # remove some columns from `long_data` long_data[, c("offset", "tstart", "interval", "obs_times") := NULL] task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data, target = "disc_status", positive = "1") @@ -209,7 +208,9 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime", # map observed times back reps = table(long_data$id) long_data$obs_times = rep(time, each = rows_per_id) - long_data = long_data[, data.table(id, obs_times, tend, disc_status)] + # subset transformed data + columns_to_keep = c("id", "obs_times", "tend", "disc_status") + long_data = long_data[, columns_to_keep, with = FALSE] list(task_disc, long_data) } From 7c0e12959b59c58393e242ce7605c47a3345594f Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 13:14:54 +0200 Subject: [PATCH 29/30] revert renaming of workflow file => capitalize name --- .github/workflows/{R-CMD-check.yml => r-cmd-check.yml} | 2 +- README.Rmd | 2 +- README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename .github/workflows/{R-CMD-check.yml => r-cmd-check.yml} (98%) diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/r-cmd-check.yml similarity index 98% rename from 
.github/workflows/R-CMD-check.yml rename to .github/workflows/r-cmd-check.yml index 70bba92ee..d6135b1d1 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/r-cmd-check.yml @@ -9,7 +9,7 @@ on: branches: - main -name: r-cmd-check +name: R-CMD-check jobs: r-cmd-check: diff --git a/README.Rmd b/README.Rmd index 546ed4863..30d4964dc 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( Probabilistic Supervised Learning for **[mlr3](https://github.com/mlr-org/mlr3/)** ([website](https://mlr3proba.mlr-org.com/)). -[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml) +[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml) [![runiverse](https://mlr-org.r-universe.dev/badges/mlr3proba)](https://mlr-org.r-universe.dev/mlr3proba) [![GitHub Discussions](https://img.shields.io/github/discussions/mlr-org/mlr3proba?logo=github&label=Discussions%20Q%26A&color=FFE600)](https://github.com/mlr-org/mlr3proba/discussions) [![Article](https://img.shields.io/badge/Article-10.1093%2Fbioinformatics%2Fbtab039-brightgreen)](https://doi.org/10.1093/bioinformatics/btab039) diff --git a/README.md b/README.md index 1c0aa0e82..0f68c4050 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Probabilistic Supervised Learning for -[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml) +[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml) [![runiverse](https://mlr-org.r-universe.dev/badges/mlr3proba)](https://mlr-org.r-universe.dev/mlr3proba) [![GitHub 
Discussions](https://img.shields.io/github/discussions/mlr-org/mlr3proba?logo=github&label=Discussions%20Q%26A&color=FFE600)](https://github.com/mlr-org/mlr3proba/discussions) From bbd0c970b54b42c1b96ee62bba88e3d294fd4ebb Mon Sep 17 00:00:00 2001 From: john Date: Wed, 31 Jul 2024 13:19:22 +0200 Subject: [PATCH 30/30] update version + news --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 31f2302f2..9258055c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mlr3proba Title: Probabilistic Supervised Learning for 'mlr3' -Version: 0.6.5 +Version: 0.6.6 Authors@R: c(person(given = "Raphael", family = "Sonabend", diff --git a/NEWS.md b/NEWS.md index 0a79b8ebf..e5e720a95 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# mlr3proba 0.6.6 + +- Small fixes and refactoring to the discrete-time pipeops + # mlr3proba 0.6.5 * Add support for discrete-time survival analysis