From 457a23d1fd58ef415950ec31122e17303f19ab0e Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Fri, 26 Jul 2024 15:31:12 +0200
Subject: [PATCH 01/30] Reapply "remove auxiliary code"

This reverts commit f74d5cf26510fad7dd7f7683d64c2642a8093aa4.
---
 R/PipeOpPredClassifSurvDiscTime.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R
index 1ec099a0b..ad31a7f13 100644
--- a/R/PipeOpPredClassifSurvDiscTime.R
+++ b/R/PipeOpPredClassifSurvDiscTime.R
@@ -68,7 +68,6 @@ PipeOpPredClassifSurvDiscTime = R6Class(
         cumprod(1 - data[data$id == unique_id, ][["dt_hazard"]])
       }, numeric(rows_per_id)))
 
-      pred_list = list()
       unique_end_times = sort(unique(data$tend))
       # coerce to distribution and crank
       pred_list = .surv_return(times = unique_end_times, surv = surv)
@@ -77,8 +76,7 @@ PipeOpPredClassifSurvDiscTime = R6Class(
       # basically a slightly more complex unique()
       real_tend = data$time2[seq_len(nrow(data)) %% rows_per_id == 0]
 
-      # select last row for every id
-      data = as.data.table(data)
+      # select last row for every id => observed times
       id = ped_status = NULL # to fix note
       data = data[, .SD[.N, list(ped_status)], by = id]
 

From 6753d2e550facc6c4f189e8c14e99ea84fc07b0d Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Fri, 26 Jul 2024 15:31:14 +0200
Subject: [PATCH 02/30] Reapply "add dictionary sections in docs"

This reverts commit 6c502462422500cd8c0315294e9b2e59728217aa.
---
 R/PipeOpPredClassifSurvDiscTime.R | 10 ++++++++++
 R/PipeOpTaskSurvClassifDiscTime.R | 12 +++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R
index ad31a7f13..5fcf3f1f3 100644
--- a/R/PipeOpPredClassifSurvDiscTime.R
+++ b/R/PipeOpPredClassifSurvDiscTime.R
@@ -16,6 +16,16 @@
 #' conditional probability for an event in the \eqn{k}-interval.
 #' - \eqn{p_k = 1 - h_k = P(T \ge t_k | T \ge t_{k-1})}
 #'
+#' @section Dictionary:
+#' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the
+#' [dictionary][mlr3misc::Dictionary] [mlr3pipelines::mlr_pipeops]
+#' or with the associated sugar function [mlr3pipelines::po()]:
+#' ```
+#' PipeOpPredClassifSurvDiscTime$new()
+#' mlr_pipeops$get("trafopred_classifsurv_disctime")
+#' po("trafopred_classifsurv_disctime")
+#' ```
+#'
 #' @section Input and Output Channels:
 #' The input is a [PredictionClassif] and a [data.table][data.table::data.table]
 #' with the transformed data both generated by [PipeOpTaskSurvClassifDiscTime].
diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index f89b72055..ae452b45a 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -10,6 +10,16 @@
 #' This approach facilitates survival analysis within a classification framework
 #' using discrete time intervals (Tutz et al. 2016).
 #'
+#' @section Dictionary:
+#' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the
+#' [dictionary][mlr3misc::Dictionary] [mlr3pipelines::mlr_pipeops]
+#' or with the associated sugar function [mlr3pipelines::po()]:
+#' ```
+#' PipeOpTaskSurvClassifDiscTime$new()
+#' mlr_pipeops$get("trafotask_survclassif_disctime")
+#' po("trafotask_survclassif_disctime")
+#' ```
+#'
 #' @section Input and Output Channels:
 #' [PipeOpTaskSurvClassifDiscTime] has one input channel named "input", and two
 #' output channels, one named "output" and the other "transformed_data".
@@ -27,7 +37,7 @@
 #' feature included.
 #' The "transformed_data" is a [data.table] which has all the features of the
 #' "output" task, including an additional column `time2` containing the
-#' original times.
+#' original observed times.
 #' This "transformed_data" is only meant to be used with the [PipeOpPredClassifSurvDiscTime].
 #'
 #' @section State:

From dbbad12644e3b16cca23c083c0b7536898918506 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Fri, 26 Jul 2024 15:31:14 +0200
Subject: [PATCH 03/30] Reapply "updocs"

This reverts commit 06b0e8b97d2f3acc08da18e9d8544a2f4e1d02dc.
---
 man/mlr_pipeops_trafopred_classifsurv_disctime.Rd | 12 ++++++++++++
 man/mlr_pipeops_trafotask_survclassif_disctime.Rd | 14 +++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd b/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd
index b6091af21..96f50868f 100644
--- a/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd
+++ b/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd
@@ -21,6 +21,18 @@ conditional probability for an event in the \eqn{k}-interval.
 \item \eqn{p_k = 1 - h_k = P(T \ge t_k | T \ge t_{k-1})}
 }
 }
+\section{Dictionary}{
+
+This \link[mlr3pipelines:PipeOp]{PipeOp} can be instantiated via the
+\link[mlr3misc:Dictionary]{dictionary} \link[mlr3pipelines:mlr_pipeops]{mlr3pipelines::mlr_pipeops}
+or with the associated sugar function \code{\link[mlr3pipelines:po]{mlr3pipelines::po()}}:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{PipeOpPredClassifSurvDiscTime$new()
+mlr_pipeops$get("trafopred_classifsurv_disctime")
+po("trafopred_classifsurv_disctime")
+}\if{html}{\out{</div>}}
+}
+
 \section{Input and Output Channels}{
 
 The input is a \link{PredictionClassif} and a \link[data.table:data.table]{data.table}
diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
index 5d71a5491..8e940f13d 100644
--- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
+++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
@@ -12,6 +12,18 @@ whether an event occurred within each time interval.
 This approach facilitates survival analysis within a classification framework
 using discrete time intervals (Tutz et al. 2016).
 }
+\section{Dictionary}{
+
+This \link[mlr3pipelines:PipeOp]{PipeOp} can be instantiated via the
+\link[mlr3misc:Dictionary]{dictionary} \link[mlr3pipelines:mlr_pipeops]{mlr3pipelines::mlr_pipeops}
+or with the associated sugar function \code{\link[mlr3pipelines:po]{mlr3pipelines::po()}}:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{PipeOpTaskSurvClassifDiscTime$new()
+mlr_pipeops$get("trafotask_survclassif_disctime")
+po("trafotask_survclassif_disctime")
+}\if{html}{\out{</div>}}
+}
+
 \section{Input and Output Channels}{
 
 \link{PipeOpTaskSurvClassifDiscTime} has one input channel named "input", and two
@@ -30,7 +42,7 @@ During prediction, the "input" \link{TaskSurv} is transformed to the "output"
 feature included.
 The "transformed_data" is a \link{data.table} which has all the features of the
 "output" task, including an additional column \code{time2} containing the
-original times.
+original observed times.
 This "transformed_data" is only meant to be used with the \link{PipeOpPredClassifSurvDiscTime}.
 }
 

From 0e323a6b6a30e323de2d3f2e0df8e27e16e71a93 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Fri, 26 Jul 2024 15:31:14 +0200
Subject: [PATCH 04/30] Reapply "test for same row ids in the test set"

This reverts commit aa5a59100ed7b52f29dd0e98998804a54cce079f.
---
 tests/testthat/test_pipelines.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R
index a4030a4ce..c7ff532f4 100644
--- a/tests/testthat/test_pipelines.R
+++ b/tests/testthat/test_pipelines.R
@@ -127,6 +127,7 @@ test_that("survtoclassif_disctime", {
   suppressWarnings(cox$train(task))
   p2 = cox$predict(task)
 
+  expect_equal(p$row_ids, p2$row_ids)
   expect_equal(p$truth, p2$truth)
   expect_equal(p$score(), p2$score(), tolerance = 0.01)
 

From 402a74df01d8c358ce18939a2c1728cfbec91566 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Fri, 26 Jul 2024 15:31:15 +0200
Subject: [PATCH 05/30] Reapply "test file renaming"

This reverts commit cb64a3a4eb076d2b81fccff0fc3b2a4e214e99b8.
---
 .../{test_pipeop_crankcompositor.R => test_pipeop_crankcompose.R} | 0
 .../{test_pipeop_distrcompositor.R => test_pipeop_distrcompose.R} | 0
 .../{test_pipeop_probregrcompositor.R => test_probregr.R}         | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/testthat/{test_pipeop_crankcompositor.R => test_pipeop_crankcompose.R} (100%)
 rename tests/testthat/{test_pipeop_distrcompositor.R => test_pipeop_distrcompose.R} (100%)
 rename tests/testthat/{test_pipeop_probregrcompositor.R => test_probregr.R} (100%)

diff --git a/tests/testthat/test_pipeop_crankcompositor.R b/tests/testthat/test_pipeop_crankcompose.R
similarity index 100%
rename from tests/testthat/test_pipeop_crankcompositor.R
rename to tests/testthat/test_pipeop_crankcompose.R
diff --git a/tests/testthat/test_pipeop_distrcompositor.R b/tests/testthat/test_pipeop_distrcompose.R
similarity index 100%
rename from tests/testthat/test_pipeop_distrcompositor.R
rename to tests/testthat/test_pipeop_distrcompose.R
diff --git a/tests/testthat/test_pipeop_probregrcompositor.R b/tests/testthat/test_probregr.R
similarity index 100%
rename from tests/testthat/test_pipeop_probregrcompositor.R
rename to tests/testthat/test_probregr.R

From 4a07647856c7a20aca6d345e1a66cdd06551de33 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Fri, 26 Jul 2024 15:31:15 +0200
Subject: [PATCH 06/30] Reapply "add test for discrete time task reduction"

This reverts commit 4be98ce54dbbc4d96f7cb811aae594d038060297.
---
 ...st_pipeop_trafotask_survclassif_disctime.R | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 tests/testthat/test_pipeop_trafotask_survclassif_disctime.R

diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
new file mode 100644
index 000000000..6b2670737
--- /dev/null
+++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
@@ -0,0 +1,57 @@
+test_that("PipeOpTaskSurvClassifDiscTime", {
+  task = tsk("lung")
+  test_ids = c(2, 10, 107)
+  test_task = task$clone()$filter(rows = test_ids)
+  expect_equal(test_ids, test_task$row_ids)
+
+  po_disc = mlr3pipelines::po("trafotask_survclassif_disctime", cut = 4)
+  expect_class(po_disc, c("PipeOp", "PipeOpTaskSurvClassifDiscTime"))
+
+  res = po_disc$train(list(task))
+
+  # 0 is added
+  time_cuts = po_disc$state$cut
+  expect_numeric(time_cuts, len = 5, lower = 0)
+  # no transformed data during training
+  expect_data_table(res[["transformed_data"]], nrows = 0, ncols = 0)
+  # classification task
+  output_task = res[[1L]]
+  expect_task_classif(output_task)
+  expect_equal(output_task$positive, "1")
+  expect_equal(output_task$target_names, "ped_status")
+  # new column added to the task
+  expect_equal("tend", setdiff(output_task$feature_names, task$feature_names))
+  # not all observations have events on the last (4th) interval
+  expect_lt(output_task$nrow, task$nrow * 4)
+
+  res = po_disc$predict(list(test_task))
+  pred_task = res[[1L]]
+
+  expect_task_classif(pred_task)
+  # every test observation will have one row per interval for prediction
+  expect_equal(pred_task$nrow, test_task$nrow * 4)
+  # `tend` matches the cut time points (excluding 0 time point)
+  tends = pred_task$data(cols = "tend")[[1L]]
+  expect_equal(sort(unique(tends)), time_cuts[2:5])
+  # test row ids are correct
+  expect_equal(pred_task$row_names$row_name, rep(test_ids, each = 4))
+
+  transformed_data = res[["transformed_data"]]
+  # test rows ids are correct
+  expect_equal(transformed_data$id, rep(test_ids, each = 4))
+  # check columns in the transformed data.table
+  expect_equal(sort(c("id", "ped_status", "time2", pred_task$feature_names)),
+               sort(colnames(transformed_data)))
+
+  # `ped_status` per interval and per observation is correct
+  # before observed time ("time2"), "ped_status" = 0
+  expect_equal(as.character(unique(transformed_data[tend < time2, ped_status])),
+               "0")
+  times = test_task$times() # observed times
+  status = as.character(test_task$status())
+  # after observed time, "ped_status" must be the same as "status"
+  td = transformed_data[tend > time2]
+  expect_equal(as.character(unique(td[id == test_ids[1], ped_status])), status[1])
+  expect_equal(as.character(unique(td[id == test_ids[2], ped_status])), status[2])
+  expect_equal(as.character(unique(td[id == test_ids[3], ped_status])), status[3])
+})

From 8c42eb290453d84c2da8e1e42f5a802930a0dd31 Mon Sep 17 00:00:00 2001
From: studener <philip.studener@gmx.de>
Date: Sat, 27 Jul 2024 10:13:55 +0200
Subject: [PATCH 07/30] update pipe to pass tests

---
 R/PipeOpPredClassifSurvDiscTime.R |  3 ++-
 R/PipeOpTaskSurvClassifDiscTime.R | 19 ++++++++++++++++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R
index 5fcf3f1f3..db894dbfb 100644
--- a/R/PipeOpPredClassifSurvDiscTime.R
+++ b/R/PipeOpPredClassifSurvDiscTime.R
@@ -86,13 +86,14 @@ PipeOpPredClassifSurvDiscTime = R6Class(
       # basically a slightly more complex unique()
       real_tend = data$time2[seq_len(nrow(data)) %% rows_per_id == 0]
 
+      ids = unique(data$id)
       # select last row for every id => observed times
       id = ped_status = NULL # to fix note
       data = data[, .SD[.N, list(ped_status)], by = id]
 
       # create prediction object
       p = PredictionSurv$new(
-        row_ids = seq_row(data),
+        row_ids = ids,
         crank = pred_list$crank, distr = pred_list$distr,
         truth = Surv(real_tend, as.integer(as.character(data$ped_status))))
 
diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index ae452b45a..62c9a331e 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -143,6 +143,9 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
 
       # remove offset, tstart, interval for dataframe long_data
       long_data[, c("offset", "tstart", "interval") := NULL]
+      reps = table(long_data$id)
+      ids = rep(task$row_ids, times = reps)
+      long_data[, id := ids]
 
       task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data,
                                   target = "ped_status", positive = "1")
@@ -176,16 +179,26 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
 
       ped_status = id = NULL # fixing global binding notes of data.table
       new_data[, ped_status := 0]
-      new_data[new_data[, .I[.N], by = id]$V1, ped_status := status]
+
+      rows_per_id = nrow(new_data) / length(unique(new_data$id))
+      new_data$time2 = rep(time, each = rows_per_id)
+      ids = rep(task$row_ids, each = rows_per_id)
+      new_data[, id := ids]
+
+      # Set correct ped_status
+      reps = new_data[, .(count = sum(tend >= time2)), by = id]$count
+      status = rep(status, times = reps)
+      new_data[new_data[, .I[tend >= time2], by = id]$V1, ped_status := status]
       new_data$ped_status = factor(new_data$ped_status, levels = c("0", "1"))
 
       # remove offset, tstart, interval for dataframe long_data
-      new_data[, c("offset", "tstart", "interval") := NULL]
+      new_data[, c("offset", "tstart", "interval", "time2") := NULL]
       task_disc = TaskClassif$new(paste0(task$id, "_disc"), new_data,
                                   target = "ped_status", positive = "1")
       task_disc$set_col_roles("id", roles = "name")
 
-      new_data$time2 = rep(time, each = sum(new_data$id == 1))
+      reps = table(new_data$id)
+      new_data$time2 = rep(time, each = rows_per_id)
       list(task_disc, new_data)
     }
   )

From 331963c7527929a7c51fa6b621b6fe2d43e5c9a6 Mon Sep 17 00:00:00 2001
From: studener <philip.studener@gmx.de>
Date: Sat, 27 Jul 2024 10:21:30 +0200
Subject: [PATCH 08/30] fix data.table global bindings

---
 R/PipeOpTaskSurvClassifDiscTime.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index 62c9a331e..5304fc523 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -177,15 +177,16 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       new_data = pammtools::as_ped(data, formula = form, cut = cut)
       new_data = as.data.table(new_data)
 
-      ped_status = id = NULL # fixing global binding notes of data.table
+      ped_status = id = tend = time2 = NULL # fixing global binding notes of data.table
       new_data[, ped_status := 0]
 
+      # set correct id
       rows_per_id = nrow(new_data) / length(unique(new_data$id))
       new_data$time2 = rep(time, each = rows_per_id)
       ids = rep(task$row_ids, each = rows_per_id)
       new_data[, id := ids]
 
-      # Set correct ped_status
+      # set correct ped_status
       reps = new_data[, .(count = sum(tend >= time2)), by = id]$count
       status = rep(status, times = reps)
       new_data[new_data[, .I[tend >= time2], by = id]$V1, ped_status := status]

From 3c554d87d0ec6bb1a0511f9adc3fd7cd2c22829c Mon Sep 17 00:00:00 2001
From: studener <philip.studener@gmx.de>
Date: Sat, 27 Jul 2024 10:30:41 +0200
Subject: [PATCH 09/30] fix global bindings

---
 R/PipeOpTaskSurvClassifDiscTime.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index 5304fc523..e5d7a2451 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -145,6 +145,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       long_data[, c("offset", "tstart", "interval") := NULL]
       reps = table(long_data$id)
       ids = rep(task$row_ids, times = reps)
+      id = NULL
       long_data[, id := ids]
 
       task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data,
@@ -187,7 +188,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       new_data[, id := ids]
 
       # set correct ped_status
-      reps = new_data[, .(count = sum(tend >= time2)), by = id]$count
+      reps = new_data[, data.table(count = sum(tend >= time2)), by = id]$count
       status = rep(status, times = reps)
       new_data[new_data[, .I[tend >= time2], by = id]$V1, ped_status := status]
       new_data$ped_status = factor(new_data$ped_status, levels = c("0", "1"))

From b220585c1a7ca86696702ad2e796c5f4abb055e4 Mon Sep 17 00:00:00 2001
From: studener <philip.studener@gmx.de>
Date: Sat, 27 Jul 2024 10:35:52 +0200
Subject: [PATCH 10/30] delete file

---
 test.R | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 test.R

diff --git a/test.R b/test.R
deleted file mode 100644
index cf3820a3e..000000000
--- a/test.R
+++ /dev/null
@@ -1,5 +0,0 @@
-keys = as.data.table(mlr_tasks)[task_type == "surv"][["key"]]
-
-tasks = lapply(keys, function(key) {
-  tsk(key)
-})

From 1b917b8d89f0323cfb582b9018249a1bd32c4d35 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Mon, 29 Jul 2024 18:42:34 +0200
Subject: [PATCH 11/30] add "original_ids" col role

---
 R/aaa.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/aaa.R b/R/aaa.R
index 5fc3c6f42..20925bec5 100644
--- a/R/aaa.R
+++ b/R/aaa.R
@@ -51,6 +51,7 @@ register_reflections = function() {
 
   x$task_col_roles$surv = x$task_col_roles$regr
   x$task_col_roles$dens = c("feature", "target", "label", "order", "group", "weight", "stratum")
+  x$task_col_roles$classif = unique(c(x$task_col_roles$classif, "original_ids")) # for discrete time
   x$task_properties$surv = x$task_properties$regr
   x$task_properties$dens = x$task_properties$regr
 

From 8e35065be1d28d98f99222b3a9f2571bcd6af2fb Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Mon, 29 Jul 2024 18:43:06 +0200
Subject: [PATCH 12/30] unregister reflections

---
 R/zzz.R | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/R/zzz.R b/R/zzz.R
index ac879cf8c..4d95d603c 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -63,6 +63,7 @@ utils::globalVariables(c(
   setHook(event, hooks[pkgname != "mlr3proba"], action = "replace")
 
   # unregister
+  unregister_reflections()
   walk(names(mlr3proba_learners), function(nm) mlr_learners$remove(nm))
   walk(names(mlr3proba_tasks), function(nm) mlr_tasks$remove(nm))
   walk(names(mlr3proba_measures), function(nm) mlr_measures$remove(nm))
@@ -75,4 +76,28 @@ utils::globalVariables(c(
   library.dynam.unload("mlr3proba", libpath)
 }
 
+unregister_reflections = function() {
+  x = utils::getFromNamespace("mlr_reflections", ns = "mlr3")
+
+  # task
+  x$task_types[package != "mlr3proba"]
+  x$task_col_roles$surv = NULL
+  x$task_col_roles$dens = NULL
+  x$task_col_roles$classif = setdiff(x$task_col_roles$classif, "original_ids")
+  x$task_properties$surv = NULL
+  x$task_properties$dens = NULL
+
+  # learner
+  x$learner_properties$surv = NULL
+  x$learner_properties$dens = NULL
+  x$learner_predict_types$surv = NULL
+  x$learner_predict_types$dens = NULL
+
+  # measure
+  x$measure_properties$surv = NULL
+  x$measure_properties$dens = NULL
+  x$default_measures$surv = NULL
+  x$default_measures$dens = NULL
+}
+
 leanify_package()

From bef1f90e9ff6e93caf615230ff597238b839a9f9 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Mon, 29 Jul 2024 23:09:32 +0200
Subject: [PATCH 13/30] refactoring

* better doc
* refactoring var names (new_data => long_data, time2 => obs_times)
* add ids to "original_ids" role
---
 R/PipeOpPredClassifSurvDiscTime.R |  2 +-
 R/PipeOpTaskSurvClassifDiscTime.R | 51 ++++++++++++++++---------------
 2 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R
index db894dbfb..ab86a299b 100644
--- a/R/PipeOpPredClassifSurvDiscTime.R
+++ b/R/PipeOpPredClassifSurvDiscTime.R
@@ -84,7 +84,7 @@ PipeOpPredClassifSurvDiscTime = R6Class(
 
       # select the real tend values by only selecting the last row of each id
       # basically a slightly more complex unique()
-      real_tend = data$time2[seq_len(nrow(data)) %% rows_per_id == 0]
+      real_tend = data$obs_times[seq_len(nrow(data)) %% rows_per_id == 0]
 
       ids = unique(data$id)
       # select last row for every id => observed times
diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index e5d7a2451..a09b572c8 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -26,18 +26,19 @@
 #'
 #' During training, the "output" is the "input" [TaskSurv] transformed to a
 #' [TaskClassif][mlr3::TaskClassif].
-#' The target column is named `ped_status` and indicates whether an event occurred
+#' The target column is named `"ped_status"` and indicates whether an event occurred
 #' in each time interval.
-#' An additional feature named `tend` is added to the ouput task, containing the
-#' end time of each interval.
+#' An additional feature named `"tend"` contains the end time point of each interval.
+#' Lastly, the "output" task has a column with the original observation ids,
+#' under the role `"original_ids"`.
 #' The "transformed_data" is an empty [data.table][data.table::data.table].
 #'
 #' During prediction, the "input" [TaskSurv] is transformed to the "output"
-#' [TaskClassif][mlr3::TaskClassif] with `ped_status` as target and the `tend`
+#' [TaskClassif][mlr3::TaskClassif] with `"ped_status"` as target and the `"tend"`
 #' feature included.
-#' The "transformed_data" is a [data.table] which has all the features of the
-#' "output" task, including an additional column `time2` containing the
-#' original observed times.
+#' The "transformed_data" is a [data.table] which has as columns all the features
+#' of the "output" task and in addition the columns `"id"` (original observation ids),
+#' `"obs_times"` (observed times per `"id"`) and `"tend"` (end time of each interval).
 #' This "transformed_data" is only meant to be used with the [PipeOpPredClassifSurvDiscTime].
 #'
 #' @section State:
@@ -143,6 +144,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
 
       # remove offset, tstart, interval for dataframe long_data
       long_data[, c("offset", "tstart", "interval") := NULL]
+      # keep id mapping
       reps = table(long_data$id)
       ids = rep(task$row_ids, times = reps)
       id = NULL
@@ -150,7 +152,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
 
       task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data,
                                   target = "ped_status", positive = "1")
-      task_disc$set_col_roles("id", roles = "name")
+      task_disc$set_col_roles("id", roles = "original_ids")
 
       list(task_disc, data.table())
     },
@@ -175,33 +177,34 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       # update form
       form = formulate(sprintf("Surv(%s, %s)", time_var, event_var), ".")
 
-      new_data = pammtools::as_ped(data, formula = form, cut = cut)
-      new_data = as.data.table(new_data)
+      long_data = as.data.table(pammtools::as_ped(data, formula = form, cut = cut))
 
-      ped_status = id = tend = time2 = NULL # fixing global binding notes of data.table
-      new_data[, ped_status := 0]
+      ped_status = id = tend = obs_times = NULL # fixing global binding notes of data.table
+      long_data[, ped_status := 0]
 
       # set correct id
-      rows_per_id = nrow(new_data) / length(unique(new_data$id))
-      new_data$time2 = rep(time, each = rows_per_id)
+      rows_per_id = nrow(long_data) / length(unique(long_data$id))
+      long_data$obs_times = rep(time, each = rows_per_id)
       ids = rep(task$row_ids, each = rows_per_id)
-      new_data[, id := ids]
+      long_data[, id := ids]
 
       # set correct ped_status
-      reps = new_data[, data.table(count = sum(tend >= time2)), by = id]$count
+      reps = long_data[, data.table(count = sum(tend >= obs_times)), by = id]$count
       status = rep(status, times = reps)
-      new_data[new_data[, .I[tend >= time2], by = id]$V1, ped_status := status]
-      new_data$ped_status = factor(new_data$ped_status, levels = c("0", "1"))
+      long_data[long_data[, .I[tend >= obs_times], by = id]$V1, ped_status := status]
+      long_data$ped_status = factor(long_data$ped_status, levels = c("0", "1"))
 
       # remove offset, tstart, interval for dataframe long_data
-      new_data[, c("offset", "tstart", "interval", "time2") := NULL]
-      task_disc = TaskClassif$new(paste0(task$id, "_disc"), new_data,
+      long_data[, c("offset", "tstart", "interval", "obs_times") := NULL]
+      task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data,
                                   target = "ped_status", positive = "1")
-      task_disc$set_col_roles("id", roles = "name")
+      task_disc$set_col_roles("id", roles = "original_ids")
+
+      # map observed times back
+      reps = table(long_data$id)
+      long_data$obs_times = rep(time, each = rows_per_id)
 
-      reps = table(new_data$id)
-      new_data$time2 = rep(time, each = rows_per_id)
-      list(task_disc, new_data)
+      list(task_disc, long_data)
     }
   )
 )

From a01cdfc4656600b791e81045f7d1db47d1561a45 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Mon, 29 Jul 2024 23:11:46 +0200
Subject: [PATCH 14/30] updocs

---
 man/mlr_pipeops_trafotask_survclassif_disctime.Rd | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
index 8e940f13d..6a633767b 100644
--- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
+++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
@@ -31,18 +31,19 @@ output channels, one named "output" and the other "transformed_data".
 
 During training, the "output" is the "input" \link{TaskSurv} transformed to a
 \link[mlr3:TaskClassif]{TaskClassif}.
-The target column is named \code{ped_status} and indicates whether an event occurred
+The target column is named \code{"ped_status"} and indicates whether an event occurred
 in each time interval.
-An additional feature named \code{tend} is added to the ouput task, containing the
-end time of each interval.
+An additional feature named \code{"tend"} contains the end time point of each interval.
+Lastly, the "output" task has a column with the original observation ids,
+under the role \code{"original_ids"}.
 The "transformed_data" is an empty \link[data.table:data.table]{data.table}.
 
 During prediction, the "input" \link{TaskSurv} is transformed to the "output"
-\link[mlr3:TaskClassif]{TaskClassif} with \code{ped_status} as target and the \code{tend}
+\link[mlr3:TaskClassif]{TaskClassif} with \code{"ped_status"} as target and the \code{"tend"}
 feature included.
-The "transformed_data" is a \link{data.table} which has all the features of the
-"output" task, including an additional column \code{time2} containing the
-original observed times.
+The "transformed_data" is a \link{data.table} which has as columns all the features
+of the "output" task and in addition the columns \code{"id"} (original observation ids),
+\code{"obs_times"} (observed times per \code{"id"}) and \code{"tend"} (end time of each interval).
 This "transformed_data" is only meant to be used with the \link{PipeOpPredClassifSurvDiscTime}.
 }
 

From e4859a920729e1733e24b640e155e110cb0982f4 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Mon, 29 Jul 2024 23:13:01 +0200
Subject: [PATCH 15/30] add package name

---
 tests/testthat/test_pipelines.R | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R
index c7ff532f4..fae131dd9 100644
--- a/tests/testthat/test_pipelines.R
+++ b/tests/testthat/test_pipelines.R
@@ -153,12 +153,13 @@ test_that("survtoclassif_disctime", {
   expect_prediction_surv(p)
 
   # Test with rhs
-  grlrn = ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), rhs = "1",
-              graph_learner = TRUE)
+  grlrn = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"),
+                             rhs = "1", graph_learner = TRUE)
   grlrn$train(task)
   pred = suppressWarnings(grlrn$predict(task))
 
-  grlrn2 = ppl("survtoclassif_disctime", learner = lrn("classif.featureless"), graph_learner = TRUE)
+  grlrn2 = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.featureless"),
+                              graph_learner = TRUE)
   grlrn2$train(task)
   pred2 = grlrn2$predict(task)
 
@@ -167,16 +168,16 @@ test_that("survtoclassif_disctime", {
   expect_equal(unname(pred2$score()), 0.5)
   expect_equal(pred$data$distr, pred2$data$distr)
 
-  grlrn = ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), rhs = "rx + litter",
-             graph_learner = TRUE)
+  grlrn = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"),
+                             rhs = "rx + litter", graph_learner = TRUE)
   grlrn$train(task)
   pred = suppressWarnings(grlrn$predict(task))
 
-  grlrn2 = ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), rhs = ".",
-               graph_learner = TRUE)
+  grlrn2 = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"),
+                              rhs = ".", graph_learner = TRUE)
   grlrn2$train(task)
   pred2 = suppressWarnings(grlrn2$predict(task))
 
-  # model with more covariates should have better c-index
+  # model with more covariates should have better C-index
   expect_gt(pred2$score(), pred$score())
 })

From e2b471c63b920e8c35226f7822032ed44d99759b Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Mon, 29 Jul 2024 23:14:07 +0200
Subject: [PATCH 16/30] update test, split train/test

---
 ...st_pipeop_trafotask_survclassif_disctime.R | 25 +++++++++++++------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
index 6b2670737..617959851 100644
--- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
+++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
@@ -1,13 +1,18 @@
 test_that("PipeOpTaskSurvClassifDiscTime", {
   task = tsk("lung")
+
+  # imitate train/test split manually
   test_ids = c(2, 10, 107)
+  train_ids = setdiff(task$row_ids, test_ids)
   test_task = task$clone()$filter(rows = test_ids)
-  expect_equal(test_ids, test_task$row_ids)
+  train_task = task$clone()$filter(rows = train_ids)
+  expect_equal(test_task$row_ids, test_ids)
+  expect_equal(train_task$row_ids, train_ids)
 
   po_disc = mlr3pipelines::po("trafotask_survclassif_disctime", cut = 4)
   expect_class(po_disc, c("PipeOp", "PipeOpTaskSurvClassifDiscTime"))
 
-  res = po_disc$train(list(task))
+  res = po_disc$train(list(train_task))
 
   # 0 is added
   time_cuts = po_disc$state$cut
@@ -17,6 +22,7 @@ test_that("PipeOpTaskSurvClassifDiscTime", {
   # classification task
   output_task = res[[1L]]
   expect_task_classif(output_task)
+  expect_equal(output_task$col_roles$original_ids, "id")
   expect_equal(output_task$positive, "1")
   expect_equal(output_task$target_names, "ped_status")
   # new column added to the task
@@ -34,23 +40,26 @@ test_that("PipeOpTaskSurvClassifDiscTime", {
   tends = pred_task$data(cols = "tend")[[1L]]
   expect_equal(sort(unique(tends)), time_cuts[2:5])
   # test row ids are correct
-  expect_equal(pred_task$row_names$row_name, rep(test_ids, each = 4))
+  expect_equal(pred_task$col_roles$original_ids, "id")
+  original_ids = pred_task$data(cols = "id")[[1L]]
+  correct_ids = rep(test_ids, each = 4)
+  expect_equal(original_ids, correct_ids)
 
   transformed_data = res[["transformed_data"]]
   # test rows ids are correct
-  expect_equal(transformed_data$id, rep(test_ids, each = 4))
+  expect_equal(transformed_data$id, correct_ids)
   # check columns in the transformed data.table
-  expect_equal(sort(c("id", "ped_status", "time2", pred_task$feature_names)),
+  expect_equal(sort(c("id", "ped_status", "obs_times", pred_task$feature_names)),
                sort(colnames(transformed_data)))
 
   # `ped_status` per interval and per observation is correct
-  # before observed time ("time2"), "ped_status" = 0
-  expect_equal(as.character(unique(transformed_data[tend < time2, ped_status])),
+  # before observed time ("obs_times"), "ped_status" = 0
+  expect_equal(as.character(unique(transformed_data[tend < obs_times, ped_status])),
                "0")
   times = test_task$times() # observed times
   status = as.character(test_task$status())
   # after observed time, "ped_status" must be the same as "status"
-  td = transformed_data[tend > time2]
+  td = transformed_data[tend > obs_times]
   expect_equal(as.character(unique(td[id == test_ids[1], ped_status])), status[1])
   expect_equal(as.character(unique(td[id == test_ids[2], ped_status])), status[2])
   expect_equal(as.character(unique(td[id == test_ids[3], ped_status])), status[3])

From 3bc57fdf3b7c45c069790053f3fcad2942de3e9b Mon Sep 17 00:00:00 2001
From: Philip Studener <philip.studener@gmx.de>
Date: Tue, 30 Jul 2024 11:18:48 +0200
Subject: [PATCH 17/30] edit transformed_data

---
 R/PipeOpTaskSurvClassifDiscTime.R                           | 1 +
 tests/testthat/test_pipeop_trafotask_survclassif_disctime.R | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index a09b572c8..738bcb178 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -203,6 +203,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       # map observed times back
       reps = table(long_data$id)
       long_data$obs_times = rep(time, each = rows_per_id)
+      long_data = long_data[, .(id, obs_times, tend, ped_status)]
 
       list(task_disc, long_data)
     }
diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
index 617959851..1e21ebfb7 100644
--- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
+++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
@@ -49,7 +49,7 @@ test_that("PipeOpTaskSurvClassifDiscTime", {
   # test rows ids are correct
   expect_equal(transformed_data$id, correct_ids)
   # check columns in the transformed data.table
-  expect_equal(sort(c("id", "ped_status", "obs_times", pred_task$feature_names)),
+  expect_equal(sort(c("id", "ped_status", "obs_times", "tend")),
                sort(colnames(transformed_data)))
 
   # `ped_status` per interval and per observation is correct

From a472af63f22c80c4a586d5ec30c18684ee0241a5 Mon Sep 17 00:00:00 2001
From: Philip Studener <philip.studener@gmx.de>
Date: Tue, 30 Jul 2024 11:41:27 +0200
Subject: [PATCH 18/30] rename ped_status -> disc_status

---
 R/PipeOpPredClassifSurvDiscTime.R             |  6 ++--
 R/PipeOpTaskSurvClassifDiscTime.R             | 28 ++++++++++---------
 ...st_pipeop_trafotask_survclassif_disctime.R | 18 ++++++------
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/R/PipeOpPredClassifSurvDiscTime.R b/R/PipeOpPredClassifSurvDiscTime.R
index ab86a299b..5671041cd 100644
--- a/R/PipeOpPredClassifSurvDiscTime.R
+++ b/R/PipeOpPredClassifSurvDiscTime.R
@@ -88,14 +88,14 @@ PipeOpPredClassifSurvDiscTime = R6Class(
 
       ids = unique(data$id)
       # select last row for every id => observed times
-      id = ped_status = NULL # to fix note
-      data = data[, .SD[.N, list(ped_status)], by = id]
+      id = disc_status = NULL # to fix note
+      data = data[, .SD[.N, list(disc_status)], by = id]
 
       # create prediction object
       p = PredictionSurv$new(
         row_ids = ids,
         crank = pred_list$crank, distr = pred_list$distr,
-        truth = Surv(real_tend, as.integer(as.character(data$ped_status))))
+        truth = Surv(real_tend, as.integer(as.character(data$disc_status))))
 
       list(p)
     },
diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index 738bcb178..f794ca993 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -5,7 +5,7 @@
 #' @description
 #' Transform [TaskSurv] to [TaskClassif][mlr3::TaskClassif] by dividing continuous
 #' time into multiple time intervals for each observation.
-#' This transformation creates a new target variable `ped_status` that indicates
+#' This transformation creates a new target variable `disc_status` that indicates
 #' whether an event occurred within each time interval.
 #' This approach facilitates survival analysis within a classification framework
 #' using discrete time intervals (Tutz et al. 2016).
@@ -26,7 +26,7 @@
 #'
 #' During training, the "output" is the "input" [TaskSurv] transformed to a
 #' [TaskClassif][mlr3::TaskClassif].
-#' The target column is named `"ped_status"` and indicates whether an event occurred
+#' The target column is named `"disc_status"` and indicates whether an event occurred
 #' in each time interval.
 #' An additional feature named `"tend"` contains the end time point of each interval.
 #' Lastly, the "output" task has a column with the original observation ids,
@@ -34,7 +34,7 @@
 #' The "transformed_data" is an empty [data.table][data.table::data.table].
 #'
 #' During prediction, the "input" [TaskSurv] is transformed to the "output"
-#' [TaskClassif][mlr3::TaskClassif] with `"ped_status"` as target and the `"tend"`
+#' [TaskClassif][mlr3::TaskClassif] with `"disc_status"` as target and the `"tend"`
 #' feature included.
 #' The "transformed_data" is a [data.table] which has as columns all the features
 #' of the "output" task and in addition the columns `"id"` (original observation ids),
@@ -140,10 +140,10 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       long_data = pammtools::as_ped(data = data, formula = form, cut = cut, max_time = max_time)
       self$state$cut = attributes(long_data)$trafo_args$cut
       long_data = as.data.table(long_data)
-      long_data$ped_status = factor(long_data$ped_status, levels = c("0", "1"))
+      long_data$disc_status = factor(long_data$ped_status, levels = c("0", "1"))
 
       # remove offset, tstart, interval for dataframe long_data
-      long_data[, c("offset", "tstart", "interval") := NULL]
+      long_data[, c("offset", "tstart", "interval", "ped_status") := NULL]
       # keep id mapping
       reps = table(long_data$id)
       ids = rep(task$row_ids, times = reps)
@@ -151,7 +151,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       long_data[, id := ids]
 
       task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data,
-                                  target = "ped_status", positive = "1")
+                                  target = "disc_status", positive = "1")
       task_disc$set_col_roles("id", roles = "original_ids")
 
       list(task_disc, data.table())
@@ -179,31 +179,33 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
 
       long_data = as.data.table(pammtools::as_ped(data, formula = form, cut = cut))
 
-      ped_status = id = tend = obs_times = NULL # fixing global binding notes of data.table
-      long_data[, ped_status := 0]
+      long_data$disc_status = long_data$ped_status
+      long_data[, "ped_status" := NULL]
 
+      disc_status = id = tend = obs_times = NULL # fixing global binding notes of data.table
+      long_data[, disc_status := 0]
       # set correct id
       rows_per_id = nrow(long_data) / length(unique(long_data$id))
       long_data$obs_times = rep(time, each = rows_per_id)
       ids = rep(task$row_ids, each = rows_per_id)
       long_data[, id := ids]
 
-      # set correct ped_status
+      # set correct disc_status
       reps = long_data[, data.table(count = sum(tend >= obs_times)), by = id]$count
       status = rep(status, times = reps)
-      long_data[long_data[, .I[tend >= obs_times], by = id]$V1, ped_status := status]
-      long_data$ped_status = factor(long_data$ped_status, levels = c("0", "1"))
+      long_data[long_data[, .I[tend >= obs_times], by = id]$V1, disc_status := status]
+      long_data$disc_status = factor(long_data$disc_status, levels = c("0", "1"))
 
       # remove offset, tstart, interval for dataframe long_data
       long_data[, c("offset", "tstart", "interval", "obs_times") := NULL]
       task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data,
-                                  target = "ped_status", positive = "1")
+                                  target = "disc_status", positive = "1")
       task_disc$set_col_roles("id", roles = "original_ids")
 
       # map observed times back
       reps = table(long_data$id)
       long_data$obs_times = rep(time, each = rows_per_id)
-      long_data = long_data[, .(id, obs_times, tend, ped_status)]
+      long_data = long_data[, data.table(id, obs_times, tend, disc_status)]
 
       list(task_disc, long_data)
     }
diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
index 1e21ebfb7..d60770bff 100644
--- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
+++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
@@ -24,7 +24,7 @@ test_that("PipeOpTaskSurvClassifDiscTime", {
   expect_task_classif(output_task)
   expect_equal(output_task$col_roles$original_ids, "id")
   expect_equal(output_task$positive, "1")
-  expect_equal(output_task$target_names, "ped_status")
+  expect_equal(output_task$target_names, "disc_status")
   # new column added to the task
   expect_equal("tend", setdiff(output_task$feature_names, task$feature_names))
   # not all observations have events on the last (4th) interval
@@ -49,18 +49,18 @@ test_that("PipeOpTaskSurvClassifDiscTime", {
   # test rows ids are correct
   expect_equal(transformed_data$id, correct_ids)
   # check columns in the transformed data.table
-  expect_equal(sort(c("id", "ped_status", "obs_times", "tend")),
+  expect_equal(sort(c("id", "disc_status", "obs_times", "tend")),
                sort(colnames(transformed_data)))
 
-  # `ped_status` per interval and per observation is correct
-  # before observed time ("obs_times"), "ped_status" = 0
-  expect_equal(as.character(unique(transformed_data[tend < obs_times, ped_status])),
+  # `disc_status` per interval and per observation is correct
+  # before observed time ("obs_times"), "disc_status" = 0
+  expect_equal(as.character(unique(transformed_data[tend < obs_times, disc_status])),
                "0")
   times = test_task$times() # observed times
   status = as.character(test_task$status())
-  # after observed time, "ped_status" must be the same as "status"
+  # after observed time, "disc_status" must be the same as "status"
   td = transformed_data[tend > obs_times]
-  expect_equal(as.character(unique(td[id == test_ids[1], ped_status])), status[1])
-  expect_equal(as.character(unique(td[id == test_ids[2], ped_status])), status[2])
-  expect_equal(as.character(unique(td[id == test_ids[3], ped_status])), status[3])
+  expect_equal(as.character(unique(td[id == test_ids[1], disc_status])), status[1])
+  expect_equal(as.character(unique(td[id == test_ids[2], disc_status])), status[2])
+  expect_equal(as.character(unique(td[id == test_ids[3], disc_status])), status[3])
 })

From 3fefd12ccd16058e16ebb6c77ce9ba3a4d2d16c3 Mon Sep 17 00:00:00 2001
From: Philip Studener <philip.studener@gmx.de>
Date: Tue, 30 Jul 2024 11:42:58 +0200
Subject: [PATCH 19/30] updocs

---
 man/mlr_pipeops_trafotask_survclassif_disctime.Rd | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
index 6a633767b..37396414f 100644
--- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
+++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
@@ -7,7 +7,7 @@
 \description{
 Transform \link{TaskSurv} to \link[mlr3:TaskClassif]{TaskClassif} by dividing continuous
 time into multiple time intervals for each observation.
-This transformation creates a new target variable \code{ped_status} that indicates
+This transformation creates a new target variable \code{disc_status} that indicates
 whether an event occurred within each time interval.
 This approach facilitates survival analysis within a classification framework
 using discrete time intervals (Tutz et al. 2016).
@@ -31,7 +31,7 @@ output channels, one named "output" and the other "transformed_data".
 
 During training, the "output" is the "input" \link{TaskSurv} transformed to a
 \link[mlr3:TaskClassif]{TaskClassif}.
-The target column is named \code{"ped_status"} and indicates whether an event occurred
+The target column is named \code{"disc_status"} and indicates whether an event occurred
 in each time interval.
 An additional feature named \code{"tend"} contains the end time point of each interval.
 Lastly, the "output" task has a column with the original observation ids,
@@ -39,7 +39,7 @@ under the role \code{"original_ids"}.
 The "transformed_data" is an empty \link[data.table:data.table]{data.table}.
 
 During prediction, the "input" \link{TaskSurv} is transformed to the "output"
-\link[mlr3:TaskClassif]{TaskClassif} with \code{"ped_status"} as target and the \code{"tend"}
+\link[mlr3:TaskClassif]{TaskClassif} with \code{"disc_status"} as target and the \code{"tend"}
 feature included.
 The "transformed_data" is a \link{data.table} which has as columns all the features
 of the "output" task and in addition the columns \code{"id"} (original observation ids),

From 1d0350763c9e2333e3889216d1a7c00f85b2c251 Mon Sep 17 00:00:00 2001
From: Philip Studener <philip.studener@gmx.de>
Date: Tue, 30 Jul 2024 11:52:16 +0200
Subject: [PATCH 20/30] make sure that disc_status is not in colnames

---
 R/PipeOpTaskSurvClassifDiscTime.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index f794ca993..c0c2aa07c 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -121,6 +121,10 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       assert_true(task$censtype == "right")
       data = task$data()
 
+      if ("disc_status" %in% colnames(task$data())) {
+        stop("\"disc_status\" can not be a column in the input data.")
+      }
+
       cut = assert_numeric(self$param_set$values$cut, null.ok = TRUE, lower = 0)
       max_time = self$param_set$values$max_time
 

From 8ea2c80aada8d0be641a2d027d21073aaed9ba54 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Tue, 30 Jul 2024 14:46:57 +0200
Subject: [PATCH 21/30] refactor test + add some more checks

---
 ...st_pipeop_trafotask_survclassif_disctime.R | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
index d60770bff..efb8ab7f5 100644
--- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
+++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
@@ -38,27 +38,33 @@ test_that("PipeOpTaskSurvClassifDiscTime", {
   expect_equal(pred_task$nrow, test_task$nrow * 4)
   # `tend` matches the cut time points (excluding 0 time point)
   tends = pred_task$data(cols = "tend")[[1L]]
-  expect_equal(sort(unique(tends)), time_cuts[2:5])
-  # test row ids are correct
+  expect_setequal(unique(tends), time_cuts[2:5])
+  # original row ids are correct
   expect_equal(pred_task$col_roles$original_ids, "id")
   original_ids = pred_task$data(cols = "id")[[1L]]
   correct_ids = rep(test_ids, each = 4)
   expect_equal(original_ids, correct_ids)
 
   transformed_data = res[["transformed_data"]]
-  # test rows ids are correct
-  expect_equal(transformed_data$id, correct_ids)
   # check columns in the transformed data.table
-  expect_equal(sort(c("id", "disc_status", "obs_times", "tend")),
-               sort(colnames(transformed_data)))
+  expect_set_equal(colnames(transformed_data),
+                   c("id", "disc_status", "obs_times", "tend"))
+  # `id`s are correct
+  expect_equal(transformed_data$id, correct_ids)
+  # `disc_status` is the same
+  expect_equal(transformed_data$disc_status, pred_task$truth())
+  # `obs_times` are correct
+  times = test_task$times() # observed times
+  expect_setequal(unique(transformed_data$obs_times), times)
+  # `tends` are correct
+  expect_setequal(unique(transformed_data$tend), time_cuts[2:5])
 
   # `disc_status` per interval and per observation is correct
   # before observed time ("obs_times"), "disc_status" = 0
-  expect_equal(as.character(unique(transformed_data[tend < obs_times, disc_status])),
-               "0")
-  times = test_task$times() # observed times
-  status = as.character(test_task$status())
+  expect_equal(as.character(unique(transformed_data[tend < obs_times, disc_status])), "0")
+
   # after observed time, "disc_status" must be the same as "status"
+  status = as.character(test_task$status())
   td = transformed_data[tend > obs_times]
   expect_equal(as.character(unique(td[id == test_ids[1], disc_status])), status[1])
   expect_equal(as.character(unique(td[id == test_ids[2], disc_status])), status[2])

From 1fea434457f41b874ba6352af606b8aaf5ec28eb Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 12:22:24 +0200
Subject: [PATCH 22/30] rename file

---
 .github/workflows/{r-cmd-check.yml => R-CMD-check.yml} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/workflows/{r-cmd-check.yml => R-CMD-check.yml} (100%)

diff --git a/.github/workflows/r-cmd-check.yml b/.github/workflows/R-CMD-check.yml
similarity index 100%
rename from .github/workflows/r-cmd-check.yml
rename to .github/workflows/R-CMD-check.yml

From c4cfba567f4c124a21b8a0d78397ddb695c81678 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 12:22:53 +0200
Subject: [PATCH 23/30] add R-universe badge

---
 README.Rmd | 11 +++++------
 README.md  | 12 ++++++------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 963f316b1..546ed4863 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -12,16 +12,15 @@ knitr::opts_chunk$set(
 
 # mlr3proba
 
-Package website: [release](https://mlr3proba.mlr-org.com/)
-
-Probabilistic Supervised Learning for **[mlr3](https://github.com/mlr-org/mlr3/)**.
+Probabilistic Supervised Learning for **[mlr3](https://github.com/mlr-org/mlr3/)** ([website](https://mlr3proba.mlr-org.com/)).
 
 <!-- badges: start -->
-[![r-cmd-check](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml)
+[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml)
+[![runiverse](https://mlr-org.r-universe.dev/badges/mlr3proba)](https://mlr-org.r-universe.dev/mlr3proba)
 [![GitHub Discussions](https://img.shields.io/github/discussions/mlr-org/mlr3proba?logo=github&label=Discussions%20Q%26A&color=FFE600)](https://github.com/mlr-org/mlr3proba/discussions)
 [![Article](https://img.shields.io/badge/Article-10.1093%2Fbioinformatics%2Fbtab039-brightgreen)](https://doi.org/10.1093/bioinformatics/btab039)
-[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3)
-[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/)
+[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg?color=pink)](https://stackoverflow.com/questions/tagged/mlr3)
+[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg?color=pink)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/)
 <!-- badges: end -->
 
 ## What is mlr3proba?
diff --git a/README.md b/README.md
index bd0ca5edc..1c0aa0e82 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,19 @@
 
 # mlr3proba
 
-Package website: [release](https://mlr3proba.mlr-org.com/)
-
 Probabilistic Supervised Learning for
-**[mlr3](https://github.com/mlr-org/mlr3/)**.
+**[mlr3](https://github.com/mlr-org/mlr3/)**
+([website](https://mlr3proba.mlr-org.com/)).
 
 <!-- badges: start -->
 
-[![r-cmd-check](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml)
+[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml)
+[![runiverse](https://mlr-org.r-universe.dev/badges/mlr3proba)](https://mlr-org.r-universe.dev/mlr3proba)
 [![GitHub
 Discussions](https://img.shields.io/github/discussions/mlr-org/mlr3proba?logo=github&label=Discussions%20Q%26A&color=FFE600)](https://github.com/mlr-org/mlr3proba/discussions)
 [![Article](https://img.shields.io/badge/Article-10.1093%2Fbioinformatics%2Fbtab039-brightgreen)](https://doi.org/10.1093/bioinformatics/btab039)
-[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3)
-[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/)
+[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg?color=pink)](https://stackoverflow.com/questions/tagged/mlr3)
+[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg?color=pink)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/)
 <!-- badges: end -->
 
 ## What is mlr3proba?

From e23e29fbe27a5a990b38d69695df3b96e2d54064 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 12:26:55 +0200
Subject: [PATCH 24/30] fix note

---
 R/zzz.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/zzz.R b/R/zzz.R
index 4d95d603c..20a0b3e83 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -80,6 +80,7 @@ unregister_reflections = function() {
   x = utils::getFromNamespace("mlr_reflections", ns = "mlr3")
 
   # task
+  package = NULL # silence data.table notes
   x$task_types[package != "mlr3proba"]
   x$task_col_roles$surv = NULL
   x$task_col_roles$dens = NULL

From 3217512e7250cd3271f267861e361f1ef02b1461 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 12:34:08 +0200
Subject: [PATCH 25/30] use testthat expectations for testing

---
 R/TaskSurv.R                                              | 2 +-
 R/pipelines.R                                             | 2 +-
 inst/testthat/helper_expectations.R                       | 2 +-
 tests/testthat/test_PredictionSurv.R                      | 8 ++++----
 tests/testthat/test_TaskSurv.R                            | 2 +-
 .../testthat/test_pipeop_trafotask_survclassif_disctime.R | 4 ++--
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/R/TaskSurv.R b/R/TaskSurv.R
index 80ba4f63a..9dd9f4326 100644
--- a/R/TaskSurv.R
+++ b/R/TaskSurv.R
@@ -378,7 +378,7 @@ TaskSurv = R6::R6Class("TaskSurv",
 
     #' @description
     #' Checks if the data satisfy the *proportional hazards (PH)* assumption using
-    #' the Grambsch-Therneau test, `r mlr3misc::cite_bib("grambsch_1994")`.
+    #' the Grambsch-Therneau test, `r cite_bib("grambsch_1994")`.
     #' Uses [cox.zph][survival::cox.zph()].
     #' This method should be used only for **low-dimensional datasets** where
     #' the number of features is relatively small compared to the number of
diff --git a/R/pipelines.R b/R/pipelines.R
index 307d90b4e..638c89d97 100644
--- a/R/pipelines.R
+++ b/R/pipelines.R
@@ -598,7 +598,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL,
 
   if (!is.null(rhs)) {
     gr$edges = gr$edges[-1, ]
-    gr$add_pipeop(mlr3pipelines::po("modelmatrix", formula = mlr3misc::formulate(rhs = rhs, quote = "left")))
+    gr$add_pipeop(mlr3pipelines::po("modelmatrix", formula = formulate(rhs = rhs, quote = "left")))
     gr$add_edge(src_id = "trafotask_survclassif_disctime", dst_id = "modelmatrix", src_channel = "output")
     gr$add_edge(src_id = "modelmatrix", dst_id = learner$id, src_channel = "output", dst_channel = "input")
   }
diff --git a/inst/testthat/helper_expectations.R b/inst/testthat/helper_expectations.R
index 5f0726ac1..b5a6ebba9 100644
--- a/inst/testthat/helper_expectations.R
+++ b/inst/testthat/helper_expectations.R
@@ -21,7 +21,7 @@ expect_task_surv = function(task) {
 
   f = task$formula()
   expect_formula(f)
-  expect_set_equal(mlr3misc::extract_vars(f)$lhs, task$target_names)
+  expect_setequal(extract_vars(f)$lhs, task$target_names)
   expect_class(task$kaplan(), "survfit")
 }
 
diff --git a/tests/testthat/test_PredictionSurv.R b/tests/testthat/test_PredictionSurv.R
index 2cfad7573..51770a540 100644
--- a/tests/testthat/test_PredictionSurv.R
+++ b/tests/testthat/test_PredictionSurv.R
@@ -192,10 +192,10 @@ test_that("filtering", {
   expect_prediction_surv(p3)
   expect_prediction_surv(p4)
 
-  expect_set_equal(p$data$row_ids, c(20, 37, 42))
-  expect_set_equal(p2$data$row_ids, c(20, 37, 42))
-  expect_set_equal(p3$data$row_ids, c(20, 37, 42))
-  expect_set_equal(p4$data$row_ids, c(20, 37, 42))
+  expect_setequal(p$data$row_ids, c(20, 37, 42))
+  expect_setequal(p2$data$row_ids, c(20, 37, 42))
+  expect_setequal(p3$data$row_ids, c(20, 37, 42))
+  expect_setequal(p4$data$row_ids, c(20, 37, 42))
   expect_numeric(p$data$crank, any.missing = FALSE, len = 3L)
   expect_numeric(p2$data$crank, any.missing = FALSE, len = 3L)
   expect_numeric(p3$data$crank, any.missing = FALSE, len = 3L)
diff --git a/tests/testthat/test_TaskSurv.R b/tests/testthat/test_TaskSurv.R
index 78ed0c095..b1867a4e2 100644
--- a/tests/testthat/test_TaskSurv.R
+++ b/tests/testthat/test_TaskSurv.R
@@ -1,7 +1,7 @@
 test_that("Task duplicates rows", {
   task = tsk("lung")
   expect_task_surv(task)
-  expect_set_equal(extract_vars(task$formula())$rhs, ".")
+  expect_setequal(extract_vars(task$formula())$rhs, ".")
 })
 
 test_that("right censoring", {
diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
index efb8ab7f5..32e668c0d 100644
--- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
+++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
@@ -47,8 +47,8 @@ test_that("PipeOpTaskSurvClassifDiscTime", {
 
   transformed_data = res[["transformed_data"]]
   # check columns in the transformed data.table
-  expect_set_equal(colnames(transformed_data),
-                   c("id", "disc_status", "obs_times", "tend"))
+  expect_setequal(colnames(transformed_data),
+                  c("id", "disc_status", "obs_times", "tend"))
   # `id`s are correct
   expect_equal(transformed_data$id, correct_ids)
   # `disc_status` is the same

From 0d7ba22db4238b0878594bf9815925034db44e28 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 12:35:52 +0200
Subject: [PATCH 26/30] fix test

---
 tests/testthat/test_pipeop_trafotask_survclassif_disctime.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
index 32e668c0d..9723bd53d 100644
--- a/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
+++ b/tests/testthat/test_pipeop_trafotask_survclassif_disctime.R
@@ -52,7 +52,8 @@ test_that("PipeOpTaskSurvClassifDiscTime", {
   # `id`s are correct
   expect_equal(transformed_data$id, correct_ids)
   # `disc_status` is the same
-  expect_equal(transformed_data$disc_status, pred_task$truth())
+  expect_equal(as.character(transformed_data$disc_status),
+               as.character(pred_task$truth()))
   # `obs_times` are correct
   times = test_task$times() # observed times
   expect_setequal(unique(transformed_data$obs_times), times)

From 36044d9885984859129610d8b4de0bc47fd83128 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 13:09:32 +0200
Subject: [PATCH 27/30] update docs

---
 R/PipeOpTaskSurvClassifDiscTime.R                 | 4 ++--
 man/mlr_pipeops_trafotask_survclassif_disctime.Rd | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index c0c2aa07c..22fd944ec 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -36,8 +36,8 @@
 #' During prediction, the "input" [TaskSurv] is transformed to the "output"
 #' [TaskClassif][mlr3::TaskClassif] with `"disc_status"` as target and the `"tend"`
 #' feature included.
-#' The "transformed_data" is a [data.table] which has as columns all the features
-#' of the "output" task and in addition the columns `"id"` (original observation ids),
+#' The "transformed_data" is a [data.table] whose columns are the `"disc_status"`
+#' target of the "output" task, `"id"` (original observation ids),
 #' `"obs_times"` (observed times per `"id"`) and `"tend"` (end time of each interval).
 #' This "transformed_data" is only meant to be used with the [PipeOpPredClassifSurvDiscTime].
 #'
diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
index 37396414f..873dfb5ab 100644
--- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
+++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd
@@ -41,8 +41,8 @@ The "transformed_data" is an empty \link[data.table:data.table]{data.table}.
 During prediction, the "input" \link{TaskSurv} is transformed to the "output"
 \link[mlr3:TaskClassif]{TaskClassif} with \code{"disc_status"} as target and the \code{"tend"}
 feature included.
-The "transformed_data" is a \link{data.table} which has as columns all the features
-of the "output" task and in addition the columns \code{"id"} (original observation ids),
+The "transformed_data" is a \link{data.table} whose columns are the \code{"disc_status"}
+target of the "output" task, \code{"id"} (original observation ids),
 \code{"obs_times"} (observed times per \code{"id"}) and \code{"tend"} (end time of each interval).
 This "transformed_data" is only meant to be used with the \link{PipeOpPredClassifSurvDiscTime}.
 }

From 3aeb622ebbfbd4174a6e21d74eb626f325e34488 Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 13:11:23 +0200
Subject: [PATCH 28/30] refactoring

---
 R/PipeOpTaskSurvClassifDiscTime.R | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/R/PipeOpTaskSurvClassifDiscTime.R b/R/PipeOpTaskSurvClassifDiscTime.R
index 22fd944ec..89f606516 100644
--- a/R/PipeOpTaskSurvClassifDiscTime.R
+++ b/R/PipeOpTaskSurvClassifDiscTime.R
@@ -144,10 +144,11 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       long_data = pammtools::as_ped(data = data, formula = form, cut = cut, max_time = max_time)
       self$state$cut = attributes(long_data)$trafo_args$cut
       long_data = as.data.table(long_data)
-      long_data$disc_status = factor(long_data$ped_status, levels = c("0", "1"))
+      setnames(long_data, old = "ped_status", new = "disc_status")
+      long_data$disc_status = factor(long_data$disc_status, levels = c("0", "1"))
 
-      # remove offset, tstart, interval for dataframe long_data
-      long_data[, c("offset", "tstart", "interval", "ped_status") := NULL]
+      # remove some columns from `long_data`
+      long_data[, c("offset", "tstart", "interval") := NULL]
       # keep id mapping
       reps = table(long_data$id)
       ids = rep(task$row_ids, times = reps)
@@ -182,9 +183,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       form = formulate(sprintf("Surv(%s, %s)", time_var, event_var), ".")
 
       long_data = as.data.table(pammtools::as_ped(data, formula = form, cut = cut))
-
-      long_data$disc_status = long_data$ped_status
-      long_data[, "ped_status" := NULL]
+      setnames(long_data, old = "ped_status", new = "disc_status")
 
       disc_status = id = tend = obs_times = NULL # fixing global binding notes of data.table
       long_data[, disc_status := 0]
@@ -200,7 +199,7 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       long_data[long_data[, .I[tend >= obs_times], by = id]$V1, disc_status := status]
       long_data$disc_status = factor(long_data$disc_status, levels = c("0", "1"))
 
-      # remove offset, tstart, interval for dataframe long_data
+      # remove some columns from `long_data`
       long_data[, c("offset", "tstart", "interval", "obs_times") := NULL]
       task_disc = TaskClassif$new(paste0(task$id, "_disc"), long_data,
                                   target = "disc_status", positive = "1")
@@ -209,7 +208,9 @@ PipeOpTaskSurvClassifDiscTime = R6Class("PipeOpTaskSurvClassifDiscTime",
       # map observed times back
       reps = table(long_data$id)
       long_data$obs_times = rep(time, each = rows_per_id)
-      long_data = long_data[, data.table(id, obs_times, tend, disc_status)]
+      # subset transformed data
+      columns_to_keep = c("id", "obs_times", "tend", "disc_status")
+      long_data = long_data[, columns_to_keep, with = FALSE]
 
       list(task_disc, long_data)
     }

From 7c0e12959b59c58393e242ce7605c47a3345594f Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 13:14:54 +0200
Subject: [PATCH 29/30] revert renaming of workflow file, capitalize workflow name

---
 .github/workflows/{R-CMD-check.yml => r-cmd-check.yml} | 2 +-
 README.Rmd                                             | 2 +-
 README.md                                              | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
 rename .github/workflows/{R-CMD-check.yml => r-cmd-check.yml} (98%)

diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/r-cmd-check.yml
similarity index 98%
rename from .github/workflows/R-CMD-check.yml
rename to .github/workflows/r-cmd-check.yml
index 70bba92ee..d6135b1d1 100644
--- a/.github/workflows/R-CMD-check.yml
+++ b/.github/workflows/r-cmd-check.yml
@@ -9,7 +9,7 @@ on:
     branches:
       - main
 
-name: r-cmd-check
+name: R-CMD-check
 
 jobs:
   r-cmd-check:
diff --git a/README.Rmd b/README.Rmd
index 546ed4863..30d4964dc 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -15,7 +15,7 @@ knitr::opts_chunk$set(
 Probabilistic Supervised Learning for **[mlr3](https://github.com/mlr-org/mlr3/)** ([website](https://mlr3proba.mlr-org.com/)).
 
 <!-- badges: start -->
-[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml)
+[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml)
 [![runiverse](https://mlr-org.r-universe.dev/badges/mlr3proba)](https://mlr-org.r-universe.dev/mlr3proba)
 [![GitHub Discussions](https://img.shields.io/github/discussions/mlr-org/mlr3proba?logo=github&label=Discussions%20Q%26A&color=FFE600)](https://github.com/mlr-org/mlr3proba/discussions)
 [![Article](https://img.shields.io/badge/Article-10.1093%2Fbioinformatics%2Fbtab039-brightgreen)](https://doi.org/10.1093/bioinformatics/btab039)
diff --git a/README.md b/README.md
index 1c0aa0e82..0f68c4050 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ Probabilistic Supervised Learning for
 
 <!-- badges: start -->
 
-[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/R-CMD-check.yml)
+[![R-CMD-check](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3proba/actions/workflows/r-cmd-check.yml)
 [![runiverse](https://mlr-org.r-universe.dev/badges/mlr3proba)](https://mlr-org.r-universe.dev/mlr3proba)
 [![GitHub
 Discussions](https://img.shields.io/github/discussions/mlr-org/mlr3proba?logo=github&label=Discussions%20Q%26A&color=FFE600)](https://github.com/mlr-org/mlr3proba/discussions)

From bbd0c970b54b42c1b96ee62bba88e3d294fd4ebb Mon Sep 17 00:00:00 2001
From: john <bblodfon@gmail.com>
Date: Wed, 31 Jul 2024 13:19:22 +0200
Subject: [PATCH 30/30] update version + news

---
 DESCRIPTION | 2 +-
 NEWS.md     | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 31f2302f2..9258055c7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: mlr3proba
 Title: Probabilistic Supervised Learning for 'mlr3'
-Version: 0.6.5
+Version: 0.6.6
 Authors@R:
     c(person(given = "Raphael",
              family = "Sonabend",
diff --git a/NEWS.md b/NEWS.md
index 0a79b8ebf..e5e720a95 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# mlr3proba 0.6.6
+
+* Small fixes and refactoring to the discrete-time pipeops
+
 # mlr3proba 0.6.5
 
 * Add support for discrete-time survival analysis