InsightRX
diff --git a/‎R/reformat_data_modeling_to_modeling.R‎
Lines changed: 11 additions & 2 deletions b/‎R/reformat_data_modeling_to_modeling.R‎
Lines changed: 11 additions & 2 deletions
diff --git a/‎R/reformat_data_modeling_to_nca.R‎
Lines changed: 4 additions & 1 deletion b/‎R/reformat_data_modeling_to_nca.R‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎R/reformat_data_nca_to_modeling.R‎
Lines changed: 55 additions & 6 deletions b/‎R/reformat_data_nca_to_modeling.R‎
Lines changed: 55 additions & 6 deletions
diff --git a/‎man/reformat_data_modeling_to_modeling.Rd‎
Lines changed: 4 additions & 1 deletion b/‎man/reformat_data_modeling_to_modeling.Rd‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎man/reformat_data_modeling_to_nca.Rd‎
Lines changed: 4 additions & 1 deletion b/‎man/reformat_data_modeling_to_nca.Rd‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎man/reformat_data_nca_to_modeling.Rd‎
Lines changed: 13 additions & 1 deletion b/‎man/reformat_data_nca_to_modeling.Rd‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎man/reformat_data_sdtm_to_modeling.Rd‎
Lines changed: 4 additions & 1 deletion b/‎man/reformat_data_sdtm_to_modeling.Rd‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎tests/testthat/test-reformat_data_nca_to_modeling.R‎
Lines changed: 60 additions & 0 deletions b/‎tests/testthat/test-reformat_data_nca_to_modeling.R‎
Lines changed: 60 additions & 0 deletions
@@ -4,14 +4,17 @@
 #' @param data dataset formatted as modeling-ready dataset
 #' @param dictionary a data dictionary that maps expected variable names to 
 #' variables in the data.
+#' @param na value used to replace NA values (and literal "NA" strings), e.g.
+#' "." (default), NA (keep NA), or NULL (skip the replacement).
 #' 
 #' @returns data.frame with population PK input data in NONMEM-style
 #' format.
 #' 
 #' @export
 reformat_data_modeling_to_modeling <- function(
   data,
-  dictionary = NULL
+  dictionary = NULL,
+  na = "."
 ) {
 
   data <- data |>
@@ -34,6 +37,12 @@ reformat_data_modeling_to_modeling <- function(
       data$GROUP <- 1 # dummy grouper
     }
   }
-
+  
+  ## Convert NA's to dots (or something else)
+  if(!is.null(na)) {
+    data <- data |>
+      dplyr::mutate(dplyr::across(dplyr::everything(), ~ifelse(is.na(.) | . == "NA", na, .)))
+  }
+  
   data
 } 
@@ -4,14 +4,17 @@
 #' @param data dataset formatted as modeling-ready dataset
 #' @param dictionary a data dictionary that maps expected variable names to 
 #' variables in the data.
+#' @param na what to set NA values to, e.g. ".", NA (keep NA, default),
+#' or NULL (do nothing).
 #' 
 #' @returns data.frame with population PK input data in NONMEM-style
 #' format.
 #' 
 #' @export
 reformat_data_modeling_to_nca <- function(
   data, 
-  dictionary = NULL
+  dictionary = NULL,
+  na = NA
 ) {
   ## TODO:
   # strip out EVID=2.
 
@@ -8,13 +8,21 @@
 #' @param obs_compartment the observation compartment number
 #' @param covariates a vector of covariate names that are to be extracted
 #' and added to the modeling dataset.
+#' @param na value used to replace NA values (and literal "NA" strings), e.g.
+#' "." (default), NA (keep NA), or NULL (skip the replacement).
+#' @param repeat_doses Optional list for repeated dosing (MAD studies). Must
+#' contain `interval` (dosing interval in TIME units). Optionally contains `n`
+#' (total number of doses). If `n` is omitted, it is inferred per subject/group
+#' as `ceiling(max(observation_time) / interval)`. Only applies to column-wise
+#' dose data. Default `NULL` preserves existing behavior (no ADDL/II columns).
+#' Examples: `list(interval = 12)` or `list(n = 5, interval = 12)`.
 #'
 #' @returns data.frame with population PK input data in NONMEM-style
 #' format.
-#' 
+#'
 #' @export
 reformat_data_nca_to_modeling <- function(
-  data, 
+  data,
   dictionary = list(
     subject_id = "ID",
     group = "GROUP",
@@ -24,7 +32,9 @@ reformat_data_nca_to_modeling <- function(
   ),
   dose_compartment = 1,
   obs_compartment = 1,
-  covariates = NULL
+  covariates = NULL,
+  repeat_doses = NULL,
+  na = "."
 ) {
 
   groups <- c(dictionary$subject_id, dictionary$group)
@@ -56,12 +66,40 @@ reformat_data_nca_to_modeling <- function(
     dplyr::filter(!is.na(AMT)) |>
     dplyr::mutate(EVID = 1, MDV = 1, DV = 0, CMT = dose_compartment) |>
     dplyr::left_join(ids, by = dplyr::join_by("ORIGID"))
+  
   if(nrow(doses) == nrow(data)) { # Dose is given as a column, and not row-wise using EVID
     doses <- doses |>
       dplyr::group_by(.data$ORIGID, .data$GROUP) |>
       dplyr::slice(1) |>
       dplyr::mutate(TIME = 0) |>
       dplyr::ungroup()
+
+    if (!is.null(repeat_doses)) {
+      if (is.null(repeat_doses$interval)) {
+        stop("`repeat_doses` must contain an `interval` element.")
+      }
+      interval <- repeat_doses$interval
+      if (!is.null(repeat_doses$n)) {
+        doses <- doses |>
+          dplyr::mutate(ADDL = as.numeric(repeat_doses$n) - 1, II = interval)
+      } else {
+        max_obs_times <- data |>
+          dplyr::select(
+            ORIGID = !!dictionary$subject_id,
+            GROUP  = !!dictionary$group,
+            TIME   = !!dictionary$time
+          ) |>
+          dplyr::group_by(.data$ORIGID, .data$GROUP) |>
+          dplyr::summarise(max_obs_time = max(.data$TIME, na.rm = TRUE), .groups = "drop")
+        doses <- doses |>
+          dplyr::left_join(max_obs_times, by = c("ORIGID", "GROUP")) |>
+          dplyr::mutate(
+            ADDL = pmax(0, ceiling(.data$max_obs_time / interval) - 1),
+            II   = interval
+          ) |>
+          dplyr::select(-"max_obs_time")
+      }
+    }
   }
 
   ## Observations
@@ -78,15 +116,19 @@ reformat_data_nca_to_modeling <- function(
       stringr::str_detect(tolower(.data$DV), "[<a-z]"), -99, .data$DV
     ))) |>
     dplyr::left_join(ids, by = dplyr::join_by("ORIGID"))
-  
+
+  if (!is.null(repeat_doses)) {
+    samples <- samples |> dplyr::mutate(ADDL = 0, II = 0)
+  }
+
   ## Combine
   comb <- dplyr::bind_rows(
     doses,
     samples
   ) |>
     dplyr::mutate(ifelse(is.null(.data$GROUP), 1, .data$GROUP)) |>
     dplyr::arrange(!!dictionary$subject_id, !!dictionary$group, !!dictionary$time, .data$EVID) |>
-    dplyr::select("ID", "TIME", "CMT", "EVID", "MDV", "DV", "AMT", "GROUP", "ORIGID", !!covariates) |>
+    dplyr::select("ID", "TIME", "CMT", "EVID", "MDV", "DV", "AMT", dplyr::any_of(c("ADDL", "II")), "GROUP", "ORIGID", !!covariates) |>
     dplyr::arrange(.data$GROUP, .data$ID, .data$TIME, -.data$EVID)
 
   ## Convert all character columns to categorical (but numeric)
@@ -99,7 +141,14 @@ reformat_data_nca_to_modeling <- function(
   }
 
   ## Remove any observations with DV = -99
-  comb <- dplyr::filter(comb, .data$DV != -99)
+  comb <- comb |>
+    dplyr::filter(.data$DV != -99)
+  
+  ## Convert NA's to dots or something else
+  if(!is.null(na)) {
+    comb <- comb |>
+      dplyr::mutate(dplyr::across(dplyr::everything(), ~ifelse(is.na(.) | . == "NA", na, .)))
+  }
 
   ## Return
   comb
 
@@ -75,6 +75,66 @@ test_that("dose records are reduced to one per subject when dose is column-wise"
   expect_equal(out$TIME[out$EVID == 1], 0)
 })
 
+test_that("repeat_doses = NULL produces no ADDL/II columns", {
+  dat <- data.frame(
+    ID = c(1, 1, 1),
+    TIME = c(0, 12, 24),
+    AMT = c(100, 100, 100),
+    DV = c(NA, 5, 3)
+  )
+  out <- reformat_data_nca_to_modeling(data = dat)
+  expect_false("ADDL" %in% names(out))
+  expect_false("II" %in% names(out))
+})
+
+test_that("repeat_doses with explicit n adds correct ADDL/II", {
+  dat <- data.frame(
+    ID = c(1, 1, 1),
+    TIME = c(0, 12, 24),
+    AMT = c(100, 100, 100),
+    DV = c(NA, 5, 3)
+  )
+  out <- reformat_data_nca_to_modeling(data = dat, repeat_doses = list(n = 5, interval = 12))
+  dose_rows <- out[out$EVID == 1, ]
+  obs_rows  <- out[out$EVID == 0, ]
+  expect_equal(dose_rows$ADDL, 4)
+  expect_equal(dose_rows$II,   12)
+  expect_true(all(obs_rows$ADDL == 0))
+  expect_true(all(obs_rows$II   == 0))
+})
+
+test_that("repeat_doses without n infers ADDL per subject from max obs time", {
+  dat <- data.frame(
+    ID   = c(1, 1, 1,  2, 2, 2),
+    TIME = c(0, 12, 24, 0, 6, 12),
+    AMT  = c(100, 100, 100, 200, 200, 200),
+    DV   = c(NA, 5, 3, NA, 8, 6)
+  )
+  out <- reformat_data_nca_to_modeling(data = dat, repeat_doses = list(interval = 12))
+  dose_rows <- out[out$EVID == 1, ]
+  # Subject 1: max obs time = 24, ceiling(24/12) - 1 = 1
+  expect_equal(dose_rows$ADDL[dose_rows$ID == 1], 1)
+  # Subject 2: max obs time = 12, ceiling(12/12) - 1 = 0 (no additional doses)
+  expect_equal(dose_rows$ADDL[dose_rows$ID == 2], 0)
+  expect_true(all(dose_rows$II == 12))
+  obs_rows <- out[out$EVID == 0, ]
+  expect_true(all(obs_rows$ADDL == 0))
+  expect_true(all(obs_rows$II   == 0))
+})
+
+test_that("repeat_doses without interval raises an error", {
+  dat <- data.frame(
+    ID = c(1, 1),
+    TIME = c(0, 12),
+    AMT = c(100, 100),
+    DV = c(NA, 5)
+  )
+  expect_error(
+    reformat_data_nca_to_modeling(data = dat, repeat_doses = list(n = 3)),
+    "interval"
+  )
+})
+
 test_that("reformat_data_nca_to_modeling handles multiple doses per subject", {
   # Create data with multiple dose events per subject (row-wise dosing)
   dat <- data.frame(