InsightRX · roninsightrx · May 26, 2026 · May 26, 2026 · roninsightrx · May 26, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: pharmr.extra
 Title: Extension of pharmr (Pharmpy) functionality
-Version: 0.0.0.9072
+Version: 0.0.0.9074
 Authors@R: c(
     person("Ron", "Keizer", email = "ron@insight-rx.com", role = c("cre", "aut")),
     person("Michael", "McCarthy", email = "michael.mccarthy@insight-rx.com", role = "ctb"),

diff --git a/R/prepare_run_folder.R b/R/prepare_run_folder.R
@@ -7,6 +7,7 @@ prepare_run_folder <- function(
   force = FALSE,
   data = NULL,
   auto_stack_encounters = FALSE,
+  copy_dataset = TRUE,
   verbose = TRUE
 ) {
 
@@ -31,23 +32,33 @@ prepare_run_folder <- function(
 
   if(!is.null(data)) {
     if(inherits(data, "character")) {
-      if(verbose) cli::cli_process_start("Copying dataset")
       if(!file.exists(data)) {
         cli::cli_abort("`data` file does not exist.")
       }
       if(isTRUE(auto_stack_encounters)) {
         cli::cli_warn("`auto_stack_encounters` can only be used when `data` is specified as data.frame, not when it is a CSV filename.")
       }
-      file.copy(from = data, to = dataset_path)
-      ## If the source CSV has quoted headers (e.g. `"ID","TIME",...`), NONMEM
-      ## will try to parse the header row as data. Detect this and rewrite the
-      ## dataset with unquoted headers.
-      first_line <- tryCatch(readLines(dataset_path, n = 1), error = function(e) character(0))
-      if (length(first_line) && grepl('^["\']', first_line)) {
-        if (verbose) cli::cli_alert_info("Stripping quoted column names from dataset header")
-        df <- read.csv(dataset_path, check.names = FALSE)
-        df <- unquote_column_names(df)
-        write.csv(df, file = dataset_path, quote = FALSE, row.names = FALSE)
+      if(!copy_dataset) {
+        ## Leave the dataset in its existing location and point $DATA at its
+        ## absolute path. The file is not modified (so no quoted-header
+        ## rewrite); the user is responsible for the dataset being NONMEM-ready.
+        if(verbose) cli::cli_process_start("Using dataset in existing location (not copying into run folder)")
+        dataset_path <- normalizePath(data, mustWork = TRUE)
+      } else {
+        if(verbose) cli::cli_process_start("Copying dataset")
+        if(!isTRUE(file.copy(from = data, to = dataset_path))) {
+          cli::cli_abort("Failed to copy dataset from {.path {data}} to {.path {dataset_path}}.")
+        }
+        ## If the source CSV has quoted headers (e.g. `"ID","TIME",...`), NONMEM
+        ## will try to parse the header row as data. Detect this and rewrite the
+        ## dataset with unquoted headers.
+        first_line <- tryCatch(readLines(dataset_path, n = 1), error = function(e) character(0))
+        if (length(first_line) && grepl('^["\']', first_line)) {
+          if (verbose) cli::cli_alert_info("Stripping quoted column names from dataset header")
+          df <- read.csv(dataset_path, check.names = FALSE)
+          df <- unquote_column_names(df)
+          write.csv(df, file = dataset_path, quote = FALSE, row.names = FALSE)
+        }
       }
     } else {
       if(verbose) cli::cli_process_start("Checking, cleaning, and copying dataset")
@@ -83,7 +94,17 @@ prepare_run_folder <- function(
         }
       }
       if (!is.null(dataset_file)) {
-        file.copy(from = dataset_file, to = dataset_path)
+        if (!copy_dataset) {
+          ## Dataset resolved from the model's $DATA record is already on disk;
+          ## leave it in place and point $DATA at its absolute path.
+          if (verbose) cli::cli_process_start("Using dataset from model's $DATA record (not copying into run folder)")
+          dataset_path <- normalizePath(dataset_file, mustWork = TRUE)
+        } else {
+          if (verbose) cli::cli_process_start("Copying dataset from model's $DATA record")
+          if (!isTRUE(file.copy(from = dataset_file, to = dataset_path))) {
+            cli::cli_abort("Failed to copy dataset from {.path {dataset_file}} to {.path {dataset_path}}.")
+          }
+        }
       } else {
         cli::cli_abort("No dataset could be resolved: `model$dataset` is NULL and no existing file was found from the model's $DATA record.")
       }

diff --git a/R/run_nlme.R b/R/run_nlme.R
@@ -66,6 +66,12 @@
 #' This feature is useful e.g. for crossover trials when data on the same
 #' individual ispresent but is included in the dataset as time-after-dose and
 #' not actual time since first overall dose.
+#' @param copy_dataset copy the dataset into the run folder? If `TRUE`, the
+#' dataset is copied into the run folder as `data.csv` and the model's `$DATA`
+#' record points to that copy. If `FALSE` (default), the dataset is left in its
+#' existing location and the model's `$DATA` record points to that absolute
+#' path. Only applies when `data` is supplied as (or resolves to) a file on
+#' disk; in-memory data frames are always written into the run folder.
 #' @param clean clean up run folder after NONMEM execution?
 #' @param as_job run as RStudio job?
 #' @param save_final after running the model, should a file `final.mod` be created
@@ -114,6 +120,7 @@ run_nlme <- function(
   estimation_options = NULL,
   sir_options = NULL,
   auto_stack_encounters = TRUE,
+  copy_dataset = FALSE,
   clean = TRUE,
   as_job = FALSE,
   save_final = TRUE,
@@ -127,6 +134,12 @@ run_nlme <- function(
 
   time_start <- Sys.time()
 
+  ## An in-memory data.frame has no on-disk "existing location" to reference,
+  ## so it must always be written into the run folder regardless of
+  ## `copy_dataset` (otherwise $DATA would point at the ephemeral tempfile
+  ## created just below).
+  data_in_memory <- inherits(data, "data.frame")
+
   ## Make sure `data` is pointing to a file. This is to avoid issue with
   ## Pharmpy trying to parse the data.frame. `data` may also be NULL, in
   ## which case `prepare_run_folder()` falls back to `model$dataset` or the
@@ -238,6 +251,7 @@ run_nlme <- function(
     data = data,
     force = force,
     auto_stack_encounters = auto_stack_encounters,
+    copy_dataset = copy_dataset || data_in_memory,
     verbose = verbose
   )
 

diff --git a/man/prepare_run_folder.Rd b/man/prepare_run_folder.Rd
diff --git a/man/run_nlme.Rd b/man/run_nlme.Rd
diff --git a/tests/testthat/test-run_nlme.R b/tests/testthat/test-run_nlme.R
@@ -269,6 +269,40 @@ test_that("run_nlme / prepare_run_folder strip surrounding quotes from column na
   )
 })
 
+test_that("prepare_run_folder respects copy_dataset", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+
+  mod <- create_model(route = "iv", verbose = FALSE)
+
+  src_dir <- withr::local_tempdir()
+  src_csv <- file.path(src_dir, "mydata.csv")
+  writeLines(c("ID,TIME,DV", "1,0,0", "1,1,10"), src_csv)
+
+  ## copy_dataset = FALSE: leave dataset in place, point $DATA at its abs path
+  obj_no_copy <- prepare_run_folder(
+    id = "run1", model = mod, path = withr::local_tempdir(), data = src_csv,
+    copy_dataset = FALSE, verbose = FALSE
+  )
+  expect_false(file.exists(file.path(obj_no_copy$fit_folder, "data.csv")))
+  expect_equal(obj_no_copy$dataset_path, normalizePath(src_csv))
+  data_line <- grep("^\\$DATA", readLines(
+    file.path(obj_no_copy$fit_folder, obj_no_copy$model_file)
+  ), value = TRUE)
+  expect_match(data_line, normalizePath(src_csv), fixed = TRUE)
+
+  ## copy_dataset = TRUE: dataset copied into run folder, $DATA points to copy
+  obj_copy <- prepare_run_folder(
+    id = "run1", model = mod, path = withr::local_tempdir(), data = src_csv,
+    copy_dataset = TRUE, verbose = FALSE
+  )
+  expect_true(file.exists(file.path(obj_copy$fit_folder, "data.csv")))
+  expect_equal(
+    obj_copy$dataset_path,
+    file.path(obj_copy$fit_folder, "data.csv")
+  )
+})
+
 test_that("unquote_column_names strips a single pair of surrounding quotes", {
   df <- data.frame(a = 1, b = 2, c = 3)
   names(df) <- c('"a"', "'b'", "c")
@@ -315,6 +349,33 @@ test_that("run_nlme converts data.frame input to a CSV file path", {
   expect_equal(written, dat, ignore_attr = TRUE)
 })
 
+test_that("run_nlme forces copy_dataset for in-memory data.frame input", {
+  local_pharmr.extra_options()
+  skip_if_nonmem_not_available()
+  mod <- create_model(route = "iv", verbose = FALSE)
+  dat <- data.frame(
+    ID = 1, TIME = c(0, 1, 2), DV = c(0, 10, 5),
+    AMT = c(100, 0, 0), CMT = 1, EVID = c(1, 0, 0), MDV = c(1, 0, 0)
+  )
+
+  ## A data.frame has no on-disk location to reference, so even with
+  ## copy_dataset = FALSE it must be written into the run folder (otherwise
+  ## $DATA would point at an ephemeral tempfile).
+  captured_copy <- "<not captured>"
+  stub(run_nlme, "prepare_run_folder", function(id, model, path, data, ...) {
+    captured_copy <<- list(...)$copy_dataset
+    stop("abort before NONMEM")
+  })
+
+  tryCatch(
+    run_nlme(mod, data = dat, id = "run1", path = withr::local_tempdir(),
+             copy_dataset = FALSE, verbose = FALSE),
+    error = function(e) NULL
+  )
+
+  expect_true(captured_copy)
+})
+
 test_that("run_nlme passes through a CSV file path unchanged", {
   local_pharmr.extra_options()
   mod <- create_model(route = "iv", verbose = FALSE)