feat(runner): add runner for replications to run in parallel, and set to full sim_length

amyheather · amyheather · commit a80d9a4bf7d7 · 2025-07-02T14:54:36.000+01:00
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -18,8 +18,10 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.3.2
 Imports:
+    dplyr,
+    future,
+    future.apply,
     simmer
 Suggests:
-    devtools,
-    dplyr
+    devtools
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -10,10 +10,20 @@ export(create_rehab_arrivals)
 export(create_rehab_los)
 export(create_rehab_routing)
 export(create_rehab_trajectory)
+export(get_occupancy_stats)
 export(model)
+export(runner)
 export(sample_routing)
 export(transform_to_lnorm)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,filter)
+importFrom(dplyr,mutate)
+importFrom(dplyr,rowwise)
+importFrom(dplyr,ungroup)
+importFrom(future,multisession)
+importFrom(future,plan)
+importFrom(future,sequential)
+importFrom(future.apply,future_lapply)
 importFrom(simmer,add_generator)
 importFrom(simmer,add_resource)
 importFrom(simmer,branch)
diff --git a/R/get_occupancy_stats.R b/R/get_occupancy_stats.R
@@ -17,6 +17,7 @@
 #'
 #' @return A list of data frames, one per resource, each containing occupancy
 #' statistics.
+#' @export
 
 get_occupancy_stats <- function(occupancy) {
 
diff --git a/R/model.R b/R/model.R
@@ -6,7 +6,7 @@
 #' may not wish to do if being set elsewhere - such as done in \code{runner()}).
 #' Default is TRUE.
 #'
-#' @importFrom dplyr filter
+#' @importFrom dplyr filter mutate rowwise ungroup
 #' @importFrom simmer add_resource get_mon_arrivals get_mon_resources simmer
 #' @importFrom simmer wrap
 #' @importFrom utils capture.output
@@ -62,8 +62,7 @@ model <- function(run_number, param, set_seed = TRUE) {
   }
 
   # Run the model
-  sim_length <- 20L
-  # sim_length <- param[["data_collection_period"]] + param[["warm_up_period"]]
+  sim_length <- param[["data_collection_period"]] + param[["warm_up_period"]]
   sim_log <- capture.output(
     env <- env |> # nolint
       simmer::run(sim_length) |>
@@ -97,7 +96,7 @@ model <- function(run_number, param, set_seed = TRUE) {
   # Calculate occupancy at end of each day (i.e. at time 1, 2, 3, 4...)
   # Make dataframe with one row per resource per day to count patients
   occupancy <- expand.grid(
-    resource = unique(arrivals$resource),
+    resource = unique(arrivals[["resource"]]),
     time = days
   ) |>
     rowwise() |>
@@ -107,12 +106,17 @@ model <- function(run_number, param, set_seed = TRUE) {
       # - Have not yet left by this day (end_time > time), or have NA end_time
       #   (still present at simulation end)
       occupancy = sum(
-        arrivals$resource == .data[["resource"]] &
-          arrivals$start_time <= time &
-          (is.na(arrivals$end_time) | arrivals$end_time > time)
+        arrivals[["resource"]] == .data[["resource"]] &
+          arrivals[["start_time"]] <= .data[["time"]] &
+          (is.na(arrivals[["end_time"]]) |
+             arrivals[["end_time"]] > .data[["time"]])
       )
     ) |>
     ungroup()
 
+  # Set replication
+  arrivals <- mutate(arrivals, replication = run_number)
+  occupancy <- mutate(occupancy, replication = run_number)
+
   return(list(arrivals = arrivals, occupancy = occupancy))
 }
diff --git a/R/runner.R b/R/runner.R
@@ -0,0 +1,79 @@
+#' Run simulation for multiple replications, sequentially or in parallel.
+#'
+#' The future plan set here is restored to the caller's previous plan on exit.
+#'
+#' @param param Named list of model parameters. Must include `cores` (1 for
+#' sequential, -1 for all available cores but one) and `number_of_runs`.
+#' @param use_future_seeding Logical. If TRUE, the function will use the
+#' seeding mechanism provided by `future.seed = seed`, which is generally
+#' recommended and ensures reproducibility across parallel executions. However,
+#' this will not align exactly with the seeding approach used in `model()`. If
+#' FALSE, the function will override future's default seeding and instead
+#' generate a list of run numbers to use as seeds, similar to `model()`. Be
+#' aware that this approach is not recommended according to `future_lapply`
+#' documentation, which states: "Note that as.list(seq_along(x)) is not a valid
+#' set of such .Random.seed values."
+#'
+#' @importFrom future plan multisession sequential
+#' @importFrom future.apply future_lapply
+#' @importFrom dplyr bind_rows
+#'
+#' @return Named list with three elements: `arrivals` and `occupancy` (each
+#' combined across replications) and `occupancy_stats`.
+#' @export
+
+runner <- function(param, use_future_seeding = TRUE) {
+  n_runs <- param[["number_of_runs"]]
+  stopifnot(length(n_runs) == 1L, n_runs >= 1L)
+  runs <- seq_len(n_runs)
+
+  # Determine the parallel execution plan. plan() returns the previous plan,
+  # which is restored on exit so the caller's future strategy is not altered.
+  if (param[["cores"]] == 1L) {
+    old_plan <- plan(sequential)  # Sequential execution
+  } else {
+    if (param[["cores"]] == -1L) {
+      # Leave one core free, but never request fewer than one worker
+      cores <- max(1L, future::availableCores() - 1L)
+    } else {
+      cores <- param[["cores"]]
+    }
+    old_plan <- plan(multisession, workers = cores)  # Parallel execution
+  }
+  on.exit(plan(old_plan), add = TRUE)
+
+  # Set seed for future.seed
+  if (isTRUE(use_future_seeding)) {
+    # Recommended option - base seed used when generating others by future.seed
+    custom_seed <- 123456L
+  } else {
+    # Not recommended (but will allow match to model())
+    # Generates list of pre-generated seeds set to the run numbers
+    create_seeds <- function(seed) {
+      set.seed(seed)
+      .Random.seed
+    }
+    custom_seed <- lapply(runs, create_seeds)
+  }
+
+  # Run simulations (sequentially or in parallel)
+  # Mark set_seed as FALSE as we handle this using future.seed, rather than
+  # within the function, and we don't want to override future.seed
+  results <- future_lapply(
+    runs,
+    function(i) {
+      simulation::model(run_number = i, param = param, set_seed = FALSE)
+    },
+    future.seed = custom_seed
+  )
+
+  # Combine replications into one data frame per output (also fine for one run)
+  results <- list(
+    arrivals = bind_rows(lapply(results, function(x) x[["arrivals"]])),
+    occupancy = bind_rows(lapply(results, function(x) x[["occupancy"]]))
+  )
+
+  results[["occupancy_stats"]] <- get_occupancy_stats(results[["occupancy"]])
+
+  results
+}
diff --git a/man/runner.Rd b/man/runner.Rd
diff --git a/renv.lock b/renv.lock
@@ -376,6 +376,36 @@
       ],
       "Hash": "15aeb8c27f5ea5161f9f6a641fafd93a"
     },
+    "future": {
+      "Package": "future",
+      "Version": "1.58.0",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "digest",
+        "globals",
+        "listenv",
+        "parallel",
+        "parallelly",
+        "utils"
+      ],
+      "Hash": "b096716c11b90643e3cecdcbef564259"
+    },
+    "future.apply": {
+      "Package": "future.apply",
+      "Version": "1.20.0",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "future",
+        "globals",
+        "parallel",
+        "utils"
+      ],
+      "Hash": "42eb18487138fa2683ff92149e4bd01a"
+    },
     "generics": {
       "Package": "generics",
       "Version": "0.1.4",
@@ -430,6 +460,17 @@
       ],
       "Hash": "ab08ac61f3e1be454ae21911eb8bc2fe"
     },
+    "globals": {
+      "Package": "globals",
+      "Version": "0.18.0",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R",
+        "codetools"
+      ],
+      "Hash": "0e0c37bd3108b8835c99eaa4d83cf6f5"
+    },
     "glue": {
       "Package": "glue",
       "Version": "1.7.0",
@@ -586,6 +627,16 @@
       ],
       "Hash": "b8552d117e1b808b09a832f589b79035"
     },
+    "listenv": {
+      "Package": "listenv",
+      "Version": "0.9.1",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "R"
+      ],
+      "Hash": "e2fca3e12e4db979dccc6e519b10a7ee"
+    },
     "magrittr": {
       "Package": "magrittr",
       "Version": "2.0.3",
@@ -639,6 +690,18 @@
       ],
       "Hash": "05ce1ed077e8c97fbb3ec1cb078f1159"
     },
+    "parallelly": {
+      "Package": "parallelly",
+      "Version": "1.45.0",
+      "Source": "Repository",
+      "Repository": "CRAN",
+      "Requirements": [
+        "parallel",
+        "tools",
+        "utils"
+      ],
+      "Hash": "eec07caa14285c8a9f9de8276473e7a1"
+    },
     "pillar": {
       "Package": "pillar",
       "Version": "1.10.2",
diff --git a/rmarkdown/analysis.Rmd b/rmarkdown/analysis.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "analysis"
+title: "Analysis"
 author: "Amy Heather"
 date: "`r Sys.Date()`"
 output:
@@ -8,25 +8,74 @@ output:
       html_preview: false
 ---
 
+This notebook reproduces the analysis performed in:
+
+> Monks T, Worthington D, Allen M, Pitt M, Stein K, James MA. A modelling tool for capacity planning in acute and community stroke services. BMC Health Serv Res. 2016 Sep 29;16(1):530. doi: 10.1186/s12913-016-1789-4. PMID: 27688152; PMCID: PMC5043535.
+
+It is organised into:
+
+* Set-up
+* Base case
+  * Run the model
+  * Figure 1
+  * Theory: probability of delay
+  * Figure 3
+* Scenario analysis: altering arrivals
+  * Scenario 1
+  * Table 2
+  * Scenario 4
+  * Supplementary table 1
+* Scenario analysis: pooling beds
+  * Theory: pooling beds
+  * Scenario 2
+
+## Set-up
+
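+Install the latest version of the local simulation package. If running sequentially, `devtools::load_all()` is sufficient. If running in parallel, you must use `devtools::install()`, because each parallel worker is a separate R session that calls `simulation::model()` from the installed package.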
+
 ```{r}
-# Load the package from the local directory
-devtools::load_all()
+devtools::install(upgrade = "never")
 ```
 
 ```{r}
 # nolint start: undesirable_function_linter
-# Load the package
-library(dplyr)
+# Import required packages.
+library(dplyr, warn.conflicts = FALSE)
 library(simulation)
 # nolint end
 ```
 
 ```{r}
-param <- create_parameters(log_to_console = TRUE)
-result <- model(run_number = 1L, param = param, set_seed = TRUE)
-result
+start_time <- Sys.time()
+```
+
+```{r}
+output_dir <- file.path("..", "outputs")
+```
+
+## Base case
+
+### Run the model
+
+```{r}
+# Run 150 replications in parallel with nine cores
+param <- create_parameters(cores = 9)
+results <- runner(param = param)
 ```
 
 ```{r}
-get_occupancy_stats(result[["occupancy"]])
+get_occupancy_stats(results[["occupancy"]])
+```
+
+## Calculate run time
+
+```{r end_timer}
+# Get total notebook run time in seconds (start_time is recorded in Set-up)
+end_time <- Sys.time()
+runtime <- as.numeric(end_time - start_time, units = "secs")
+
+# Display as whole minutes and remaining whole seconds (fractions truncated)
+minutes <- as.integer(runtime / 60L)
+seconds <- as.integer(runtime %% 60L)
+cat(sprintf("Notebook run time: %dm %ds", minutes, seconds))
 ```
diff --git a/rmarkdown/analysis.md b/rmarkdown/analysis.md