diff --git a/base/workflow/NAMESPACE b/base/workflow/NAMESPACE index c05f334c5d..dc5c53203e 100644 --- a/base/workflow/NAMESPACE +++ b/base/workflow/NAMESPACE @@ -1,10 +1,14 @@ # Generated by roxygen2: do not edit by hand +S3method(as.data.frame,pecan_preprocess_result) +S3method(print,pecan_preprocess_result) export(do_conversions) export(run.write.configs) export(runModule.get.trait.data) export(runModule.run.write.configs) export(runModule_start_model_runs) +export(standard_result) export(start_model_runs) +export(validate_standard_result) importFrom(dplyr,"%>%") importFrom(rlang,"%||%") diff --git a/base/workflow/R/standard_result.R b/base/workflow/R/standard_result.R new file mode 100644 index 0000000000..4439e247b6 --- /dev/null +++ b/base/workflow/R/standard_result.R @@ -0,0 +1,179 @@ +#' Standard Preprocessing Result Object +#' +#' @description +#' Construct a minimal, uniform result object for preprocessing functions. +#' This object is intended to standardize the current mix of return values such +#' as path strings, nested lists, and BETY identifier lists. +#' +#' @param tag Character scalar identifying the input type. One of +#' `"met"`, `"soil"`, `"ic"`, `"phenology"`, or `"fia"`. +#' @param paths Character vector of output file paths. +#' @param input_id Integer scalar BETY input ID, or `NA_integer_` when unused. +#' @param dbfile_id Integer scalar BETY dbfile ID, or `NA_integer_` when unused. +#' @param format Character scalar naming the output format. +#' @param source Character scalar naming the source dataset or provider. +#' @param status Character scalar. One of `"success"`, `"error"`, or +#' `"skipped"`. +#' @param error_message Character scalar or `NULL`. Error detail when +#' `status == "error"`. +#' +#' @return An object of class `pecan_preprocess_result`. +#' @export +#' +#' @examples +#' standard_result( +#' tag = "met", +#' paths = "/data/dbfiles/ERA5_772_2005.nc", +#' input_id = 42001L, +#' dbfile_id = 55003L, +#' format = "CF Meteorology", +#' source = "ERA5" +#' ) +standard_result <- function(tag, + paths = character(), + input_id = NA_integer_, + dbfile_id = NA_integer_, + format = "", + source = "", + status = "success", + error_message = NULL) { + result <- list( + tag = tag, + paths = as.character(paths), + input_id = as.integer(input_id), + dbfile_id = as.integer(dbfile_id), + format = format, + source = source, + status = status, + error_message = error_message + ) + + class(result) <- c("pecan_preprocess_result", "list") + validate_standard_result(result) +} + +#' Validate a Standard Preprocessing Result +#' +#' @description +#' Validate the structure and field types of a standard preprocessing result. +#' Returns the input object invisibly when valid and throws an error otherwise. +#' +#' @param x Object to validate. +#' +#' @return `x`, invisibly, when validation succeeds. +#' @export +validate_standard_result <- function(x) { + allowed_tags <- c("met", "soil", "ic", "phenology", "fia") + allowed_status <- c("success", "error", "skipped") + + if (!inherits(x, "pecan_preprocess_result")) { + stop("`x` must inherit from 'pecan_preprocess_result'.", call. = FALSE) + } + if (!is.list(x)) { + stop("`x` must be a list.", call. = FALSE) + } + if (!is.character(x$tag) || length(x$tag) != 1L || is.na(x$tag) || !(x$tag %in% allowed_tags)) { + stop("`tag` must be one of: met, soil, ic, phenology, fia.", call. = FALSE) + } + if (!is.character(x$paths)) { + stop("`paths` must be a character vector.", call. = FALSE) + } + if (!is.integer(x$input_id) || length(x$input_id) != 1L) { + stop("`input_id` must be an integer scalar.", call. = FALSE) + } + if (!is.integer(x$dbfile_id) || length(x$dbfile_id) != 1L) { + stop("`dbfile_id` must be an integer scalar.", call. = FALSE) + } + if (!is.character(x$format) || length(x$format) != 1L) { + stop("`format` must be a character scalar.", call. = FALSE) + } + if (!is.character(x$source) || length(x$source) != 1L) { + stop("`source` must be a character scalar.", call. = FALSE) + } + if (!is.character(x$status) || length(x$status) != 1L || !(x$status %in% allowed_status)) { + stop("`status` must be one of: success, error, skipped.", call. = FALSE) + } + if (!is.null(x$error_message) && (!is.character(x$error_message) || length(x$error_message) != 1L)) { + stop("`error_message` must be NULL or a character scalar.", call. = FALSE) + } + if (identical(x$status, "error") && is.null(x$error_message)) { + stop("`error_message` must be provided when `status` is 'error'.", call. = FALSE) + } + if (!identical(x$status, "error") && !is.null(x$error_message)) { + stop("`error_message` must be NULL unless `status` is 'error'.", call. = FALSE) + } + + # TODO: When callers migrate, validate caller-specific invariants here. + invisible(x) +} + +#' Print a Standard Preprocessing Result +#' +#' @param x A `pecan_preprocess_result` object. +#' @param ... Unused. +#' +#' @return `x`, invisibly. +#' @export +print.pecan_preprocess_result <- function(x, ...) { + validate_standard_result(x) + + cat(sprintf("PEcAn preprocessing result [%s]\n", x$tag)) + cat(sprintf(" status: %s\n", x$status)) + cat(sprintf(" source: %s\n", x$source)) + cat(sprintf(" format: %s\n", x$format)) + + if (length(x$paths) == 0L) { + cat(" paths: \n") + } else { + cat(sprintf(" paths: %s\n", paste(x$paths, collapse = ", "))) + } + + cat(sprintf( + " db_ids: input=%s, dbfile=%s\n", + ifelse(is.na(x$input_id), "NA", as.character(x$input_id)), + ifelse(is.na(x$dbfile_id), "NA", as.character(x$dbfile_id)) + )) + + if (identical(x$status, "error")) { + cat(sprintf(" error: %s\n", x$error_message)) + } + + # TODO: Expand printed details only after all preprocessors return this type. + invisible(x) +} + +#' Coerce a Standard Preprocessing Result to a Data Frame +#' +#' @param x A `pecan_preprocess_result` object. +#' @param ... Unused. +#' +#' @return A data frame with one row per path. +#' @export +as.data.frame.pecan_preprocess_result <- function(x, ...) { + validate_standard_result(x) + + n_paths <- length(x$paths) + if (n_paths == 0L) { + path_values <- NA_character_ + n_rows <- 1L + } else { + path_values <- x$paths + n_rows <- n_paths + } + + # TODO: Revisit row shape if callers need one row per result instead of per path. + data.frame( + tag = rep.int(x$tag, n_rows), + path = path_values, + input_id = rep.int(x$input_id, n_rows), + dbfile_id = rep.int(x$dbfile_id, n_rows), + format = rep.int(x$format, n_rows), + source = rep.int(x$source, n_rows), + status = rep.int(x$status, n_rows), + error_message = rep.int( + if (is.null(x$error_message)) NA_character_ else x$error_message, + n_rows + ), + stringsAsFactors = FALSE + ) +} \ No newline at end of file diff --git a/base/workflow/man/as.data.frame.pecan_preprocess_result.Rd b/base/workflow/man/as.data.frame.pecan_preprocess_result.Rd new file mode 100644 index 0000000000..d36b9f1d9b --- /dev/null +++ b/base/workflow/man/as.data.frame.pecan_preprocess_result.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standard_result.R +\name{as.data.frame.pecan_preprocess_result} +\alias{as.data.frame.pecan_preprocess_result} +\title{Coerce a Standard Preprocessing Result to a Data Frame} +\usage{ +\method{as.data.frame}{pecan_preprocess_result}(x, ...) +} +\arguments{ +\item{x}{A `pecan_preprocess_result` object.} + +\item{...}{Unused.} +} +\value{ +A data frame with one row per path. +} +\description{ +Coerce a Standard Preprocessing Result to a Data Frame +} diff --git a/base/workflow/man/print.pecan_preprocess_result.Rd b/base/workflow/man/print.pecan_preprocess_result.Rd new file mode 100644 index 0000000000..c0e0d313f6 --- /dev/null +++ b/base/workflow/man/print.pecan_preprocess_result.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standard_result.R +\name{print.pecan_preprocess_result} +\alias{print.pecan_preprocess_result} +\title{Print a Standard Preprocessing Result} +\usage{ +\method{print}{pecan_preprocess_result}(x, ...) +} +\arguments{ +\item{x}{A `pecan_preprocess_result` object.} + +\item{...}{Unused.} +} +\value{ +`x`, invisibly. +} +\description{ +Print a Standard Preprocessing Result +} diff --git a/base/workflow/man/standard_result.Rd b/base/workflow/man/standard_result.Rd new file mode 100644 index 0000000000..b600f5526e --- /dev/null +++ b/base/workflow/man/standard_result.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standard_result.R +\name{standard_result} +\alias{standard_result} +\title{Standard Preprocessing Result Object} +\usage{ +standard_result( + tag, + paths = character(), + input_id = NA_integer_, + dbfile_id = NA_integer_, + format = "", + source = "", + status = "success", + error_message = NULL +) +} +\arguments{ +\item{tag}{Character scalar identifying the input type. One of +`"met"`, `"soil"`, `"ic"`, `"phenology"`, or `"fia"`.} + +\item{paths}{Character vector of output file paths.} + +\item{input_id}{Integer scalar BETY input ID, or `NA_integer_` when unused.} + +\item{dbfile_id}{Integer scalar BETY dbfile ID, or `NA_integer_` when unused.} + +\item{format}{Character scalar naming the output format.} + +\item{source}{Character scalar naming the source dataset or provider.} + +\item{status}{Character scalar. One of `"success"`, `"error"`, or +`"skipped"`.} + +\item{error_message}{Character scalar or `NULL`. Error detail when +`status == "error"`.} +} +\value{ +An object of class `pecan_preprocess_result`. +} +\description{ +Construct a minimal, uniform result object for preprocessing functions. +This object is intended to standardize the current mix of return values such +as path strings, nested lists, and BETY identifier lists. +} +\examples{ +standard_result( + tag = "met", + paths = "/data/dbfiles/ERA5_772_2005.nc", + input_id = 42001L, + dbfile_id = 55003L, + format = "CF Meteorology", + source = "ERA5" +) +} diff --git a/base/workflow/man/validate_standard_result.Rd b/base/workflow/man/validate_standard_result.Rd new file mode 100644 index 0000000000..372a986415 --- /dev/null +++ b/base/workflow/man/validate_standard_result.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/standard_result.R +\name{validate_standard_result} +\alias{validate_standard_result} +\title{Validate a Standard Preprocessing Result} +\usage{ +validate_standard_result(x) +} +\arguments{ +\item{x}{Object to validate.} +} +\value{ +`x`, invisibly, when validation succeeds. +} +\description{ +Validate the structure and field types of a standard preprocessing result. +Returns the input object invisibly when valid and throws an error otherwise. +} diff --git a/base/workflow/tests/testthat/test-standard_result.R b/base/workflow/tests/testthat/test-standard_result.R new file mode 100644 index 0000000000..e5d9d9daa9 --- /dev/null +++ b/base/workflow/tests/testthat/test-standard_result.R @@ -0,0 +1,225 @@ +context("standard_result") + +if (!exists("standard_result", mode = "function")) { + standard_result_candidates <- c( + "base/workflow/R/standard_result.R", + "R/standard_result.R", + "../../R/standard_result.R", + "/Users/hm/Desktop/pecan/base/workflow/R/standard_result.R" + ) + standard_result_path <- standard_result_candidates[file.exists(standard_result_candidates)][1] + + if (is.na(standard_result_path)) { + stop("Unable to locate standard_result.R for direct test execution.", call. = FALSE) + } + + source(standard_result_path) +} + +test_that("standard_result creates valid object", { + r <- standard_result( + tag = "met", + paths = "/tmp/test.nc", + input_id = 42L, + dbfile_id = 99L, + format = "CF Meteorology", + source = "ERA5" + ) + + expect_s3_class(r, "pecan_preprocess_result") + expect_equal(r$tag, "met") + expect_equal(r$paths, "/tmp/test.nc") + expect_equal(r$input_id, 42L) + expect_equal(r$dbfile_id, 99L) + expect_equal(r$format, "CF Meteorology") + expect_equal(r$source, "ERA5") + expect_equal(r$status, "success") + expect_null(r$error_message) +}) + +test_that("standard_result coerces ids to integer for compatibility", { + r <- standard_result( + tag = "met", + input_id = 42, + dbfile_id = 99, + source = "ERA5" + ) + + expect_type(r$input_id, "integer") + expect_type(r$dbfile_id, "integer") + expect_equal(r$input_id, 42L) + expect_equal(r$dbfile_id, 99L) +}) + +test_that("standard_result validates tag inputs", { + expect_error(standard_result(tag = "invalid")) + expect_error(standard_result(tag = 123)) + expect_error(standard_result(tag = c("met", "soil"))) +}) + +test_that("standard_result handles empty values", { + r <- standard_result(tag = "soil") + + expect_equal(r$paths, character()) + expect_true(is.na(r$input_id)) + expect_true(is.na(r$dbfile_id)) + expect_equal(r$format, "") + expect_equal(r$source, "") + expect_equal(r$status, "success") +}) + +test_that("standard_result handles error status", { + r <- standard_result( + tag = "met", + status = "error", + error_message = "download failed" + ) + + expect_equal(r$status, "error") + expect_equal(r$error_message, "download failed") +}) + +test_that("validate_standard_result returns object invisibly when valid", { + r <- standard_result(tag = "met", status = "skipped") + + expect_identical(validate_standard_result(r), r) +}) + +test_that("validate_standard_result rejects invalid objects and values", { + valid <- standard_result(tag = "met", source = "ERA5") + + expect_error(validate_standard_result(list())) + + no_class <- valid + class(no_class) <- "list" + expect_error(validate_standard_result(no_class), "must inherit") + + bad_paths <- valid + bad_paths$paths <- 1 + expect_error(validate_standard_result(bad_paths), "paths") + + bad_input_id <- valid + bad_input_id$input_id <- c(1L, 2L) + expect_error(validate_standard_result(bad_input_id), "input_id") + + bad_dbfile_id <- valid + bad_dbfile_id$dbfile_id <- "99" + expect_error(validate_standard_result(bad_dbfile_id), "dbfile_id") + + bad_format <- valid + bad_format$format <- character() + expect_error(validate_standard_result(bad_format), "format") + + bad_source <- valid + bad_source$source <- c("ERA5", "NARR") + expect_error(validate_standard_result(bad_source), "source") + + bad_status <- valid + bad_status$status <- "done" + expect_error(validate_standard_result(bad_status), "status") + + missing_error_message <- valid + missing_error_message$status <- "error" + expect_error(validate_standard_result(missing_error_message), "error_message") + + unexpected_error_message <- valid + unexpected_error_message$error_message <- "boom" + expect_error(validate_standard_result(unexpected_error_message), "error_message") + + bad_error_message_type <- valid + bad_error_message_type$status <- "error" + bad_error_message_type$error_message <- 1 + expect_error(validate_standard_result(bad_error_message_type), "error_message") +}) + +test_that("print.pecan_preprocess_result outputs expected format for populated result", { + r <- standard_result(tag = "met", source = "ERA5", paths = "/tmp/test.nc") + + output <- capture.output(print(r)) + + expect_match(output[1], "PEcAn preprocessing result \\[met\\]") + expect_match(output[2], "status: success") + expect_match(output[3], "source: ERA5") + expect_match(output[5], "paths: /tmp/test.nc") +}) + +test_that("print.pecan_preprocess_result handles empty paths and errors", { + skipped <- standard_result(tag = "soil", status = "skipped") + skipped_output <- capture.output(print(skipped)) + expect_match(skipped_output[5], "paths: ") + + errored <- standard_result( + tag = "met", + status = "error", + error_message = "download failed" + ) + error_output <- capture.output(print(errored)) + expect_true(any(grepl("error: download failed", error_output))) +}) + +test_that("as.data.frame.pecan_preprocess_result expands one row per path", { + r <- standard_result( + tag = "met", + paths = c("/tmp/a.nc", "/tmp/b.nc"), + input_id = 1L, + source = "ERA5" + ) + + df <- as.data.frame(r) + expect_equal(nrow(df), 2) + expect_equal(df$tag, c("met", "met")) + expect_equal(df$path, c("/tmp/a.nc", "/tmp/b.nc")) + expect_equal(df$input_id, c(1L, 1L)) + expect_equal(df$error_message, c(NA_character_, NA_character_)) +}) + +test_that("as.data.frame.pecan_preprocess_result handles empty paths", { + r <- standard_result(tag = "soil") + + df <- as.data.frame(r) + expect_equal(nrow(df), 1) + expect_equal(df$tag, "soil") + expect_true(is.na(df$path)) + expect_true(is.na(df$error_message)) +}) + +test_that("standard_result serialization preserves the contract", { + r <- standard_result( + tag = "met", + paths = c("/tmp/a.nc", "/tmp/b.nc"), + input_id = 10L, + dbfile_id = 20L, + format = "CF Meteorology", + source = "ERA5", + status = "success" + ) + + restored <- unserialize(serialize(r, NULL)) + + expect_s3_class(restored, "pecan_preprocess_result") + expect_identical(restored, r) + expect_identical(validate_standard_result(restored), restored) +}) + +test_that("backward compatibility supports legacy-shaped classed lists", { + legacy <- list( + tag = "met", + paths = "/tmp/test.nc", + input_id = 1L, + dbfile_id = NA_integer_, + format = "CF Meteorology", + source = "ERA5", + status = "success", + error_message = NULL + ) + class(legacy) <- c("pecan_preprocess_result", "list") + + expect_identical(validate_standard_result(legacy), legacy) + expect_equal(as.data.frame(legacy)$path, "/tmp/test.nc") +}) + +test_that("invalid constructor values fail fast", { + expect_error(standard_result(tag = "met", status = "error"), "error_message") + expect_error(standard_result(tag = "met", status = "success", error_message = "boom"), "error_message") + expect_error(standard_result(tag = "met", status = "unknown"), "status") +}) \ No newline at end of file