diff --git a/NEWS.md b/NEWS.md index a1aae0b7..7d054d84 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,23 @@ # mizer (development version) +- Under second-order bin-averaging (`second_order_w[["bin_average"]]`), the + spectrum plots `plotSpectra()`, `plotlySpectra()`, `plotSpectraRelative()` and + `animate()`/`animateSpectra()` now evaluate the `w^power` weight *and* the + marker location at the geometric bin centre `w* = w sqrt(beta)`, so each marker + is a point `(w*, N_j (w*)^power)` on the continuous `N w^power` curve rather + than being doubly mis-placed at the bin edge (the location error grows with + `power`, worst for the common `power = 2` Sheldon plot). These + spectrum-density changes apply only under second-order bin-averaging, so + default spectrum plots are unchanged. (#383) + +- `plotCDF()` / `plotlyCDF()` now plot each cumulative value on its bin's + **upper** edge `w_k + dw_k`, following the inclusive cumulative-sum + convention (the sum through bin `k` is the integral up to that bin's upper + edge). This corrects a long-standing one-bin location offset and applies in + both the default and the second-order schemes; under second-order + bin-averaging the CDF is then second-order accurate in its placement as well + as its increments. (#383) + - Size-resolved diagnostics that are finite-volume bin averages (the mortalities `getPredMort()`, `getFMort()`, `getMort()`, `getExtMort()`, and the reproductive investment `getERepro()`) are now drawn at the geometric bin diff --git a/R/animateSpectra.R b/R/animateSpectra.R index 28b95e6e..698bb848 100644 --- a/R/animateSpectra.R +++ b/R/animateSpectra.R @@ -190,6 +190,14 @@ animate.MizerSim <- function(x, species = NULL, } else { y_label <- paste0("Number density * w^", power) } + # The animated spectrum is a density, so under second-order bin-averaging + # we evaluate both the w^power weight and the plotted location at the + # geometric bin centre w* = w sqrt(beta) (issue #383), matching + # plotSpectra(). Default plots are unchanged. + if (isTRUE(sim@params@second_order_w[["bin_average"]])) { + beta <- sim@params@w_full[2] / sim@params@w_full[1] + nf$w <- nf$w * sqrt(beta) + } nf <- mutate(nf, value = value * w^power) animate_plotly(nf, sim@params, log_x, log_y, y_label, wlim, llim, diff --git a/R/plots.R b/R/plots.R index ca6ff584..4de5704f 100644 --- a/R/plots.R +++ b/R/plots.R @@ -1373,11 +1373,24 @@ plot_spectra <- function(params, n, n_pp, highlight, log_x, log_y, size_axis, return_data) { params <- validParams(params) size_axis <- plot_size_axis(size_axis) + # The spectrum is a density: each plotted value N_j w^power is a point on the + # continuous N(w) w^power curve, which (for the cell-average N_j) it touches + # at the geometric bin centre. Under second-order bin-averaging we therefore + # evaluate *both* the w^power weight and the plotted location at the bin + # centre w* = w sqrt(beta) (issue #383). The default keeps the grid nodes, so + # existing spectrum plots are unchanged. + second_order <- isTRUE(params@second_order_w[["bin_average"]]) + w_grid <- if (second_order) bin_midpoints(params) else params@w + w_full_grid <- if (second_order) bin_midpoints(params, w_full = TRUE) else + params@w_full + # Default size limits follow the plotted grid (the bin centres under + # second order) so the centre shift does not clip the top/bottom bins. With + # the default (first-order) nodes these reduce to the previous limits. if (is.na(wlim[1])) { - wlim[1] <- if (resource) min(params@w) / 100 else min(params@w) + wlim[1] <- if (resource) min(w_grid) / 100 else min(w_grid) } if (is.na(wlim[2])) { - wlim[2] <- max(params@w_full) + wlim[2] <- max(w_full_grid) } if (total) { @@ -1385,16 +1398,16 @@ plot_spectra <- function(params, n, n_pp, fish_idx <- (length(params@w_full) - length(params@w) + 1):length(params@w_full) total_n <- n_pp total_n[fish_idx] <- total_n[fish_idx] + colSums(n) - total_n <- total_n * params@w_full^power + total_n <- total_n * w_full_grid^power } species <- valid_species_arg(params, species) # Deal with power argument y_label <- spectra_y_label(power) - n <- sweep(n, 2, params@w^power, "*") + n <- sweep(n, 2, w_grid^power, "*") # Select only the desired species spec_n <- n[as.character(dimnames(n)[[1]]) %in% species, , drop = FALSE] # Make data.frame for plot - plot_dat <- data.frame(w = rep(params@w, + plot_dat <- data.frame(w = rep(w_grid, each = dim(spec_n)[[1]]), value = c(spec_n), Species = dimnames(spec_n)[[1]], @@ -1404,7 +1417,7 @@ plot_spectra <- function(params, n, n_pp, (params@w_full <= wlim[2]) # Do we have any resource to plot? if (sum(resource_sel) > 0) { - w_resource <- params@w_full[resource_sel] + w_resource <- w_full_grid[resource_sel] plank_n <- n_pp[resource_sel] * w_resource^power plot_dat <- rbind(plot_dat, data.frame(w = w_resource, @@ -1416,7 +1429,7 @@ plot_spectra <- function(params, n, n_pp, } if (total) { plot_dat <- rbind(plot_dat, - data.frame(w = params@w_full, + data.frame(w = w_full_grid, value = c(total_n), Species = "Total", Legend = "Total") @@ -1426,7 +1439,7 @@ plot_spectra <- function(params, n, n_pp, back_n <- n[params@species_params$is_background, , drop = FALSE] plot_dat <- rbind(plot_dat, - data.frame(w = rep(params@w, + data.frame(w = rep(w_grid, each = dim(back_n)[[1]]), value = c(back_n), Species = as.factor(dimnames(back_n)[[1]]), @@ -1660,6 +1673,21 @@ plotCDF.MizerParams <- function(object, species = NULL, plot_cdf <- function(plot_dat, params, power, normalise, log_x, log_y, wlim, llim, ylim, highlight, size_axis, return_data) { size_axis <- plot_size_axis(size_axis) + # A CDF value is cumulative *up to a size* — a boundary quantity — so it + # belongs on the bin edges, not the bin centres. Under second-order + # bin-averaging `plot_spectra()` returns its density on the geometric bin + # centres (issue #383). Here we map those back to the grid nodes + # (w = w* / sqrt(beta)) so that `prepare_spectra_cdf_data()` can look up the + # bin widths and form the bin integrals; that helper then places the + # cumulative on the *upper* bin edges (see its comments for the inclusive + # convention). The density *values* are kept centre-weighted, so each + # increment value * dw_k = N_k (w*_k)^power dw_k is the second-order bin + # integral of N w^power. (See get_ArraySpeciesBySize_w() and the + # FFT/numerical-details vignettes.) + if (isTRUE(params@second_order_w[["bin_average"]])) { + beta <- params@w_full[2] / params@w_full[1] + plot_dat$w <- plot_dat$w / sqrt(beta) + } if (identical(size_axis, "l")) { plot_dat_l <- convert_plot_size_axis(plot_dat, params, size_axis, drop_w = FALSE) @@ -1701,13 +1729,26 @@ prepare_spectra_cdf_data <- function(plot_dat, params, normalise = TRUE) { params <- validParams(params) y_var <- names(plot_dat)[2] plot_dat <- plot_dat[order(plot_dat$Species, plot_dat$w), ] - plot_dat[[y_var]] <- plot_dat[[y_var]] * spectra_bin_width(plot_dat$w, params) + # Each bin contributes its integral, value * dw_k, which under second-order + # bin-averaging is N_k (w*_k)^power dw_k, the second-order bin integral of + # N w^power. + widths <- spectra_bin_width(plot_dat$w, params) + plot_dat[[y_var]] <- plot_dat[[y_var]] * widths + # The cumulative sum is *inclusive*: the sum through bin k integrates + # N w^power over all bins up to and including bin k, so it equals the CDF at + # that bin's *upper* edge w_k + dw_k, not at its left edge w_k. plot_dat[[y_var]] <- stats::ave(plot_dat[[y_var]], plot_dat$Species, FUN = cumsum) if (normalise) { totals <- stats::ave(plot_dat[[y_var]], plot_dat$Species, FUN = max) plot_dat[[y_var]] <- plot_dat[[y_var]] / totals } + # Make the inclusive convention explicit by placing each cumulative value at + # its bin's upper edge w_k + dw_k, the size up to which it accumulates. This + # corrects the historical one-bin (O(dx)) location offset (the inclusive sum + # was previously plotted at the left edge) and applies in both the default + # and the second-order schemes. + plot_dat$w <- plot_dat$w + widths plot_dat } diff --git a/tests/testthat/test-plots.R b/tests/testthat/test-plots.R index fc7caab0..38d81c7b 100644 --- a/tests/testthat/test-plots.R +++ b/tests/testthat/test-plots.R @@ -385,7 +385,12 @@ test_that("size-based plots support length axes", { size_axis = "l", llim = llim, return_data = TRUE) expect_true(all(cdf_l_limited$l >= llim[1])) - expect_true(all(cdf_l_limited$l <= llim[2])) + # The cumulative now sits on each bin's upper edge (#383), so the top + # in-window point — where the normalised CDF reaches 1 — may exceed llim[2] + # by up to one bin, but no more (one weight-bin ratio is a safe length bound + # since the length ratio beta^(1/b) < beta). + beta_w <- params_len@w_full[2] / params_len@w_full[1] + expect_true(all(cdf_l_limited$l <= llim[2] * beta_w)) y_var <- names(cdf_l_limited)[2] expect_equal(stats::aggregate(cdf_l_limited[[y_var]] ~ cdf_l_limited$Species, FUN = max)[[2]], @@ -677,3 +682,112 @@ test_that("plotDiet works with MizerSim", { p <- plotDiet(sim, species = 2, time_range = 1:2) expect_true(is(p, "ggplot")) }) + +# Second-order power weighting in plotSpectra / plotCDF (#383) -------------- + +test_that("plotSpectra draws the spectrum at bin centres with the w^power weight there", { + p0 <- params + p1 <- params + second_order_w(p1) <- c(bin_average = TRUE) + beta <- p0@w_full[2] / p0@w_full[1] + for (pw in c(0, 1, 2)) { + d0 <- plotSpectra(p0, power = pw, return_data = TRUE) + d1 <- plotSpectra(p1, power = pw, return_data = TRUE) + d0 <- d0[order(d0$Species, d0$w), ] + d1 <- d1[order(d1$Species, d1$w), ] + expect_equal(nrow(d0), nrow(d1)) + # x moves to the geometric bin centre (a uniform sqrt(beta) shift) ... + expect_equal(unname(d1$w / d0$w), rep(sqrt(beta), nrow(d1))) + # ... and the w^power weight is evaluated there, scaling the value + # (column 2, named by the y-label) by (w*/w)^power = beta^(power/2). + expect_equal(unname(d1[[2]] / d0[[2]]), + rep(beta^(pw / 2), nrow(d1))) + } +}) + +test_that("plotSpectra default (first order) is unchanged", { + expect_identical(plotSpectra(params, power = 2, return_data = TRUE), + plotSpectra(params, power = 2, return_data = TRUE)) + # The default model never shifts: x stays on the model grid nodes. + d <- plotSpectra(params, power = 2, resource = FALSE, total = FALSE, + background = FALSE, return_data = TRUE) + expect_true(all(d$w %in% params@w)) +}) + +test_that("plotCDF places the cumulative on upper bin edges (inclusive convention)", { + upper_edges <- round(params@w_full + params@dw_full, 6) + # The inclusive cumulative sum belongs on each bin's *upper* edge w_k + dw_k, + # in both the default and the second-order schemes. + cdf0 <- plotCDF(params, power = 2, return_data = TRUE) + expect_true(all(round(cdf0$w, 6) %in% upper_edges)) + # The left bin edges (nodes) are no longer used for the placement. + nodes <- round(params@w, 6) + expect_false(all(round(cdf0$w, 6) %in% nodes)) + + p1 <- params + second_order_w(p1) <- c(bin_average = TRUE) + cdf <- plotCDF(p1, power = 2, return_data = TRUE) + centres <- round(c(bin_midpoints(p1), bin_midpoints(p1, w_full = TRUE)), 6) + expect_true(all(round(cdf$w, 6) %in% upper_edges)) + # never on the geometric bin centres + expect_false(any(round(cdf$w, 6) %in% setdiff(centres, upper_edges))) + # Cumulative is monotonic increasing per species. + sp1 <- cdf[cdf$Species == p1@species_params$species[1], ] + sp1 <- sp1[order(sp1$w), ] + expect_true(all(diff(sp1[[2]]) >= -1e-12)) +}) + +test_that("plotCDF cumulative is second-order and free of the one-bin offset", { + # For a community spectrum N = C w^-2 with power = 2 the integrand N w^power + # is constant, so the exact CDF is linear in w: F(w) = V (w - w_min). The + # bin-average of C w^-2 is exact at the geometric centre, so the plotted + # density value is the constant V at every bin. We can therefore check the + # cumulative analytically, which pins down the edge placement: a one-bin + # offset (plotting F at the *left* edge) would break F(w_min-bin) != 0 and + # the slope, so this is a sharp regression test for issue #383. + p <- params + second_order_w(p) <- c(bin_average = TRUE) + sp <- 1 + p@initial_n[sp, ] <- p@w^(-2) # community spectrum N proportional to w^-2 + + # The weighted density is the constant V = beta at every bin centre. + dens <- plotSpectra(p, species = sp, power = 2, resource = FALSE, + total = FALSE, background = FALSE, return_data = TRUE) + V <- mean(dens[[2]]) + expect_equal(unname(dens[[2]]), rep(V, nrow(dens))) + + cdf <- plotCDF(p, species = sp, power = 2, resource = FALSE, total = FALSE, + background = FALSE, normalise = FALSE, return_data = TRUE) + cdf <- cdf[order(cdf$w), ] + w_min <- min(p@w) + # F plotted at the upper edge equals the exact integral V (w_upper - w_min). + expect_equal(unname(cdf[[2]]), V * (cdf$w - w_min), tolerance = 1e-8) + # The smallest plotted x is the upper edge of the first bin, and its value is + # one full bin integral (not zero, and not two bins) — i.e. no offset. + expect_equal(cdf$w[1], unname(p@w[1] + p@dw_full[match(p@w[1], p@w_full)])) + expect_equal(cdf[[2]][1], V * (cdf$w[1] - w_min), tolerance = 1e-8) +}) + +test_that("plotSpectraRelative shifts x to centres but cancels the power weight", { + p1a <- params + p1b <- params + p1b@initial_n <- p1b@initial_n * 1.5 + second_order_w(p1a) <- c(bin_average = TRUE) + second_order_w(p1b) <- c(bin_average = TRUE) + beta <- params@w_full[2] / params@w_full[1] + # power cancels in 2(N2-N1)/(N1+N2), so the relative value is independent of + # power; only the x-location picks up the centre shift. + p_p1 <- plotSpectraRelative(p1a, p1b, species = species, resource = FALSE, + power = 1) + p_p2 <- plotSpectraRelative(p1a, p1b, species = species, resource = FALSE, + power = 2) + d_p1 <- p_p1$data[order(p_p1$data$Species, p_p1$data$w), ] + d_p2 <- p_p2$data[order(p_p2$data$Species, p_p2$data$w), ] + expect_equal(d_p1$w, d_p2$w) + expect_equal(d_p1$rel_diff, d_p2$rel_diff) + # The x-location is the geometric bin centre, not the node. + p_node <- plotSpectraRelative(params, params, species = species, + resource = FALSE) + expect_true(all(p_node$data$w %in% params@w)) + expect_false(all(d_p1$w %in% params@w)) +}) diff --git a/vignettes/numerical_details.qmd b/vignettes/numerical_details.qmd index 719dd1a0..3a94fee2 100644 --- a/vignettes/numerical_details.qmd +++ b/vignettes/numerical_details.qmd @@ -108,6 +108,10 @@ In summary: **Plotting follows the same distinction.** A bin average $N_j$ does not live at the bin boundary $w_j$ but at the geometric bin centre $w^*_j=\sqrt{w_j\,w_{j+1}}=w_j\sqrt\beta$ (the log-midpoint, exact for the community spectrum $N\propto w^{-2}$). So under second-order bin-averaging mizer draws bin-averaged quantities (the abundance and the mortality/reproduction sinks) at $w^*_j$ — a uniform half-bin shift to the right on the log axis — while point-valued quantities (the encounter and growth-type rates) stay on the nodes $w_j$. The size-resolved array classes carry a `representation` tag recording which a quantity is, and the shift is applied only when `second_order_w[["bin_average"]]` is set, so default plots are unchanged. +**Plotting follows the same distinction.** A bin average $N_j$ does not live at the bin boundary $w_j$ but at the geometric bin centre $w^*_j=\sqrt{w_j\,w_{j+1}}=w_j\sqrt\beta$ (the log-midpoint, exact for the community spectrum $N\propto w^{-2}$). So under second-order bin-averaging mizer draws bin-averaged quantities (the abundance and the mortality/reproduction sinks) at $w^*_j$ — a uniform half-bin shift to the right on the log axis — while point-valued quantities (the encounter and growth-type rates) stay on the nodes $w_j$. The size-resolved array classes carry a `representation` tag recording which a quantity is, and the shift is applied only when `second_order_w[["bin_average"]]` is set, so default plots are unchanged. + +For the `power`-weighted spectrum plots (`plotSpectra()` and friends) the $w^{\text{power}}$ factor must be evaluated where the density value lives, so it too is taken at the bin centre: each marker is the point $\bigl(w^*_j,\,N_j\,(w^*_j)^{\text{power}}\bigr)$ on the continuous $N(w)\,w^{\text{power}}$ curve. (Sampling the weight at the edge would mis-scale it by a factor $\beta^{\text{power}/2}$, largest for the common $\text{power}=2$ Sheldon plot.) A cumulative plot (`plotCDF()`) is the opposite case: a CDF value is cumulative *up to a size*, a boundary quantity, so its increments use the bin-averaged (centre-weighted) density but the cumulative is plotted on the bin **edges**, not the centres. Because the cumulative sum is inclusive — the sum through bin $k$ is the integral over all bins up to and including bin $k$ — each cumulative value is placed on that bin's *upper* edge $w_k+\Delta w_k$ (in both the default and second-order schemes). This makes the inclusive convention explicit and removes a one-bin offset that would otherwise leave the CDF only first-order accurate in its placement. + ## Semi-Implicit Time Discretisation With the diffusion term, an explicit time discretisation would require a very small time step for stability ($\Delta t \sim \Delta w^2$). Therefore, we use a semi-implicit scheme where the densities $N$ are evaluated at time $t+1$, but the rates ($g$, $\mu$, $d$) are evaluated at time $t$. Using a fully implicit scheme would require solving a nonlinear system at each time step, which is more computationally expensive.