From d55581a849b43bc0287edcfee8e474e7ebed8f4b Mon Sep 17 00:00:00 2001
From: Mahimn <kingmahimn@gmail.com>
Date: Tue, 26 May 2026 10:33:45 -0400
Subject: [PATCH] fix: extend zero_division parameter to percentage and
 range-based metrics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Percentage and range-based metrics (`wmape`, `ope`, `arre`, `marre`,
`coefficient_of_variation`) previously either raised a hard `ValueError`
or silently returned `nan`/`inf` when their denominator was zero. This
made batch evaluation pipelines brittle for constant or all-zero
components.

Mirrors the `zero_division` design introduced in #3059 for the scaled
error family: `"warn"` (default) returns `np.nan` and emits a warning,
`"raise"` raises a `ValueError` (the legacy behavior of `ope`, `arre`
and `marre`). A new `_safe_pct_divide` helper sits next to
`_safe_scaled_divide`; the two differ only in fill semantics. Percentage
metrics multiply the ratio by 100, so a `1.0` fill for the 0/0 case (the
scaled-metric "on par with naive") would surface as a misleading `100 %`
error; the new helper therefore fills with `np.nan` instead.

Two adjacent bugs surface and are fixed in the same change:
* `ope` previously checked `sum > 0` and rejected `actual_series` with
  a strictly negative sum (e.g. financial return series). The helper
  now only rejects a sum that is (numerically) zero, via `np.isclose`.
* `wmape`'s docstring claimed `ValueError if actual_series contains
  some zeros`, but the implementation divides by `sum(|y_true|)` and
  only the all-zero case ever triggered the path. Docstring corrected.

The CHANGELOG entry for the parameter addition carries the breaking-
change marker per the convention discussed in #3080 (the post-mortem
on #3059), since the default behavior flips from raising to warning.

Adds a parametrized regression test covering all five metrics and an
explicit OPE-with-negative-sum test. Existing `test_ope_zero` and the
arre/marre legacy raise check are updated to opt into the legacy
behavior with `zero_division="raise"`.
---
 CHANGELOG.md                        |  2 +
 darts/metrics/metrics.py            | 90 ++++++++++++++++++++---------
 darts/metrics/utils.py              | 67 +++++++++++++++++++++
 darts/tests/metrics/test_metrics.py | 80 ++++++++++++++++++++++++-
 4 files changed, 208 insertions(+), 31 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9b921ad3de..83c51fdff9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,12 +15,14 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 - Added `use_longer_projection_head` to `TimesFM2p5Model` to enable longer non-autoregressive prediction horizons (up to 1024 steps for `output_chunk_length + output_chunk_shift`). [#3121](https://github.com/unit8co/darts/pull/3121) by [Zhihao Dai](https://github.com/daidahao).
 - `TimeSeries.from_dataframe()` now supports time columns of type `pl.Date` for `polars.DataFrame`. [#3124](https://github.com/unit8co/darts/pull/3124) by [Dennis Bader](https://github.com/dennisbader)
 - Custom encoders now support functions that return multiple components. Simply pass such a function via the `"custom"` encoder key in the `add_encoders` model input parameter. [#3069](https://github.com/unit8co/darts/pull/3069) by [Moritz Waldleben](https://github.com/mwaldleben).
+- 🔴 Percentage and range-based metrics (`wmape`, `ope`, `arre`, `marre`, `coefficient_of_variation`) now expose a `zero_division` parameter (mirroring [#3059](https://github.com/unit8co/darts/pull/3059)) controlling the behavior when the denominator is zero: `"warn"` (default) returns `np.nan` and emits a warning, `"raise"` raises a `ValueError` (the previous behavior of `ope`, `arre`, and `marre`; `wmape` and `coefficient_of_variation` previously returned `nan`/`inf` silently). [#3122](https://github.com/unit8co/darts/pull/3122) by [Mahimn](https://github.com/mahimn01).
 
 **Fixed**
 
 - Fixed `_ScaledDotProductAttention` float16 overflow in `masked_fill` under mixed precision training. [#3087](https://github.com/unit8co/darts/pull/3087) by [Robert Ruidisch](https://github.com/robrui).
 - Fixed a bug in `TimeSeries.quantile()` where the output dtype did not match the input series dtype for dtypes `float32` or `float16`. Now the dtype is correctly propagated. [#3124](https://github.com/unit8co/darts/pull/3124) by [Dennis Bader](https://github.com/dennisbader)
 - Optuna integration's `PyTorchLightningPruningCallback` for hyperparameter optimization of torch models is now natively available in Darts via `darts.utils.callbacks`. [#3114](https://github.com/unit8co/darts/pull/3114) by [Jakub Chłapek](https://github.com/jakubchlapek).
+- Fixed `ope` to accept `actual_series` with a strictly negative sum; the previous `sum > 0` check incorrectly rejected valid inputs such as financial return series. Also corrected the `wmape` docstring which inaccurately claimed it raised on zeros in `actual_series`. [#3122](https://github.com/unit8co/darts/pull/3122) by [Mahimn](https://github.com/mahimn01).
 
 **Dependencies**
 
diff --git a/darts/metrics/metrics.py b/darts/metrics/metrics.py
index 35688b1291..31471a92d7 100644
--- a/darts/metrics/metrics.py
+++ b/darts/metrics/metrics.py
@@ -24,6 +24,7 @@
     _get_values_or_raise,
     _get_wrapped_metric,
     _LabelReduction,
+    _safe_pct_divide,
     _safe_scaled_divide,
     classification_support,
     interval_support,
@@ -1725,6 +1726,7 @@ def wmape(
     intersect: bool = True,
     *,
     q: float | list[float] | tuple[np.ndarray, pd.Index] | None = None,
+    zero_division: str = "warn",
     component_reduction: Callable[[np.ndarray], float] | None = np.nanmean,
     series_reduction: Callable[[np.ndarray], float | np.ndarray] | None = None,
     n_jobs: int = 1,
@@ -1753,6 +1755,12 @@ def wmape(
         will consider the values only over their common time interval (intersection in time).
     q
         Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on.
+    zero_division
+        Controls behavior when the denominator :math:`\\sum_{t=1}^T |y_t|` is zero (i.e. ``actual_series`` is
+        all zeros for a given component).
+
+        * ``"warn"`` (default) – returns ``np.nan`` and emits a warning.
+        * ``"raise"`` – raises a ``ValueError``.
     component_reduction
         Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray`
         of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a
@@ -1776,7 +1784,7 @@ def wmape(
     Raises
     ------
     ValueError
-        If `actual_series` contains some zeros.
+        If `zero_division="raise"` and the denominator :math:`\\sum_{t=1}^T |y_t|` is zero for some component.
 
     Returns
     -------
@@ -1812,10 +1820,10 @@ def wmape(
         q=q,
     )
 
-    return (
-        100.0
-        * np.nansum(np.abs(y_true - y_pred), axis=TIME_AX)
-        / np.nansum(np.abs(y_true), axis=TIME_AX)
+    return 100.0 * _safe_pct_divide(
+        np.nansum(np.abs(y_true - y_pred), axis=TIME_AX),
+        np.nansum(np.abs(y_true), axis=TIME_AX),
+        zero_division=zero_division,
     )
 
 
@@ -2029,6 +2037,7 @@ def ope(
     intersect: bool = True,
     *,
     q: float | list[float] | tuple[np.ndarray, pd.Index] | None = None,
+    zero_division: str = "warn",
     component_reduction: Callable[[np.ndarray], float] | None = np.nanmean,
     series_reduction: Callable[[np.ndarray], float | np.ndarray] | None = None,
     n_jobs: int = 1,
@@ -2058,6 +2067,14 @@ def ope(
         will consider the values only over their common time interval (intersection in time).
     q
         Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on.
+    zero_division
+        Controls behavior when the denominator :math:`\\sum_{t=1}^{T}{y_t}` is zero.
+
+        * ``"warn"`` (default) – returns ``np.nan`` and emits a warning.
+        * ``"raise"`` – raises a ``ValueError``.
+
+        Note: a negative sum is a valid denominator (e.g. financial return series). Only a sum that
+        is (numerically) zero triggers the zero-division handling.
     component_reduction
         Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray`
         of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a
@@ -2081,7 +2098,7 @@ def ope(
     Raises
     ------
     ValueError
-        If :math:`\\sum_{t=1}^{T}{y_t} = 0`.
+        If `zero_division="raise"` and :math:`\\sum_{t=1}^{T}{y_t} = 0` for some component.
 
     Returns
     -------
@@ -2116,14 +2133,16 @@ def ope(
         np.nansum(y_true, axis=TIME_AX),
         np.nansum(y_pred, axis=TIME_AX),
     )
-    if not (y_true_sum > 0).all():
-        raise_log(
-            ValueError(
-                "The series of actual value cannot sum to zero when computing OPE."
-            ),
-            logger=logger,
+    return (
+        np.abs(
+            _safe_pct_divide(
+                y_true_sum - y_pred_sum,
+                y_true_sum,
+                zero_division=zero_division,
+            )
         )
-    return np.abs((y_true_sum - y_pred_sum) / y_true_sum) * 100.0
+        * 100.0
+    )
 
 
 @multi_ts_support
@@ -2134,6 +2153,7 @@ def arre(
     intersect: bool = True,
     *,
     q: float | list[float] | tuple[np.ndarray, pd.Index] | None = None,
+    zero_division: str = "warn",
     time_reduction: Callable[..., np.ndarray] | None = None,
     component_reduction: Callable[[np.ndarray], float] | None = np.nanmean,
     series_reduction: Callable[[np.ndarray], float | np.ndarray] | None = None,
@@ -2163,6 +2183,12 @@ def arre(
         will consider the values only over their common time interval (intersection in time).
     q
         Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on.
+    zero_division
+        Controls behavior when the denominator :math:`\\max_t{y_t} - \\min_t{y_t}` is zero (i.e.
+        ``actual_series`` is constant for a given component).
+
+        * ``"warn"`` (default) – returns ``np.nan`` for affected components and emits a warning.
+        * ``"raise"`` – raises a ``ValueError``.
     time_reduction
         Optionally, a function to aggregate the metrics over the time axis. It must reduce a `np.ndarray`
         of shape `(t, c)` to a `np.ndarray` of shape `(c,)`. The function takes as input a ``np.ndarray`` and a
@@ -2191,7 +2217,7 @@ def arre(
     Raises
     ------
     ValueError
-        If :math:`\\max_t{y_t} = \\min_t{y_t}`.
+        If `zero_division="raise"` and :math:`\\max_t{y_t} = \\min_t{y_t}` for some component.
 
     Returns
     -------
@@ -2226,16 +2252,10 @@ def arre(
         q=q,
     )
     y_max, y_min = np.nanmax(y_true, axis=TIME_AX), np.nanmin(y_true, axis=TIME_AX)
-    if not (y_max > y_min).all():
-        raise_log(
-            ValueError(
-                "The difference between the max and min values must "
-                "be strictly positive to compute the MARRE."
-            ),
-            logger=logger,
-        )
     true_range = y_max - y_min
-    return 100.0 * np.abs((y_true - y_pred) / true_range)
+    return 100.0 * np.abs(
+        _safe_pct_divide(y_true - y_pred, true_range, zero_division=zero_division)
+    )
 
 
 @multi_ts_support
@@ -2246,6 +2266,7 @@ def marre(
     intersect: bool = True,
     *,
     q: float | list[float] | tuple[np.ndarray, pd.Index] | None = None,
+    zero_division: str = "warn",
     component_reduction: Callable[[np.ndarray], float] | None = np.nanmean,
     series_reduction: Callable[[np.ndarray], float | np.ndarray] | None = None,
     n_jobs: int = 1,
@@ -2275,6 +2296,12 @@ def marre(
         will consider the values only over their common time interval (intersection in time).
     q
         Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on.
+    zero_division
+        Controls behavior when the denominator :math:`\\max_t{y_t} - \\min_t{y_t}` is zero (i.e.
+        ``actual_series`` is constant for a given component).
+
+        * ``"warn"`` (default) – returns ``np.nan`` for affected components and emits a warning.
+        * ``"raise"`` – raises a ``ValueError``.
     component_reduction
         Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray`
         of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a
@@ -2298,7 +2325,7 @@ def marre(
     Raises
     ------
     ValueError
-        If :math:`\\max_t{y_t} = \\min_t{y_t}`.
+        If `zero_division="raise"` and :math:`\\max_t{y_t} = \\min_t{y_t}` for some component.
 
     float
         A single metric score for:
@@ -2322,6 +2349,7 @@ def marre(
             pred_series,
             intersect,
             q=q,
+            zero_division=zero_division,
         ),
         axis=TIME_AX,
     )
@@ -2433,6 +2461,7 @@ def coefficient_of_variation(
     intersect: bool = True,
     *,
     q: float | list[float] | tuple[np.ndarray, pd.Index] | None = None,
+    zero_division: str = "warn",
     component_reduction: Callable[[np.ndarray], float] | None = np.nanmean,
     series_reduction: Callable[[np.ndarray], float | np.ndarray] | None = None,
     n_jobs: int = 1,
@@ -2464,6 +2493,11 @@ def coefficient_of_variation(
         will consider the values only over their common time interval (intersection in time).
     q
         Optionally, the quantile (float [0, 1]) or list of quantiles of interest to compute the metric on.
+    zero_division
+        Controls behavior when the denominator :math:`\\bar{y}` (the mean of ``actual_series``) is zero.
+
+        * ``"warn"`` (default) – returns ``np.nan`` for affected components and emits a warning.
+        * ``"raise"`` – raises a ``ValueError``.
     component_reduction
         Optionally, a function to aggregate the metrics over the component/column axis. It must reduce a `np.ndarray`
         of shape `(t, c)` to a `np.ndarray` of shape `(t,)`. The function takes as input a ``np.ndarray`` and a
@@ -2514,10 +2548,10 @@ def coefficient_of_variation(
         q=q,
     )
     # not calling rmse as y_true and y_pred are np.ndarray
-    return (
-        100
-        * np.sqrt(np.nanmean((y_true - y_pred) ** 2, axis=TIME_AX))
-        / np.nanmean(y_true, axis=TIME_AX)
+    return 100 * _safe_pct_divide(
+        np.sqrt(np.nanmean((y_true - y_pred) ** 2, axis=TIME_AX)),
+        np.nanmean(y_true, axis=TIME_AX),
+        zero_division=zero_division,
     )
 
 
diff --git a/darts/metrics/utils.py b/darts/metrics/utils.py
index 454f1ca8ea..20c8e01aa2 100644
--- a/darts/metrics/utils.py
+++ b/darts/metrics/utils.py
@@ -940,6 +940,73 @@ def _safe_scaled_divide(
     return result
 
 
+def _safe_pct_divide(
+    errors: np.ndarray,
+    scale: np.ndarray,
+    zero_division: str = "warn",
+) -> np.ndarray:
+    """Computes ``errors / scale`` for percentage-style metrics, guarding
+    against a (numerically) zero ``scale``.
+
+    Entries whose ``scale`` is close to zero (as per ``np.isclose(scale, 0.0)``)
+    are never divided: they either become ``np.nan`` or trigger an error,
+    depending on ``zero_division``. ``np.nan`` is used as fill value because
+    the metrics multiply the ratio by ``100``; a numeric fill such as ``1.0``
+    would read as a misleading ``100 %`` error.
+
+    Parameters
+    ----------
+    errors
+        The numerator. Must broadcast against ``scale``.
+    scale
+        The denominator (e.g. the sum, mean, or range of ``actual_series``).
+    zero_division
+        What to do when some entries of ``scale`` are (near) zero.
+
+        * ``"warn"`` (default) – set affected entries to ``np.nan`` and log a warning.
+        * ``"raise"`` – raise a ``ValueError``.
+
+    Returns
+    -------
+    np.ndarray
+        ``errors / scale``, with ``np.nan`` wherever ``scale`` is (near) zero.
+
+    Raises
+    ------
+    ValueError
+        If ``zero_division`` is neither ``"warn"`` nor ``"raise"``, or if it is
+        ``"raise"`` and some entries of ``scale`` are (near) zero.
+    """
+    if zero_division not in ("warn", "raise"):
+        raise_log(
+            ValueError(
+                f"`zero_division` must be 'warn' or 'raise'. Received {zero_division}."
+            ),
+            logger=logger,
+        )
+
+    is_zero = np.isclose(scale, 0.0)
+    if is_zero.any():
+        if zero_division == "raise":
+            raise_log(
+                ValueError(
+                    "Cannot compute percentage metric: the denominator "
+                    "(e.g. sum, mean, or range of `actual_series`) is zero "
+                    "for some components."
+                ),
+                logger=logger,
+            )
+        # divide by a dummy `1.0` where `scale` is zero to avoid runtime warnings
+        ratio = np.where(is_zero, np.nan, errors / np.where(is_zero, 1.0, scale))
+        logger.warning(
+            "The denominator (e.g. sum, mean, or range of `actual_series`) is "
+            "zero for some components in the percentage metric. Those entries "
+            "are set to NaN."
+        )
+        return ratio
+    return errors / scale
+
+
 def _unique_labels(y_true: np.ndarray, y_pred: np.ndarray) -> list[np.ndarray]:
     """Returns unique labels for each component in the true and predicted labels."""
     labels = []
diff --git a/darts/tests/metrics/test_metrics.py b/darts/tests/metrics/test_metrics.py
index 39d85a7e5c..6fccb1cde5 100644
--- a/darts/tests/metrics/test_metrics.py
+++ b/darts/tests/metrics/test_metrics.py
@@ -232,10 +232,12 @@ def test_ape_zero(self, metric):
             metric(self.series1, self.series1)
 
     def test_ope_zero(self):
+        # Mean-centered actuals sum to ~0; opt into raising via `zero_division`.
         with pytest.raises(ValueError):
             metrics.ope(
                 self.series1 - self.series1.to_series().mean(),
                 self.series1 - self.series1.to_series().mean(),
+                zero_division="raise",
             )
 
     @pytest.mark.parametrize(
@@ -1105,14 +1107,14 @@ def test_arre(self, config):
         self.helper_test_nan(metric, **kwargs)
         self.helper_test_non_aggregate(metric, is_aggregate)
 
+        # Legacy raising behavior is now opt-in via `zero_division="raise"`.
         with pytest.raises(ValueError) as exc:
             _ = metric(
                 TimeSeries.from_values(np.ones((3, 1, 1))),
                 TimeSeries.from_values(np.ones((3, 1, 1))),
+                zero_division="raise",
             )
-        assert str(exc.value).startswith(
-            "The difference between the max and min values must "
-        )
+        assert "denominator" in str(exc.value)
 
     @pytest.mark.parametrize(
         "metric",
@@ -1474,6 +1476,78 @@ def test_scaled_errors_zero_division(
             assert np.all(np.isnan(result[2:]))
         caplog.clear()
 
+    @pytest.mark.parametrize(
+        "metric",
+        [
+            metrics.wmape,
+            metrics.ope,
+            metrics.arre,
+            metrics.marre,
+            metrics.coefficient_of_variation,
+        ],
+    )
+    def test_pct_metrics_zero_division(self, metric, caplog):
+        """Percentage / range-based metrics return NaN on zero denominator
+        under the default ``zero_division="warn"`` and raise under
+        ``zero_division="raise"``.
+
+        A constant all-zero ``actual_series`` zeroes every denominator
+        these metrics use (sum of absolutes, sum, mean, max-min)."""
+        zero_actual = TimeSeries.from_values(np.zeros((10, 1)))
+        some_pred = TimeSeries.from_values(np.ones((10, 1)))
+
+        # --- default "warn": NaN + warning ---
+        with caplog.at_level(logging.WARNING):
+            result = metric(zero_actual, some_pred, component_reduction=None)
+        assert "denominator" in caplog.text
+        assert np.all(np.isnan(np.atleast_1d(result)))
+        caplog.clear()
+
+        # --- "raise": ValueError on zero denominator ---
+        with pytest.raises(ValueError, match="denominator"):
+            metric(zero_actual, some_pred, zero_division="raise")
+
+        # --- invalid value rejected ---
+        with pytest.raises(ValueError, match="`zero_division` must be"):
+            metric(zero_actual, some_pred, zero_division="invalid")
+
+        # --- non-zero denominator: no warning, finite result ---
+        caplog.clear()
+        with caplog.at_level(logging.WARNING):
+            result_normal = metric(self.series1, self.series2, component_reduction=None)
+        assert "denominator" not in caplog.text
+        assert np.all(np.isfinite(np.atleast_1d(result_normal)))
+
+    def test_ope_accepts_negative_sum(self, caplog):
+        """OPE must accept ``actual_series`` whose sum is negative (e.g.
+        financial return series). Only a (numerically) zero sum triggers
+        the zero-division handling.
+
+        The previous ``y_true_sum > 0`` guard incorrectly rejected this
+        valid input.
+        """
+        # sum(actual) = -7 and sum(pred) = -6, hence OPE = 100 * |-1 / -7|
+        neg_sum_values = np.array([
+            1.0,
+            -2.0,
+            1.0,
+            -3.0,
+            1.0,
+            -2.0,
+            1.0,
+            -3.0,
+            1.0,
+            -2.0,
+        ]).reshape(-1, 1)
+        actual = TimeSeries.from_values(neg_sum_values)
+        pred = TimeSeries.from_values(neg_sum_values + 0.1)
+
+        caplog.clear()
+        with caplog.at_level(logging.WARNING):
+            result = metrics.ope(actual, pred)
+        assert "denominator" not in caplog.text
+        assert result == pytest.approx(100.0 / 7.0)
+
     def test_ope(self):
         self.helper_test_multivariate_duplication_equality(metrics.ope)
         self.helper_test_multiple_ts_duplication_equality(metrics.ope)