Merge branch 'aarthy/monitor' into 'enterprise'

ci bot · ci bot · commit 47e4fe302df1 · 2026-02-24T19:29:15.000Z
fix(monitors): prevent overconfident prediction bounds

See merge request dkinternal/testgen/dataops-testgen!417
diff --git a/testgen/commands/test_thresholds_prediction.py b/testgen/commands/test_thresholds_prediction.py
@@ -110,19 +110,19 @@ def run(self) -> None:
                     )
                     test_prediction.extend([lower, upper, staleness, prediction])
                 else:
-                    functional_table_type = group["functional_table_type"].iloc[0]
-                    is_cumulative = bool(
-                        functional_table_type and str(functional_table_type).startswith("cumulative")
-                    )
                     lower, upper, prediction = compute_sarimax_threshold(
                         history,
                         sensitivity=self.test_suite.predict_sensitivity or PredictSensitivity.medium,
                         min_lookback=self.test_suite.predict_min_lookback or 1,
                         exclude_weekends=self.test_suite.predict_exclude_weekends,
                         holiday_codes=self.test_suite.holiday_codes_list,
                         schedule_tz=self.tz,
-                        is_cumulative=is_cumulative,
                     )
+                    if test_type == "Volume_Trend":
+                        if lower is not None: 
+                            lower = max(lower, 0.0)
+                        if upper is not None:
+                            upper = max(upper, 0.0)
                     test_prediction.extend([lower, upper, None, prediction])
 
                 prediction_results.append(test_prediction)
@@ -263,13 +263,10 @@ def compute_sarimax_threshold(
     exclude_weekends: bool = False,
     holiday_codes: list[str] | None = None,
     schedule_tz: str | None = None,
-    is_cumulative: bool = False,
 ) -> tuple[float | None, float | None, str | None]:
     """Compute SARIMAX-based thresholds for the next forecast point.
 
     Returns (lower, upper, forecast_json) or (None, None, None) if insufficient data.
-    For cumulative tables, the lower tolerance is floored at the last observed value
-    so that any decrease in row count is detected as an anomaly.
     """
     if len(history) < min_lookback:
         return None, None, None
@@ -299,12 +296,7 @@ def compute_sarimax_threshold(
 
         if pd.isna(lower_tolerance) or pd.isna(upper_tolerance):
             return None, None, None
-
-        lower_tolerance = float(lower_tolerance)
-        if is_cumulative:
-            last_observed = float(history["result_signal"].iloc[-1])
-            lower_tolerance = max(lower_tolerance, last_observed)
-
-        return lower_tolerance, float(upper_tolerance), forecast.to_json()
+        else:
+            return float(lower_tolerance), float(upper_tolerance), forecast.to_json()
     except NotEnoughData:
         return None, None, None
diff --git a/testgen/common/time_series_service.py b/testgen/common/time_series_service.py
@@ -2,6 +2,7 @@
 from datetime import datetime
 
 import holidays
+import numpy as np
 import pandas as pd
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 
@@ -94,7 +95,21 @@ def get_exog_flags(index: pd.DatetimeIndex) -> pd.DataFrame:
 
     results = pd.DataFrame(index=forecast_index)
     results["mean"] = forecast.predicted_mean
-    results["se"] = forecast.var_pred_mean ** 0.5
+
+    # SE estimation: take the max of three sources to prevent overconfident bounds.
+    # 1. Model SE (sqrt of var_pred_mean): can be artificially small when AR/MA nearly cancel
+    # 2. Residual SE: std of the model's actual 1-step prediction errors (after Kalman burn-in)
+    # 3. Raw diff SE: std of first-differences of the original data — captures inherent
+    #    point-to-point variability that the model may underestimate
+    model_se = forecast.var_pred_mean ** 0.5
+    order_sum = model.k_ar + model.k_diff + model.k_ma
+    burn_in = max(order_sum, 3)
+    usable_residuals = fitted_model.resid.iloc[burn_in:]
+    resid_se = usable_residuals.std() if len(usable_residuals) >= 5 else 0.0
+    raw_diffs = np.diff(history.iloc[:, 0].values)
+    raw_diff_se = np.std(raw_diffs, ddof=1) if len(raw_diffs) > 1 else 0.0
+    results["se"] = np.maximum(model_se, max(resid_se, raw_diff_se))
+
     return results
 
 
diff --git a/testgen/template/prediction/get_historical_test_results.sql b/testgen/template/prediction/get_historical_test_results.sql
@@ -2,7 +2,6 @@ WITH filtered_defs AS (
   -- Filter definitions first to minimize join surface area
   SELECT id,
     test_suite_id,
-    table_groups_id,
     schema_name,
     table_name,
     column_name,
@@ -18,13 +17,8 @@ SELECT r.test_definition_id,
   CASE
     WHEN r.result_signal ~ '^-?[0-9]*\.?[0-9]+$' THEN r.result_signal::NUMERIC
     ELSE NULL
-  END AS result_signal,
-  dtc.functional_table_type
+  END AS result_signal
 FROM test_results r
 JOIN filtered_defs d ON d.id = r.test_definition_id
-LEFT JOIN data_table_chars dtc
-  ON dtc.table_groups_id = d.table_groups_id
-  AND dtc.schema_name = d.schema_name
-  AND dtc.table_name = d.table_name
 WHERE r.test_suite_id = :TEST_SUITE_ID
 ORDER BY r.test_time;
diff --git a/tests/unit/common/test_time_series_service.py b/tests/unit/common/test_time_series_service.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import pytest
 
-from testgen.commands.test_thresholds_prediction import compute_freshness_threshold, compute_sarimax_threshold
+from testgen.commands.test_thresholds_prediction import compute_freshness_threshold
 from testgen.common.freshness_service import (
     MIN_FRESHNESS_GAPS,
     FreshnessThreshold,
@@ -634,77 +634,3 @@ def test_without_exclusions_timezone_has_no_effect(self):
         forecast_with_tz = get_sarimax_forecast(history, num_forecast=3, exclude_weekends=False, tz="America/New_York")
 
         pd.testing.assert_frame_equal(forecast_no_tz, forecast_with_tz)
-
-
-class Test_ComputeSarimaxThreshold_CumulativeFloor:
-    """Tests for the cumulative table floor constraint in compute_sarimax_threshold."""
-
-    @staticmethod
-    def _make_monotonic_history(n_days: int = 30, start_value: int = 1000, daily_growth: int = 100) -> pd.DataFrame:
-        """Create a monotonically increasing row count history (cumulative table)."""
-        dates = pd.date_range("2026-01-01", periods=n_days, freq="1D")
-        values = [start_value + i * daily_growth for i in range(n_days)]
-        return pd.DataFrame({"result_signal": values}, index=dates)
-
-    def test_cumulative_floors_lower_at_last_observed(self):
-        history = self._make_monotonic_history(n_days=30, start_value=1000, daily_growth=100)
-        last_observed = float(history["result_signal"].iloc[-1])
-
-        lower, upper, prediction = compute_sarimax_threshold(
-            history, PredictSensitivity.medium, is_cumulative=True,
-        )
-
-        assert lower is not None
-        assert upper is not None
-        assert prediction is not None
-        assert lower >= last_observed
-
-    def test_non_cumulative_allows_lower_below_last_observed(self):
-        # With high variance, SARIMAX lower bound can drop below last observed
-        rng = np.random.default_rng(42)
-        dates = pd.date_range("2026-01-01", periods=30, freq="1D")
-        # Trending up but with large noise — lower bound should be below last value
-        values = [1000 + i * 50 + rng.normal(0, 200) for i in range(30)]
-        history = pd.DataFrame({"result_signal": values}, index=dates)
-        last_observed = float(history["result_signal"].iloc[-1])
-
-        lower, upper, prediction = compute_sarimax_threshold(
-            history, PredictSensitivity.low, is_cumulative=False,
-        )
-
-        assert lower is not None
-        # With low sensitivity (z=-3.0) and high noise, lower should be below last value
-        # This is the behavior we're protecting against with the cumulative floor
-        assert lower < last_observed
-
-    def test_cumulative_does_not_affect_upper_tolerance(self):
-        history = self._make_monotonic_history(n_days=30)
-
-        _, upper_cumulative, _ = compute_sarimax_threshold(
-            history, PredictSensitivity.medium, is_cumulative=True,
-        )
-        _, upper_normal, _ = compute_sarimax_threshold(
-            history, PredictSensitivity.medium, is_cumulative=False,
-        )
-
-        assert upper_cumulative == upper_normal
-
-    def test_cumulative_with_insufficient_data_returns_none(self):
-        history = self._make_monotonic_history(n_days=2)
-
-        lower, upper, prediction = compute_sarimax_threshold(
-            history, PredictSensitivity.medium, min_lookback=5, is_cumulative=True,
-        )
-
-        assert lower is None
-        assert upper is None
-        assert prediction is None
-
-    def test_cumulative_default_is_false(self):
-        history = self._make_monotonic_history(n_days=30)
-
-        # Without is_cumulative param, should behave as non-cumulative
-        lower_default, _, _ = compute_sarimax_threshold(history, PredictSensitivity.medium)
-        lower_explicit, _, _ = compute_sarimax_threshold(history, PredictSensitivity.medium, is_cumulative=False)
-
-        assert lower_default == lower_explicit