|
4 | 4 | import pandas as pd |
5 | 5 | import pytest |
6 | 6 |
|
7 | | -from testgen.commands.test_thresholds_prediction import compute_freshness_threshold |
| 7 | +from testgen.commands.test_thresholds_prediction import compute_freshness_threshold, compute_sarimax_threshold |
8 | 8 | from testgen.common.freshness_service import ( |
9 | 9 | MIN_FRESHNESS_GAPS, |
10 | 10 | FreshnessThreshold, |
@@ -634,3 +634,77 @@ def test_without_exclusions_timezone_has_no_effect(self): |
634 | 634 | forecast_with_tz = get_sarimax_forecast(history, num_forecast=3, exclude_weekends=False, tz="America/New_York") |
635 | 635 |
|
636 | 636 | pd.testing.assert_frame_equal(forecast_no_tz, forecast_with_tz) |
| 637 | + |
| 638 | + |
class Test_ComputeSarimaxThreshold_CumulativeFloor:
    """Exercise the ``is_cumulative`` lower-bound floor of ``compute_sarimax_threshold``."""

    @staticmethod
    def _make_monotonic_history(n_days: int = 30, start_value: int = 1000, daily_growth: int = 100) -> pd.DataFrame:
        """Build a daily ``result_signal`` series that grows by a fixed step.

        The frame is indexed by consecutive days starting at 2026-01-01 and
        holds ``start_value + k * daily_growth`` on the k-th day, mimicking the
        row count of a cumulative (append-only) table.
        """
        day_index = pd.date_range("2026-01-01", periods=n_days, freq="1D")
        row_counts = [start_value + daily_growth * step for step in range(n_days)]
        return pd.DataFrame(data={"result_signal": row_counts}, index=day_index)

    def test_cumulative_floors_lower_at_last_observed(self):
        """With the cumulative flag set, the lower bound is never below the latest value."""
        frame = self._make_monotonic_history(n_days=30, start_value=1000, daily_growth=100)
        signal = frame["result_signal"]
        latest_value = float(signal.iloc[-1])

        lower, upper, prediction = compute_sarimax_threshold(
            frame,
            PredictSensitivity.medium,
            is_cumulative=True,
        )

        # All three outputs must be produced before the floor can be checked.
        assert lower is not None
        assert upper is not None
        assert prediction is not None
        # NOTE(review): on a perfectly linear series the unfloored bound may
        # already satisfy this - confirm the fixture makes the floor binding.
        assert lower >= latest_value

    def test_non_cumulative_allows_lower_below_last_observed(self):
        """Without the cumulative flag, a noisy series may yield a lower bound under the latest value."""
        # Fixed seed keeps the noisy fixture reproducible across runs.
        rng = np.random.default_rng(42)
        day_index = pd.date_range("2026-01-01", periods=30, freq="1D")

        # Upward trend (+50 per day) with large noise (sigma=200), drawn one
        # sample at a time so the random stream is consumed in day order.
        noisy_counts = []
        for day in range(30):
            trend = 1000 + day * 50
            noisy_counts.append(trend + rng.normal(0, 200))

        frame = pd.DataFrame(data={"result_signal": noisy_counts}, index=day_index)
        latest_value = float(frame["result_signal"].iloc[-1])

        lower, upper, prediction = compute_sarimax_threshold(
            frame,
            PredictSensitivity.low,
            is_cumulative=False,
        )

        assert lower is not None
        # With low sensitivity (z=-3.0) and high noise, the lower bound should
        # fall below the last value - exactly the situation the cumulative
        # floor is meant to prevent.
        assert lower < latest_value

    def test_cumulative_does_not_affect_upper_tolerance(self):
        """The upper bound is identical whether or not the cumulative flag is set."""
        frame = self._make_monotonic_history(n_days=30)

        _, upper_with_floor, _ = compute_sarimax_threshold(
            frame,
            PredictSensitivity.medium,
            is_cumulative=True,
        )
        _, upper_without_floor, _ = compute_sarimax_threshold(
            frame,
            PredictSensitivity.medium,
            is_cumulative=False,
        )

        assert upper_with_floor == upper_without_floor

    def test_cumulative_with_insufficient_data_returns_none(self):
        """A history shorter than ``min_lookback`` yields ``None`` for every output."""
        # Two observations against a required lookback of five.
        frame = self._make_monotonic_history(n_days=2)

        lower, upper, prediction = compute_sarimax_threshold(
            frame,
            PredictSensitivity.medium,
            min_lookback=5,
            is_cumulative=True,
        )

        assert lower is None
        assert upper is None
        assert prediction is None

    def test_cumulative_default_is_false(self):
        """Omitting ``is_cumulative`` gives the same lower bound as passing ``False``."""
        frame = self._make_monotonic_history(n_days=30)

        # NOTE(review): if the floor is not binding on this linear fixture the
        # equality would also hold for a True default - consider a noisy series.
        implicit_lower, _, _ = compute_sarimax_threshold(frame, PredictSensitivity.medium)
        explicit_lower, _, _ = compute_sarimax_threshold(frame, PredictSensitivity.medium, is_cumulative=False)

        assert implicit_lower == explicit_lower
0 commit comments