diff --git a/src/evidently/legacy/calculations/stattests/utils.py b/src/evidently/legacy/calculations/stattests/utils.py index 1957632451..f2beeba520 100644 --- a/src/evidently/legacy/calculations/stattests/utils.py +++ b/src/evidently/legacy/calculations/stattests/utils.py @@ -42,20 +42,16 @@ def get_binned_data( current_percents = np.array([current_feature_dict[key] / len(current_data) for key in keys]) if feel_zeroes: - np.place( - reference_percents, - reference_percents == 0, - min(reference_percents[reference_percents != 0]) / 10**6 - if min(reference_percents[reference_percents != 0]) <= 0.0001 - else 0.0001, - ) - np.place( - current_percents, - current_percents == 0, - min(current_percents[current_percents != 0]) / 10**6 - if min(current_percents[current_percents != 0]) <= 0.0001 - else 0.0001, + # Use a single fill value derived from both distributions so that + # reference and current are treated symmetrically. The fill is 1/10 of + # the smallest genuine non-zero probability seen in either distribution, + # so it is always strictly smaller than any observed probability (limits inflation of KL, PSI, etc.). 
+ all_nonzero = np.concatenate( + [reference_percents[reference_percents > 0], current_percents[current_percents > 0]] ) + fill_zero_value = float(all_nonzero.min()) / 10 if all_nonzero.size > 0 else 1e-4 + reference_percents = np.where(reference_percents == 0, fill_zero_value, reference_percents) + current_percents = np.where(current_percents == 0, fill_zero_value, current_percents) return reference_percents, current_percents diff --git a/src/evidently/legacy/spark/calculations/stattests/utils.py b/src/evidently/legacy/spark/calculations/stattests/utils.py index e43ab40194..aa895c98ea 100644 --- a/src/evidently/legacy/spark/calculations/stattests/utils.py +++ b/src/evidently/legacy/spark/calculations/stattests/utils.py @@ -52,19 +52,11 @@ def get_binned_data( current_percents = current_percents / current_percents.sum() if fill_zeroes: - np.place( - reference_percents, - reference_percents == 0, - min(reference_percents[reference_percents != 0]) / 10**6 - if min(reference_percents[reference_percents != 0]) <= 0.0001 - else 0.0001, - ) - np.place( - current_percents, - current_percents == 0, - min(current_percents[current_percents != 0]) / 10**6 - if min(current_percents[current_percents != 0]) <= 0.0001 - else 0.0001, + all_nonzero = np.concatenate( + [reference_percents[reference_percents > 0], current_percents[current_percents > 0]] ) + fill_zero_value = float(all_nonzero.min()) / 10 if all_nonzero.size > 0 else 1e-4 + reference_percents = np.where(reference_percents == 0, fill_zero_value, reference_percents) + current_percents = np.where(current_percents == 0, fill_zero_value, current_percents) return reference_percents, current_percents diff --git a/src/evidently/metrics/column_statistics.py b/src/evidently/metrics/column_statistics.py index ddde369af1..3980bf3115 100644 --- a/src/evidently/metrics/column_statistics.py +++ b/src/evidently/metrics/column_statistics.py @@ -601,7 +601,7 @@ def calculate(self, context: "Context", current_data: Dataset, 
reference_data: O if self.metric.threshold is None: self.resolve_parameter("threshold", drift.stattest_threshold) result = self.result(drift.drift_score) - result.widget = self._render(drift, Options(), ColorOptions()) + result.widget = self._render(drift, Options(), ColorOptions(), title=self.display_name()) if self.metric.tests is None and context.configuration.include_tests: # todo: move to _default_tests result.set_tests( @@ -627,7 +627,7 @@ def calculate(self, context: "Context", current_data: Dataset, reference_data: O def display_name(self) -> str: return f"Value drift for {self.metric.column}" - def _render(self, result: ColumnDataDriftMetrics, options, color_options): + def _render(self, result: ColumnDataDriftMetrics, options, color_options, title: Optional[str] = None): if result.drift_detected: drift = "detected" @@ -729,7 +729,7 @@ def _render(self, result: ColumnDataDriftMetrics, options, color_options): f"Drift detection method: {result.stattest_name}. " f"Drift score: {drift_score}" ), - f"Drift in column '{result.column_name}'", + title if title is not None else f"Drift in column '{result.column_name}'", ) ], title="", diff --git a/tests/future/metrics/test_group_by_metric.py b/tests/future/metrics/test_group_by_metric.py new file mode 100644 index 0000000000..f743f9060d --- /dev/null +++ b/tests/future/metrics/test_group_by_metric.py @@ -0,0 +1,59 @@ +import pandas as pd + +from evidently import Report +from evidently.metrics import GroupBy +from evidently.metrics.column_statistics import ValueDrift, ValueDriftCalculation + + +def _make_datasets(): + current = pd.DataFrame({"col1": [float(i) for i in range(30)], "group": (["a", "b"] * 15)}) + reference = pd.DataFrame({"col1": [float(i) + 0.5 for i in range(30)], "group": (["a", "b"] * 15)}) + return current, reference + + +def test_group_by_value_drift_widget_title_includes_group_context(): + """Widget counter label must include 'group by' info when wrapped in GroupBy.""" + current, reference = 
_make_datasets() + + captured_titles = [] + original_render = ValueDriftCalculation._render + + def capturing_render(self, result, options, color_options, title=None): + captured_titles.append(title) + return original_render(self, result, options, color_options, title=title) + + ValueDriftCalculation._render = capturing_render + try: + report = Report([GroupBy(ValueDrift(column="col1"), "group")]) + report.run(current_data=current, reference_data=reference) + finally: + ValueDriftCalculation._render = original_render + + assert len(captured_titles) == 2 + for title in captured_titles: + assert "group by 'group'" in title, f"Expected 'group by' in title, got: {title!r}" + assert "for label:" in title, f"Expected 'for label:' in title, got: {title!r}" + + +def test_standalone_value_drift_widget_title(): + """Standalone ValueDrift widget label uses the metric display_name.""" + current = pd.DataFrame({"col1": [float(i) for i in range(30)]}) + reference = pd.DataFrame({"col1": [float(i) + 0.5 for i in range(30)]}) + + captured_titles = [] + original_render = ValueDriftCalculation._render + + def capturing_render(self, result, options, color_options, title=None): + captured_titles.append(title) + return original_render(self, result, options, color_options, title=title) + + ValueDriftCalculation._render = capturing_render + try: + report = Report([ValueDrift(column="col1")]) + report.run(current_data=current, reference_data=reference) + finally: + ValueDriftCalculation._render = original_render + + assert len(captured_titles) == 1 + assert "col1" in captured_titles[0] + assert "group by" not in captured_titles[0] diff --git a/tests/stattests/test_stattests.py b/tests/stattests/test_stattests.py index 33ed818466..3808a69941 100644 --- a/tests/stattests/test_stattests.py +++ b/tests/stattests/test_stattests.py @@ -5,6 +5,8 @@ from scipy import stats from evidently.legacy.calculations.stattests import z_stat_test +from evidently.legacy.calculations.stattests.kl_div import 
kl_div_stat_test +from evidently.legacy.calculations.stattests.utils import get_binned_data from evidently.legacy.calculations.stattests.anderson_darling_stattest import anderson_darling_test from evidently.legacy.calculations.stattests.chisquare_stattest import chi_stat_test from evidently.legacy.calculations.stattests.cramer_von_mises_stattest import cramer_von_mises @@ -335,3 +337,48 @@ def test_t_test() -> None: reference = pd.Series([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]) current = pd.Series([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8]) assert t_test.func(reference, current, "num", 0.05) == (approx(0.084, abs=1e-3), False) + + +def test_get_binned_data_fill_zero_is_dynamic() -> None: + # Regression test for https://github.com/evidentlyai/evidently/issues/334. + # When the minimum non-zero probability is smaller than 0.0001 the old code + # used min/1e6 as fill, which is astronomically small and inflates KL divergence. + # The fix uses min/10 so the fill stays proportional to the real data. + from evidently.legacy.core import ColumnType + + # Build a categorical distribution where one bucket has a very small percent. + # 99_999 "a" and 1 "b" → p("b") ≈ 1e-5, well below the old 0.0001 threshold. 
+ reference = pd.Series(["a"] * 99_999 + ["b"]) + current = pd.Series(["a"] * 99_999 + ["c"]) # "c" absent in reference, "b" absent in current + + ref_pct, cur_pct = get_binned_data(reference, current, ColumnType.Categorical, n=10) + + # All zero slots must be filled with a positive value + assert np.all(ref_pct > 0), "reference percents must be strictly positive after fill" + assert np.all(cur_pct > 0), "current percents must be strictly positive after fill" + + # The fill value must be strictly smaller than the smallest genuine probability + min_genuine = min(reference.value_counts(normalize=True).min(), current.value_counts(normalize=True).min()) + fill_used = min(ref_pct.min(), cur_pct.min()) + assert fill_used < min_genuine, "fill value must be smaller than any real non-zero probability" + + # Sanity check on inputs without empty buckets: identical distributions should score near 0 + ref_same = pd.Series(["a"] * 50 + ["b"] * 50) + cur_same = pd.Series(["a"] * 50 + ["b"] * 50) + score_same, _ = kl_div_stat_test.func(ref_same, cur_same, ColumnType.Categorical, 0.1) + assert score_same == approx(0.0, abs=1e-6), "KL of identical distributions must be ~0" + + +def test_get_binned_data_fill_zero_symmetric() -> None: + # Both reference and current must receive the same fill value so that + # KL divergence is not artificially asymmetric. + from evidently.legacy.core import ColumnType + + reference = pd.Series(["a"] * 90 + ["b"] * 10) + current = pd.Series(["a"] * 95 + ["c"] * 5) # "b" zero in current, "c" zero in reference + + ref_pct, cur_pct = get_binned_data(reference, current, ColumnType.Categorical, n=10) + + # The fill value is derived from both distributions combined, so the minimum + # of ref and current percents must be identical. + assert ref_pct.min() == approx(cur_pct.min(), rel=1e-9), "fill value must be identical for ref and current"