From 79fad24f9247ac8f8244357fcd2e4945d075821a Mon Sep 17 00:00:00 2001
From: SuryaSunil287 <surya.sunil@rice.edu>
Date: Sun, 10 May 2026 14:18:40 +0530
Subject: [PATCH 1/2] fix: dynamic fill_zero in get_binned_data to prevent
 KL/PSI distortion

The hardcoded fill value (0.0001 or min/1e6) for zero-probability bins
in get_binned_data caused two problems:

1. When min non-zero percent <= 0.0001, filling with min/1e6 produced
   an astronomically small value that inflated KL divergence scores.
2. Reference and current distributions received different fill values,
   introducing asymmetry in divergence metrics.

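Fix: compute a single fill value as min_nonzero_both / 10, where
min_nonzero_both is the smallest non-zero probability across both
distributions (falling back to 1e-4 when neither distribution has a
non-zero probability). This guarantees the fill is always smaller than
any genuine probability and is applied symmetrically. The same change is
made in both the legacy and the legacy/spark implementations of
get_binned_data.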

Fixes #334
---
 .../legacy/calculations/stattests/utils.py    | 22 ++++-----
 .../spark/calculations/stattests/utils.py     | 18 ++-----
 tests/stattests/test_stattests.py             | 47 +++++++++++++++++++
 3 files changed, 61 insertions(+), 26 deletions(-)

diff --git a/src/evidently/legacy/calculations/stattests/utils.py b/src/evidently/legacy/calculations/stattests/utils.py
index 1957632451..f2beeba520 100644
--- a/src/evidently/legacy/calculations/stattests/utils.py
+++ b/src/evidently/legacy/calculations/stattests/utils.py
@@ -42,20 +42,16 @@ def get_binned_data(
         current_percents = np.array([current_feature_dict[key] / len(current_data) for key in keys])
 
     if feel_zeroes:
-        np.place(
-            reference_percents,
-            reference_percents == 0,
-            min(reference_percents[reference_percents != 0]) / 10**6
-            if min(reference_percents[reference_percents != 0]) <= 0.0001
-            else 0.0001,
-        )
-        np.place(
-            current_percents,
-            current_percents == 0,
-            min(current_percents[current_percents != 0]) / 10**6
-            if min(current_percents[current_percents != 0]) <= 0.0001
-            else 0.0001,
+        # Use a single fill value derived from both distributions so that
+        # reference and current are treated symmetrically. The fill is 1/10 of
+        # the smallest non-zero probability in either distribution (1e-4 if there
+        # is none): below every genuine value, yet not tiny enough to skew KL/PSI.
+        all_nonzero = np.concatenate(
+            [reference_percents[reference_percents > 0], current_percents[current_percents > 0]]
         )
+        fill_zero_value = float(all_nonzero.min()) / 10 if all_nonzero.size > 0 else 1e-4
+        reference_percents = np.where(reference_percents == 0, fill_zero_value, reference_percents)
+        current_percents = np.where(current_percents == 0, fill_zero_value, current_percents)
 
     return reference_percents, current_percents
 
diff --git a/src/evidently/legacy/spark/calculations/stattests/utils.py b/src/evidently/legacy/spark/calculations/stattests/utils.py
index e43ab40194..aa895c98ea 100644
--- a/src/evidently/legacy/spark/calculations/stattests/utils.py
+++ b/src/evidently/legacy/spark/calculations/stattests/utils.py
@@ -52,19 +52,11 @@ def get_binned_data(
         current_percents = current_percents / current_percents.sum()
 
     if fill_zeroes:
-        np.place(
-            reference_percents,
-            reference_percents == 0,
-            min(reference_percents[reference_percents != 0]) / 10**6
-            if min(reference_percents[reference_percents != 0]) <= 0.0001
-            else 0.0001,
-        )
-        np.place(
-            current_percents,
-            current_percents == 0,
-            min(current_percents[current_percents != 0]) / 10**6
-            if min(current_percents[current_percents != 0]) <= 0.0001
-            else 0.0001,
+        all_nonzero = np.concatenate(
+            [reference_percents[reference_percents > 0], current_percents[current_percents > 0]]
         )
+        fill_zero_value = float(all_nonzero.min()) / 10 if all_nonzero.size > 0 else 1e-4
+        reference_percents = np.where(reference_percents == 0, fill_zero_value, reference_percents)
+        current_percents = np.where(current_percents == 0, fill_zero_value, current_percents)
 
     return reference_percents, current_percents
diff --git a/tests/stattests/test_stattests.py b/tests/stattests/test_stattests.py
index 33ed818466..3808a69941 100644
--- a/tests/stattests/test_stattests.py
+++ b/tests/stattests/test_stattests.py
@@ -5,6 +5,8 @@
 from scipy import stats
 
 from evidently.legacy.calculations.stattests import z_stat_test
+from evidently.legacy.calculations.stattests.kl_div import kl_div_stat_test
+from evidently.legacy.calculations.stattests.utils import get_binned_data
 from evidently.legacy.calculations.stattests.anderson_darling_stattest import anderson_darling_test
 from evidently.legacy.calculations.stattests.chisquare_stattest import chi_stat_test
 from evidently.legacy.calculations.stattests.cramer_von_mises_stattest import cramer_von_mises
@@ -335,3 +337,48 @@ def test_t_test() -> None:
     reference = pd.Series([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
     current = pd.Series([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
     assert t_test.func(reference, current, "num", 0.05) == (approx(0.084, abs=1e-3), False)
+
+
+def test_get_binned_data_fill_zero_is_dynamic() -> None:
+    # Regression test for https://github.com/evidentlyai/evidently/issues/334.
+    # When the minimum non-zero probability is at most 0.0001 the old code
+    # used min/1e6 as fill, which is astronomically small and inflates KL divergence.
+    # The fix uses (min over both distributions)/10 so the fill stays proportional.
+    from evidently.legacy.core import ColumnType
+
+    # Build a categorical distribution where one bucket has a very small percent.
+    # 99_999 "a" and 1 "b" → p("b") ≈ 1e-5, well below the old 0.0001 threshold.
+    reference = pd.Series(["a"] * 99_999 + ["b"])
+    current = pd.Series(["a"] * 99_999 + ["c"])  # "c" absent in reference, "b" absent in current
+
+    ref_pct, cur_pct = get_binned_data(reference, current, ColumnType.Categorical, n=10)
+
+    # All zero slots must be filled with a positive value
+    assert np.all(ref_pct > 0), "reference percents must be strictly positive after fill"
+    assert np.all(cur_pct > 0), "current percents must be strictly positive after fill"
+
+    # The fill value must be strictly smaller than the smallest genuine probability
+    min_genuine = min(reference.value_counts(normalize=True).min(), current.value_counts(normalize=True).min())
+    fill_used = min(ref_pct.min(), cur_pct.min())
+    assert fill_used < min_genuine, "fill value must be smaller than any real non-zero probability"
+
+    # Sanity check (no zero bins to fill): KL of identical distributions should be near 0
+    ref_same = pd.Series(["a"] * 50 + ["b"] * 50)
+    cur_same = pd.Series(["a"] * 50 + ["b"] * 50)
+    score_same, _ = kl_div_stat_test.func(ref_same, cur_same, ColumnType.Categorical, 0.1)
+    assert score_same == approx(0.0, abs=1e-6), "KL of identical distributions must be ~0"
+
+
+def test_get_binned_data_fill_zero_symmetric() -> None:
+    # Both reference and current must receive the same fill value so that
+    # KL divergence is not artificially asymmetric.
+    from evidently.legacy.core import ColumnType
+
+    reference = pd.Series(["a"] * 90 + ["b"] * 10)
+    current = pd.Series(["a"] * 95 + ["c"] * 5)  # "b" zero in current, "c" zero in reference
+
+    ref_pct, cur_pct = get_binned_data(reference, current, ColumnType.Categorical, n=10)
+
+    # Each side has exactly one empty category, so each array's minimum is the
+    # fill value; a shared fill therefore makes the two minima identical.
+    assert ref_pct.min() == approx(cur_pct.min(), rel=1e-9), "fill value must be identical for ref and current"

From 62a5f0c6d83b00c77cdbee22b77935a829185ab7 Mon Sep 17 00:00:00 2001
From: SuryaSunil287 <surya.sunil@rice.edu>
Date: Mon, 18 May 2026 21:45:26 +0530
Subject: [PATCH 2/2] fix: GroupBy(ValueDrift) widget title now includes
 group-by context

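When ValueDrift is wrapped in GroupBy, the counter widget label was
hardcoded as "Drift in column '<col>'" regardless of the GroupBy
label. Thread display_name() through _render() as an optional `title`
argument so the patched name ("... group by '<col>' for label:
'<label>'") appears in the widget.

Note: calculate() now always passes display_name(), so a standalone
ValueDrift counter is labelled "Value drift for <col>" instead of
"Drift in column '<col>'"; the old label remains only as the fallback
when _render() is called without a title.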

Fixes #1706

AI assistance: Written with Claude Code. All code reviewed, tested, and
verified by the author.
---
 src/evidently/metrics/column_statistics.py   |  6 +-
 tests/future/metrics/test_group_by_metric.py | 59 ++++++++++++++++++++
 2 files changed, 62 insertions(+), 3 deletions(-)
 create mode 100644 tests/future/metrics/test_group_by_metric.py

diff --git a/src/evidently/metrics/column_statistics.py b/src/evidently/metrics/column_statistics.py
index ddde369af1..3980bf3115 100644
--- a/src/evidently/metrics/column_statistics.py
+++ b/src/evidently/metrics/column_statistics.py
@@ -601,7 +601,7 @@ def calculate(self, context: "Context", current_data: Dataset, reference_data: O
         if self.metric.threshold is None:
             self.resolve_parameter("threshold", drift.stattest_threshold)
         result = self.result(drift.drift_score)
-        result.widget = self._render(drift, Options(), ColorOptions())
+        result.widget = self._render(drift, Options(), ColorOptions(), title=self.display_name())
         if self.metric.tests is None and context.configuration.include_tests:
             # todo: move to _default_tests
             result.set_tests(
@@ -627,7 +627,7 @@ def calculate(self, context: "Context", current_data: Dataset, reference_data: O
     def display_name(self) -> str:
         return f"Value drift for {self.metric.column}"
 
-    def _render(self, result: ColumnDataDriftMetrics, options, color_options):
+    def _render(self, result: ColumnDataDriftMetrics, options, color_options, title: Optional[str] = None):
         if result.drift_detected:
             drift = "detected"
 
@@ -729,7 +729,7 @@ def _render(self, result: ColumnDataDriftMetrics, options, color_options):
                             f"Drift detection method: {result.stattest_name}. "
                             f"Drift score: {drift_score}"
                         ),
-                        f"Drift in column '{result.column_name}'",
+                        title if title is not None else f"Drift in column '{result.column_name}'",
                     )
                 ],
                 title="",
diff --git a/tests/future/metrics/test_group_by_metric.py b/tests/future/metrics/test_group_by_metric.py
new file mode 100644
index 0000000000..f743f9060d
--- /dev/null
+++ b/tests/future/metrics/test_group_by_metric.py
@@ -0,0 +1,59 @@
+import pandas as pd
+
+from evidently import Report
+from evidently.metrics import GroupBy
+from evidently.metrics.column_statistics import ValueDrift, ValueDriftCalculation
+
+
+def _make_datasets():
+    current = pd.DataFrame({"col1": [float(i) for i in range(30)], "group": (["a", "b"] * 15)})
+    reference = pd.DataFrame({"col1": [float(i) + 0.5 for i in range(30)], "group": (["a", "b"] * 15)})
+    return current, reference
+
+
+def test_group_by_value_drift_widget_title_includes_group_context():
+    """Widget counter label must include 'group by' info when wrapped in GroupBy."""
+    current, reference = _make_datasets()
+
+    captured_titles = []
+    original_render = ValueDriftCalculation._render
+
+    def capturing_render(self, result, options, color_options, title=None):
+        captured_titles.append(title)
+        return original_render(self, result, options, color_options, title=title)
+
+    ValueDriftCalculation._render = capturing_render
+    try:
+        report = Report([GroupBy(ValueDrift(column="col1"), "group")])
+        report.run(current_data=current, reference_data=reference)
+    finally:
+        ValueDriftCalculation._render = original_render
+
+    assert len(captured_titles) == 2
+    for title in captured_titles:
+        assert "group by 'group'" in title, f"Expected 'group by' in title, got: {title!r}"
+        assert "for label:" in title, f"Expected 'for label:' in title, got: {title!r}"
+
+
+def test_standalone_value_drift_widget_title():
+    """Standalone ValueDrift widget label uses the metric display_name."""
+    current = pd.DataFrame({"col1": [float(i) for i in range(30)]})
+    reference = pd.DataFrame({"col1": [float(i) + 0.5 for i in range(30)]})
+
+    captured_titles = []
+    original_render = ValueDriftCalculation._render
+
+    def capturing_render(self, result, options, color_options, title=None):
+        captured_titles.append(title)
+        return original_render(self, result, options, color_options, title=title)
+
+    ValueDriftCalculation._render = capturing_render
+    try:
+        report = Report([ValueDrift(column="col1")])
+        report.run(current_data=current, reference_data=reference)
+    finally:
+        ValueDriftCalculation._render = original_render
+
+    assert len(captured_titles) == 1
+    assert "col1" in captured_titles[0]
+    assert "group by" not in captured_titles[0]