Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 9 additions & 13 deletions src/evidently/legacy/calculations/stattests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,16 @@ def get_binned_data(
current_percents = np.array([current_feature_dict[key] / len(current_data) for key in keys])

if feel_zeroes:
np.place(
reference_percents,
reference_percents == 0,
min(reference_percents[reference_percents != 0]) / 10**6
if min(reference_percents[reference_percents != 0]) <= 0.0001
else 0.0001,
)
np.place(
current_percents,
current_percents == 0,
min(current_percents[current_percents != 0]) / 10**6
if min(current_percents[current_percents != 0]) <= 0.0001
else 0.0001,
# Use a single fill value derived from both distributions so that
# reference and current are treated symmetrically. The fill is 1/10 of
# the smallest genuine non-zero probability seen in either distribution,
# so it stays proportional to the observed data instead of artificially
# inflating divergence metrics (KL, PSI, etc.).
all_nonzero = np.concatenate(
[reference_percents[reference_percents > 0], current_percents[current_percents > 0]]
)
fill_zero_value = float(all_nonzero.min()) / 10 if all_nonzero.size > 0 else 1e-4
reference_percents = np.where(reference_percents == 0, fill_zero_value, reference_percents)
current_percents = np.where(current_percents == 0, fill_zero_value, current_percents)

return reference_percents, current_percents

Expand Down
18 changes: 5 additions & 13 deletions src/evidently/legacy/spark/calculations/stattests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,11 @@ def get_binned_data(
current_percents = current_percents / current_percents.sum()

if fill_zeroes:
np.place(
reference_percents,
reference_percents == 0,
min(reference_percents[reference_percents != 0]) / 10**6
if min(reference_percents[reference_percents != 0]) <= 0.0001
else 0.0001,
)
np.place(
current_percents,
current_percents == 0,
min(current_percents[current_percents != 0]) / 10**6
if min(current_percents[current_percents != 0]) <= 0.0001
else 0.0001,
all_nonzero = np.concatenate(
[reference_percents[reference_percents > 0], current_percents[current_percents > 0]]
)
fill_zero_value = float(all_nonzero.min()) / 10 if all_nonzero.size > 0 else 1e-4
reference_percents = np.where(reference_percents == 0, fill_zero_value, reference_percents)
current_percents = np.where(current_percents == 0, fill_zero_value, current_percents)

return reference_percents, current_percents
6 changes: 3 additions & 3 deletions src/evidently/metrics/column_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ def calculate(self, context: "Context", current_data: Dataset, reference_data: O
if self.metric.threshold is None:
self.resolve_parameter("threshold", drift.stattest_threshold)
result = self.result(drift.drift_score)
result.widget = self._render(drift, Options(), ColorOptions())
result.widget = self._render(drift, Options(), ColorOptions(), title=self.display_name())
if self.metric.tests is None and context.configuration.include_tests:
# todo: move to _default_tests
result.set_tests(
Expand All @@ -627,7 +627,7 @@ def calculate(self, context: "Context", current_data: Dataset, reference_data: O
def display_name(self) -> str:
    """Return the display label: "Value drift for " followed by the metric's column."""
    column = self.metric.column
    return f"Value drift for {column}"

def _render(self, result: ColumnDataDriftMetrics, options, color_options):
def _render(self, result: ColumnDataDriftMetrics, options, color_options, title: Optional[str] = None):
if result.drift_detected:
drift = "detected"

Expand Down Expand Up @@ -729,7 +729,7 @@ def _render(self, result: ColumnDataDriftMetrics, options, color_options):
f"Drift detection method: {result.stattest_name}. "
f"Drift score: {drift_score}"
),
f"Drift in column '{result.column_name}'",
title if title is not None else f"Drift in column '{result.column_name}'",
)
],
title="",
Expand Down
59 changes: 59 additions & 0 deletions tests/future/metrics/test_group_by_metric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import pandas as pd

from evidently import Report
from evidently.metrics import GroupBy
from evidently.metrics.column_statistics import ValueDrift, ValueDriftCalculation


def _make_datasets(n_rows: int = 30):
    """Build a ``(current, reference)`` pair of data frames for the GroupBy tests.

    Both frames contain a float column ``col1`` and a string column ``group``
    whose labels alternate ``"a"``, ``"b"``, ... starting with ``"a"``.
    ``current["col1"]`` is ``0.0, 1.0, ...`` and ``reference["col1"]`` holds
    the same values shifted by ``0.5``.

    Args:
        n_rows: Number of rows in each frame. The default of 30 reproduces
            the original fixed-size fixture.

    Returns:
        A tuple ``(current, reference)`` of ``pd.DataFrame`` objects.
    """
    # Every column is derived from the single ``n_rows`` value so the column
    # lengths cannot drift apart (pandas rejects columns of unequal length).
    labels = ["a" if i % 2 == 0 else "b" for i in range(n_rows)]
    base = [float(i) for i in range(n_rows)]
    current = pd.DataFrame({"col1": base, "group": labels})
    reference = pd.DataFrame({"col1": [value + 0.5 for value in base], "group": labels})
    return current, reference


def test_group_by_value_drift_widget_title_includes_group_context():
    """A GroupBy-wrapped ValueDrift must hand a group-aware title to ``_render``.

    ``ValueDriftCalculation._render`` is temporarily swapped for a recording
    wrapper so the ``title`` argument of every render call can be inspected.
    """
    current, reference = _make_datasets()
    saved_render = ValueDriftCalculation._render
    seen_titles = []

    def recording_render(self, result, options, color_options, title=None):
        # Record the title, then delegate to the real renderer unchanged.
        seen_titles.append(title)
        return saved_render(self, result, options, color_options, title=title)

    ValueDriftCalculation._render = recording_render
    try:
        Report([GroupBy(ValueDrift(column="col1"), "group")]).run(
            current_data=current,
            reference_data=reference,
        )
    finally:
        # Always put the real method back so later tests are unaffected.
        ValueDriftCalculation._render = saved_render

    # Two render calls are expected; the fixture has two distinct group
    # labels ("a" and "b") -- presumably one call per label.
    assert len(seen_titles) == 2
    for title in seen_titles:
        assert "group by 'group'" in title, f"Expected 'group by' in title, got: {title!r}"
        assert "for label:" in title, f"Expected 'for label:' in title, got: {title!r}"


def test_standalone_value_drift_widget_title():
    """Without GroupBy, the title passed to ``_render`` names the column only.

    ``ValueDriftCalculation._render`` is temporarily swapped for a recording
    wrapper so the ``title`` argument of the render call can be inspected.
    """
    base_values = [float(i) for i in range(30)]
    current = pd.DataFrame({"col1": base_values})
    reference = pd.DataFrame({"col1": [float(i) + 0.5 for i in range(30)]})

    saved_render = ValueDriftCalculation._render
    seen_titles = []

    def recording_render(self, result, options, color_options, title=None):
        # Record the title, then delegate to the real renderer unchanged.
        seen_titles.append(title)
        return saved_render(self, result, options, color_options, title=title)

    ValueDriftCalculation._render = recording_render
    try:
        Report([ValueDrift(column="col1")]).run(
            current_data=current,
            reference_data=reference,
        )
    finally:
        # Always put the real method back so later tests are unaffected.
        ValueDriftCalculation._render = saved_render

    # Exactly one render call, whose title mentions the column and carries
    # no group-by context.
    assert len(seen_titles) == 1
    only_title = seen_titles[0]
    assert "col1" in only_title
    assert "group by" not in only_title
47 changes: 47 additions & 0 deletions tests/stattests/test_stattests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from scipy import stats

from evidently.legacy.calculations.stattests import z_stat_test
from evidently.legacy.calculations.stattests.kl_div import kl_div_stat_test
from evidently.legacy.calculations.stattests.utils import get_binned_data
from evidently.legacy.calculations.stattests.anderson_darling_stattest import anderson_darling_test
from evidently.legacy.calculations.stattests.chisquare_stattest import chi_stat_test
from evidently.legacy.calculations.stattests.cramer_von_mises_stattest import cramer_von_mises
Expand Down Expand Up @@ -335,3 +337,48 @@ def test_t_test() -> None:
reference = pd.Series([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
current = pd.Series([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
assert t_test.func(reference, current, "num", 0.05) == (approx(0.084, abs=1e-3), False)


def test_get_binned_data_fill_zero_is_dynamic() -> None:
    """Regression test for https://github.com/evidentlyai/evidently/issues/334.

    When the smallest non-zero probability was <= 0.0001 the old code filled
    empty buckets with ``min / 10**6``. The fix fills them with one tenth of
    the smallest non-zero probability found in either distribution, so the
    fill stays proportional to the real data.
    """
    from evidently.legacy.core import ColumnType

    # Build a categorical distribution where one bucket has a very small percent.
    # 99_999 "a" and 1 "b" -> p("b") = 1e-5, well below the old 0.0001 threshold.
    reference = pd.Series(["a"] * 99_999 + ["b"])
    current = pd.Series(["a"] * 99_999 + ["c"])  # "c" absent in reference, "b" absent in current

    ref_pct, cur_pct = get_binned_data(reference, current, ColumnType.Categorical, n=10)

    # All zero slots must be filled with a positive value
    assert np.all(ref_pct > 0), "reference percents must be strictly positive after fill"
    assert np.all(cur_pct > 0), "current percents must be strictly positive after fill"

    # The fill value must be strictly smaller than the smallest genuine probability
    min_genuine = min(reference.value_counts(normalize=True).min(), current.value_counts(normalize=True).min())
    fill_used = min(ref_pct.min(), cur_pct.min())
    assert fill_used < min_genuine, "fill value must be smaller than any real non-zero probability"

    # The inequality above also holds for the old ``min / 10**6`` fill, so pin
    # the actual ratio: this is the assertion that fails if the old behaviour
    # ever comes back.
    assert fill_used == approx(min_genuine / 10, rel=1e-9), "fill value must be 1/10 of the smallest real probability"

    # Sanity check: identical distributions should score near 0
    ref_same = pd.Series(["a"] * 50 + ["b"] * 50)
    cur_same = pd.Series(["a"] * 50 + ["b"] * 50)
    score_same, _ = kl_div_stat_test.func(ref_same, cur_same, ColumnType.Categorical, 0.1)
    assert score_same == approx(0.0, abs=1e-6), "KL of identical distributions must be ~0"


def test_get_binned_data_fill_zero_symmetric() -> None:
    """Reference and current must be filled with the same, jointly derived value.

    The fill is computed from the non-zero probabilities of both
    distributions combined, so that KL divergence is not artificially
    asymmetric.
    """
    from evidently.legacy.core import ColumnType

    reference = pd.Series(["a"] * 90 + ["b"] * 10)
    current = pd.Series(["a"] * 95 + ["c"] * 5)  # "b" zero in current, "c" zero in reference

    ref_pct, cur_pct = get_binned_data(reference, current, ColumnType.Categorical, n=10)

    # The fill value is derived from both distributions combined, so the minimum
    # of ref and current percents must be identical.
    assert ref_pct.min() == approx(cur_pct.min(), rel=1e-9), "fill value must be identical for ref and current"

    # Equality alone would also hold for a fixed constant fill (the old code
    # used 0.0001 on both sides for this data), so pin the value as well: the
    # smallest non-zero probability across both series is 0.05 (from
    # ``current``), hence the shared fill must be 0.05 / 10.
    expected_fill = 0.05 / 10
    assert ref_pct.min() == approx(expected_fill, rel=1e-9), "fill must come from the combined minimum"
    assert cur_pct.min() == approx(expected_fill, rel=1e-9), "fill must come from the combined minimum"