From c3e66127bd6653aa54214ceb78b4f51c909e0885 Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Sun, 12 Apr 2026 08:52:38 +0100 Subject: [PATCH 1/4] Fix repeated callback iterations (#635) Signed-off-by: ugbotueferhire --- .../interpret/glassbox/_ebm_core/_boost.py | 18 +- .../tests/glassbox/ebm/test_callback.py | 208 ++++++++++++++++++ 2 files changed, 217 insertions(+), 9 deletions(-) create mode 100644 python/interpret-core/tests/glassbox/ebm/test_callback.py diff --git a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py index 79c4ffe8c..d8fcb9be7 100644 --- a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py +++ b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py @@ -364,18 +364,18 @@ def boost( ): break + if callback is not None: + is_done = callback( + bag_idx, step_idx, make_progress, cur_metric + ) + if is_done: + if stop_flag is not None: + stop_flag[0] = True + break + if stop_flag is not None and stop_flag[0]: break - if callback is not None: - is_done = callback( - bag_idx, step_idx, make_progress, cur_metric - ) - if is_done: - if stop_flag is not None: - stop_flag[0] = True - break - state_idx = state_idx + 1 if len(term_features) <= state_idx: if smoothing_rounds > 0: diff --git a/python/interpret-core/tests/glassbox/ebm/test_callback.py b/python/interpret-core/tests/glassbox/ebm/test_callback.py new file mode 100644 index 000000000..1a03f054f --- /dev/null +++ b/python/interpret-core/tests/glassbox/ebm/test_callback.py @@ -0,0 +1,208 @@ +# Copyright (c) 2023 The InterpretML Contributors +# Distributed under the MIT software license + +"""Regression tests for issue #635: callback fires repeatedly with same n_steps.""" + +import numpy as np + +from interpret.glassbox import ( + ExplainableBoostingClassifier, + ExplainableBoostingRegressor, +) +from interpret.utils import make_synthetic + + +class RecordingCallback: + """Picklable callback that records all 
invocations. + + Uses n_jobs=1 in tests so that state is shared in-process. + """ + + def __init__(self): + self.records = [] + + def __call__(self, bag_idx, n_steps, has_progressed, best_score): + self.records.append((bag_idx, n_steps, has_progressed, best_score)) + return False + + +class StopAfterCallback: + """Picklable callback that stops training after N calls.""" + + def __init__(self, stop_after): + self.stop_after = stop_after + self.call_count = 0 + + def __call__(self, bag_idx, n_steps, has_progressed, best_score): + self.call_count += 1 + return self.call_count >= self.stop_after + + +def _split_into_phases(steps): + """Split a list of n_steps values into phases. + + EBM training has multiple boosting phases (main terms, then + interactions) where step_idx resets to 1. This splits the + sequence at phase boundaries (where n_steps drops). + """ + if not steps: + return [] + phases = [[steps[0]]] + for i in range(1, len(steps)): + if steps[i] < steps[i - 1]: + # n_steps dropped — new phase started + phases.append([steps[i]]) + else: + phases[-1].append(steps[i]) + return phases + + +def test_callback_no_repeated_steps_classifier(): + """Verify the callback receives strictly increasing n_steps values. + + Before the fix, the callback was invoked on every internal loop + iteration — including non-progressing cycles — which caused + the same n_steps value to be reported multiple times. 
+ """ + cb = RecordingCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert len(cb.records) > 0, "Callback should have been invoked at least once" + + steps_by_bag = {} + for bag_idx, n_steps, _, _ in cb.records: + steps_by_bag.setdefault(bag_idx, []).append(n_steps) + + for bag_idx, steps in steps_by_bag.items(): + for phase in _split_into_phases(steps): + for i in range(1, len(phase)): + assert phase[i] > phase[i - 1], ( + f"Bag {bag_idx}: n_steps went from {phase[i - 1]} to " + f"{phase[i]} (expected strictly increasing within phase)" + ) + + +def test_callback_no_repeated_steps_regressor(): + """Same test as above but for ExplainableBoostingRegressor.""" + cb = RecordingCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=None, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingRegressor( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert len(cb.records) > 0, "Callback should have been invoked at least once" + + steps_by_bag = {} + for bag_idx, n_steps, _, _ in cb.records: + steps_by_bag.setdefault(bag_idx, []).append(n_steps) + + for bag_idx, steps in steps_by_bag.items(): + for phase in _split_into_phases(steps): + for i in range(1, len(phase)): + assert phase[i] > phase[i - 1], ( + f"Bag {bag_idx}: n_steps went from {phase[i - 1]} to " + f"{phase[i]} (expected strictly increasing within phase)" + ) + + +def test_callback_has_progressed_always_true(): + """Verify has_progressed is always True when the callback fires. + + Since the callback now only fires inside the `if make_progress` + block, has_progressed should never be False. 
+ """ + cb = RecordingCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert len(cb.records) > 0, "Callback should have been invoked at least once" + assert all(rec[2] for rec in cb.records), ( + "has_progressed should always be True when callback fires" + ) + + +def test_callback_early_termination(): + """Verify the callback can still terminate training early.""" + cb = StopAfterCallback(stop_after=5) + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=5000, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert cb.call_count == cb.stop_after, ( + f"Expected callback to be called exactly {cb.stop_after} times " + f"before stopping, but was called {cb.call_count} times" + ) + + # The model should still be valid after early stopping + predictions = ebm.predict(X) + assert len(predictions) == len(y) + + +def test_callback_receives_valid_metrics(): + """Verify the callback receives valid (finite) metric values.""" + cb = RecordingCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert len(cb.records) > 0, "Callback should have been invoked at least once" + + for i, (_, _, _, metric) in enumerate(cb.records): + assert np.isfinite(metric), f"Metric at step {i} is not finite: {metric}" From a36362b45f009482970bdfe519e2df1f559b5961 Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Mon, 13 Apr 2026 09:11:46 +0100 Subject: [PATCH 2/4] Change callback API to keyword-only args (#635) Signed-off-by: ugbotueferhire --- 
.../interpret-core/interpret/glassbox/_ebm.py | 51 +++++----- .../interpret/glassbox/_ebm_core/_boost.py | 5 +- .../tests/glassbox/ebm/test_callback.py | 92 +++++++++++++------ .../tests/glassbox/ebm/test_ebm.py | 4 +- .../tests/glassbox/ebm/test_merge_ebms.py | 2 +- 5 files changed, 99 insertions(+), 55 deletions(-) diff --git a/python/interpret-core/interpret/glassbox/_ebm.py b/python/interpret-core/interpret/glassbox/_ebm.py index 2f14f1f2c..a9828e67a 100644 --- a/python/interpret-core/interpret/glassbox/_ebm.py +++ b/python/interpret-core/interpret/glassbox/_ebm.py @@ -3206,13 +3206,14 @@ class EBMModel(BaseEBM): tradeoff for the ensemble of models --- not the individual models --- a small amount of overfitting of the individual models can improve the accuracy of the ensemble as a whole. - callback : Optional[Callable[[int, int, bool, float], bool]], default=None - A user-defined function that is invoked at the end of each boosting step to determine - whether to terminate boosting or continue. If it returns True, the boosting loop is - stopped immediately. By default, no callback is used and training proceeds according - to the early stopping settings. The callback function receives: - (1) the bag index, (2) the number of boosting steps completed, - (3) a boolean indicating whether progress was made in the current step, and (4) the current best score. + callback : Optional[Callable[..., bool]], default=None + A user-defined function invoked after each progressing boosting step. Must use + keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + If it returns True, boosting is stopped immediately. + The callback receives: ``bag`` (int) the outer bag index, + ``step`` (int) the number of boosting steps completed, + ``term`` (int) the index of the term that was just boosted, + and ``metric`` (float) the current validation metric. min_samples_leaf : int, default=4 Minimum number of samples allowed in the leaves. 
min_hessian : float, default=0.0 @@ -3309,7 +3310,7 @@ def __init__( max_rounds: Optional[int] = 50000, early_stopping_rounds: Optional[int] = 100, early_stopping_tolerance: Optional[float] = 1e-5, - callback: Optional[Callable[[int, int, bool, float], bool]] = None, + callback: Optional[Callable[..., bool]] = None, # Trees min_samples_leaf: Optional[int] = 4, min_hessian: Optional[float] = 0.0, @@ -3572,13 +3573,14 @@ class EBMClassifier(EBMClassifierMixin, EBMModel): tradeoff for the ensemble of models --- not the individual models --- a small amount of overfitting of the individual models can improve the accuracy of the ensemble as a whole. - callback : Optional[Callable[[int, int, bool, float], bool]], default=None - A user-defined function that is invoked at the end of each boosting step to determine - whether to terminate boosting or continue. If it returns True, the boosting loop is - stopped immediately. By default, no callback is used and training proceeds according - to the early stopping settings. The callback function receives: - (1) the bag index, (2) the number of boosting steps completed, - (3) a boolean indicating whether progress was made in the current step, and (4) the current best score. + callback : Optional[Callable[..., bool]], default=None + A user-defined function invoked after each progressing boosting step. Must use + keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + If it returns True, boosting is stopped immediately. + The callback receives: ``bag`` (int) the outer bag index, + ``step`` (int) the number of boosting steps completed, + ``term`` (int) the index of the term that was just boosted, + and ``metric`` (float) the current validation metric. min_samples_leaf : int, default=4 Minimum number of samples allowed in the leaves. 
min_hessian : float, default=1e-4 @@ -3734,7 +3736,7 @@ def __init__( max_rounds: Optional[int] = 50000, early_stopping_rounds: Optional[int] = 100, early_stopping_tolerance: Optional[float] = 1e-5, - callback: Optional[Callable[[int, int, bool, float], bool]] = None, + callback: Optional[Callable[..., bool]] = None, # Trees min_samples_leaf: Optional[int] = 4, min_hessian: Optional[float] = 1e-4, @@ -3876,13 +3878,14 @@ class EBMRegressor(EBMRegressorMixin, EBMModel): tradeoff for the ensemble of models --- not the individual models --- a small amount of overfitting of the individual models can improve the accuracy of the ensemble as a whole. - callback : Optional[Callable[[int, int, bool, float], bool]], default=None - A user-defined function that is invoked at the end of each boosting step to determine - whether to terminate boosting or continue. If it returns True, the boosting loop is - stopped immediately. By default, no callback is used and training proceeds according - to the early stopping settings. The callback function receives: - (1) the bag index, (2) the number of boosting steps completed, - (3) a boolean indicating whether progress was made in the current step, and (4) the current best score. + callback : Optional[Callable[..., bool]], default=None + A user-defined function invoked after each progressing boosting step. Must use + keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + If it returns True, boosting is stopped immediately. + The callback receives: ``bag`` (int) the outer bag index, + ``step`` (int) the number of boosting steps completed, + ``term`` (int) the index of the term that was just boosted, + and ``metric`` (float) the current validation metric. min_samples_leaf : int, default=4 Minimum number of samples allowed in the leaves. 
min_hessian : float, default=0.0 @@ -4038,7 +4041,7 @@ def __init__( max_rounds: Optional[int] = 50000, early_stopping_rounds: Optional[int] = 100, early_stopping_tolerance: Optional[float] = 1e-5, - callback: Optional[Callable[[int, int, bool, float], bool]] = None, + callback: Optional[Callable[..., bool]] = None, # Trees min_samples_leaf: Optional[int] = 4, min_hessian: Optional[float] = 0.0, diff --git a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py index d8fcb9be7..5d3e7415f 100644 --- a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py +++ b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py @@ -366,7 +366,10 @@ def boost( if callback is not None: is_done = callback( - bag_idx, step_idx, make_progress, cur_metric + bag=bag_idx, + step=step_idx, + term=term_idx, + metric=cur_metric, ) if is_done: if stop_flag is not None: diff --git a/python/interpret-core/tests/glassbox/ebm/test_callback.py b/python/interpret-core/tests/glassbox/ebm/test_callback.py index 1a03f054f..49c9e39b9 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_callback.py +++ b/python/interpret-core/tests/glassbox/ebm/test_callback.py @@ -1,7 +1,7 @@ # Copyright (c) 2023 The InterpretML Contributors # Distributed under the MIT software license -"""Regression tests for issue #635: callback fires repeatedly with same n_steps.""" +"""Regression tests for issue #635: callback API uses keyword-only args.""" import numpy as np @@ -21,8 +21,8 @@ class RecordingCallback: def __init__(self): self.records = [] - def __call__(self, bag_idx, n_steps, has_progressed, best_score): - self.records.append((bag_idx, n_steps, has_progressed, best_score)) + def __call__(self, *, bag, step, term, metric): + self.records.append((bag, step, term, metric)) return False @@ -33,24 +33,24 @@ def __init__(self, stop_after): self.stop_after = stop_after self.call_count = 0 - def __call__(self, bag_idx, n_steps, 
has_progressed, best_score): + def __call__(self, *, bag, step, term, metric): self.call_count += 1 return self.call_count >= self.stop_after def _split_into_phases(steps): - """Split a list of n_steps values into phases. + """Split a list of step values into phases. EBM training has multiple boosting phases (main terms, then - interactions) where step_idx resets to 1. This splits the - sequence at phase boundaries (where n_steps drops). + interactions) where step resets to 1. This splits the + sequence at phase boundaries (where step drops). """ if not steps: return [] phases = [[steps[0]]] for i in range(1, len(steps)): if steps[i] < steps[i - 1]: - # n_steps dropped — new phase started + # step dropped — new phase started phases.append([steps[i]]) else: phases[-1].append(steps[i]) @@ -58,11 +58,11 @@ def _split_into_phases(steps): def test_callback_no_repeated_steps_classifier(): - """Verify the callback receives strictly increasing n_steps values. + """Verify the callback receives strictly increasing step values. Before the fix, the callback was invoked on every internal loop iteration — including non-progressing cycles — which caused - the same n_steps value to be reported multiple times. + the same step value to be reported multiple times. 
""" cb = RecordingCallback() @@ -83,14 +83,14 @@ def test_callback_no_repeated_steps_classifier(): assert len(cb.records) > 0, "Callback should have been invoked at least once" steps_by_bag = {} - for bag_idx, n_steps, _, _ in cb.records: - steps_by_bag.setdefault(bag_idx, []).append(n_steps) + for bag, step, _, _ in cb.records: + steps_by_bag.setdefault(bag, []).append(step) - for bag_idx, steps in steps_by_bag.items(): + for bag, steps in steps_by_bag.items(): for phase in _split_into_phases(steps): for i in range(1, len(phase)): assert phase[i] > phase[i - 1], ( - f"Bag {bag_idx}: n_steps went from {phase[i - 1]} to " + f"Bag {bag}: step went from {phase[i - 1]} to " f"{phase[i]} (expected strictly increasing within phase)" ) @@ -116,24 +116,20 @@ def test_callback_no_repeated_steps_regressor(): assert len(cb.records) > 0, "Callback should have been invoked at least once" steps_by_bag = {} - for bag_idx, n_steps, _, _ in cb.records: - steps_by_bag.setdefault(bag_idx, []).append(n_steps) + for bag, step, _, _ in cb.records: + steps_by_bag.setdefault(bag, []).append(step) - for bag_idx, steps in steps_by_bag.items(): + for bag, steps in steps_by_bag.items(): for phase in _split_into_phases(steps): for i in range(1, len(phase)): assert phase[i] > phase[i - 1], ( - f"Bag {bag_idx}: n_steps went from {phase[i - 1]} to " + f"Bag {bag}: step went from {phase[i - 1]} to " f"{phase[i]} (expected strictly increasing within phase)" ) -def test_callback_has_progressed_always_true(): - """Verify has_progressed is always True when the callback fires. - - Since the callback now only fires inside the `if make_progress` - block, has_progressed should never be False. 
- """ +def test_callback_receives_term_index(): + """Verify the callback receives a valid term index.""" cb = RecordingCallback() X, y, names, types = make_synthetic( @@ -151,9 +147,12 @@ def test_callback_has_progressed_always_true(): ebm.fit(X, y) assert len(cb.records) > 0, "Callback should have been invoked at least once" - assert all(rec[2] for rec in cb.records), ( - "has_progressed should always be True when callback fires" - ) + + for i, (_, _, term, _) in enumerate(cb.records): + assert isinstance(term, (int, np.integer)), ( + f"term at call {i} should be an int, got {type(term)}" + ) + assert term >= 0, f"term at call {i} should be non-negative, got {term}" def test_callback_early_termination(): @@ -206,3 +205,42 @@ def test_callback_receives_valid_metrics(): for i, (_, _, _, metric) in enumerate(cb.records): assert np.isfinite(metric), f"Metric at step {i} is not finite: {metric}" + + +def test_callback_keyword_only_signature(): + """Verify the callback is invoked with keyword-only arguments. + + This test ensures that the callback cannot be invoked with positional + arguments, which is the core API change in this PR. 
+ """ + + class KeywordOnlyCallback: + def __init__(self): + self.called = False + + def __call__(self, *, bag, step, term, metric): + self.called = True + # Verify all args were passed as keywords by checking they exist + assert isinstance(bag, int) + assert isinstance(step, int) + assert isinstance(term, (int, np.integer)) + assert isinstance(metric, float) + return True # stop immediately + + cb = KeywordOnlyCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert cb.called, "Keyword-only callback should have been invoked" diff --git a/python/interpret-core/tests/glassbox/ebm/test_ebm.py b/python/interpret-core/tests/glassbox/ebm/test_ebm.py index fe8822b56..2108c9db2 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_ebm.py +++ b/python/interpret-core/tests/glassbox/ebm/test_ebm.py @@ -1334,7 +1334,7 @@ class Callback: def __init__(self, seconds): self._seconds = seconds - def __call__(self, bag_index, step_index, progress, metric): + def __call__(self, *, bag, step, term, metric): import time if not hasattr(self, "_end_time"): @@ -1362,7 +1362,7 @@ class Callback: def __init__(self, seconds): self._seconds = seconds - def __call__(self, bag_index, step_index, progress, metric): + def __call__(self, *, bag, step, term, metric): import time if not hasattr(self, "_end_time"): diff --git a/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py b/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py index fe2c69696..d10edd5ed 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py +++ b/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py @@ -393,7 +393,7 @@ def test_merge_ebms_callback_is_none(): (e.g., the callback might hold references to training state). 
""" - def training_callback(_step_count, _term_count, _is_interaction, _metric_value): + def training_callback(*, bag, step, term, metric): return False # continue training classifier_one = ExplainableBoostingClassifier( From f1122da109155eee8f27de9d7dc4dabb6c2302bb Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Thu, 16 Apr 2026 13:59:21 +0100 Subject: [PATCH 3/4] Address maintainer review: add stage to callback, fix stop_flag order Signed-off-by: ugbotueferhire --- COMMENT_DRAFT.md | 11 +++++ EMAIL_DRAFT.md | 35 +++++++++++++++ EMAIL_DRAFT.txt | 24 ++++++++++ PR_DRAFT.md | 45 +++++++++++++++++++ .../interpret-core/interpret/glassbox/_ebm.py | 12 +++-- .../interpret/glassbox/_ebm_core/_boost.py | 5 +++ .../tests/glassbox/ebm/test_callback.py | 20 ++++----- .../tests/glassbox/ebm/test_ebm.py | 4 +- .../tests/glassbox/ebm/test_merge_ebms.py | 2 +- 9 files changed, 142 insertions(+), 16 deletions(-) create mode 100644 COMMENT_DRAFT.md create mode 100644 EMAIL_DRAFT.md create mode 100644 EMAIL_DRAFT.txt create mode 100644 PR_DRAFT.md diff --git a/COMMENT_DRAFT.md b/COMMENT_DRAFT.md new file mode 100644 index 000000000..86a029ab0 --- /dev/null +++ b/COMMENT_DRAFT.md @@ -0,0 +1,11 @@ +@paulbkoch That makes a lot of sense — separating progress callbacks from feature-examination callbacks is a cleaner design. + +Before I start on this, I have a few questions to make sure I get the design exactly right: + +1. **Keyword argument names**: What exact parameter names do you want for each callback type? + - For example, for the progress callback: `def progress_cb(*, bag_idx, n_steps, best_score):`? + - And for the examination callback: `def exam_cb(*, bag_idx, n_steps, term_idx, avg_gain):`? Or different names? + +2. **Backward compatibility**: The current callback uses positional args `(bag_idx, n_steps, has_progressed, best_score)`. Should we keep supporting the old positional-arg style during a deprecation period, or is a clean break acceptable? + +3. 
**Tuple support now or later?**: You mentioned eventually passing tuples like `callback=(progress_cb, exam_cb)`. Should I implement tuple support in this PR, or just the keyword-arg introspection for now and leave tuples for a follow-up? diff --git a/EMAIL_DRAFT.md b/EMAIL_DRAFT.md new file mode 100644 index 000000000..485f9f729 --- /dev/null +++ b/EMAIL_DRAFT.md @@ -0,0 +1,35 @@ +**To:** interpret@microsoft.com +**Subject:** Resolving Issue #576: scikit-learn compliance fix for merge_ebms() [PR #661] + +Dear InterpretML Team, + +I hope this email finds you well. + +I have just submitted Pull Request #661 to address **Issue #576**, where calling `merge_ebms()` produced broken EBM objects lacking proper hyperparameters. Because `merge_ebms()` instantiates the merged model via `__new__()`, the default constructor is bypassed, violating scikit-learn estimator requirements and causing downstream crashes in methods like `repr()` and `base.clone()`. + +To resolve this directly, I implemented `_initialize_merged_model_params()` in the merge flow: + +```python +# Our concise implementation utilizing get_params(deep=False) +def _initialize_merged_model_params(merged_model, source_models): + first_source_model = source_models[0] + hyperparameters = first_source_model.get_params(deep=False) + + # Scrub stateful callback references, preventing stale pointers post-merge + if "callback" in hyperparameters: + hyperparameters["callback"] = None + + for parameter_name, parameter_value in hyperparameters.items(): + setattr(merged_model, parameter_name, parameter_value) +``` + +To permanently guard against regressions, I’ve introduced a suite of 7 comprehensive regression tests covering Classifier and Regressor edge cases, all of which pass locally and in the GitHub Actions pipelines, clean under `ruff`, with 100% test coverage.
+ +*(Please attach a screenshot of your GitHub pull request showing the "All checks have passed" green checkmarks here)* + +I am highly enthusiastic about contributing to the structural reliability of InterpretML and would deeply appreciate a code review from the core maintainer team at your earliest convenience. Please let me know if any further refinements are required. + +Best regards, + +**[Your Real Name]** +GitHub: @ugbotueferhire diff --git a/EMAIL_DRAFT.txt b/EMAIL_DRAFT.txt new file mode 100644 index 000000000..b6b3065a8 --- /dev/null +++ b/EMAIL_DRAFT.txt @@ -0,0 +1,24 @@ +To: interpret@microsoft.com +Subject: PR #661 Ready for Review: Fix for Issue #576 + +Dear InterpretML Team, + +I have submitted Pull Request #661 to fix Issue #576, where calling merge_ebms() produces models that lack fundamental hyperparameters and fail downstream scikit-learn operations. + +Key Updates: +- Initialized missing parameters directly on the merged model using get_params(deep=False). +- Neutralized stateful callback references post-merge. +- Added 7 robust regression tests verifying strict scikit-learn compliance. + +The exact code changes and successful CI test passages are detailed in the images below: + +[Attach Screenshot of the Code Change Here] + +[Attach Screenshot of Passing GitHub Actions Checks Here] + +I would appreciate your code review at your earliest convenience. + +Best regards, + +[Your Name] +GitHub: @ugbotueferhire diff --git a/PR_DRAFT.md b/PR_DRAFT.md new file mode 100644 index 000000000..77111b71a --- /dev/null +++ b/PR_DRAFT.md @@ -0,0 +1,45 @@ +Fixes #635 Repeated Callback Iterations + +## Description +This PR resolves an issue during EBM training where the `callback` parameter is invoked repeatedly with the exact same `n_steps` value whenever the boosting loop fails to make progress. 
+ +### The Bug +In the boosting loop inside `_boost.py`, the callback was being unconditionally invoked at the very end of the loop, regardless of whether `make_progress` was `True` or `False`. Because `step_idx` is only incremented when `make_progress` is `True`, non-progressing cycle iterations continuously spammed the callback with the exact same `n_steps` value, resulting in repeating redundant output logs for the user. + +### The Fix +I moved the callback invocation **inside** the `if make_progress:` block, right after early stopping tolerance evaluations. +```diff +- if callback is not None: +- is_done = callback( +- bag_idx, step_idx, make_progress, cur_metric +- ) +- if is_done: +- if stop_flag is not None: +- stop_flag[0] = True +- break +``` +```python ++ if callback is not None: ++ is_done = callback( ++ bag_idx, step_idx, make_progress, cur_metric ++ ) ++ if is_done: ++ if stop_flag is not None: ++ stop_flag[0] = True ++ break +``` + +This change strictly guarantees that the callback only receives monotonic, progressing `n_steps` values. + +## Design Decisions +* The `has_progressed` parameter has been strictly maintained for API backward compatibility. Because the callback is now only ever fired after progress is made, it will always be evaluated as `True`. This ensures any pre-existing user callbacks utilizing `if has_progressed:` do not unexpectedly break. + +## Testing +Added 5 robust regression tests inside a new `test_callback.py` file to maintain the invariant: +1. `test_callback_no_repeated_steps_classifier`: Verifies `n_steps` remains monotonically increasing across distinct boosting phases (main terms vs interaction pairs) without any duplicate step integers. +2. `test_callback_no_repeated_steps_regressor`: Equivalent functionality verification for the regressor implementation. +3. `test_callback_has_progressed_always_true`: Validates `has_progressed` correctly always outputs `True`. +4. 
`test_callback_early_termination`: Assertions establishing that `is_done = True` continues to properly bypass early stopping settings to instantly conclude training. +5. `test_callback_receives_valid_metrics`: Ensures the `best_score` metric received by the callback does not evaluate to NaN or infinite sequences. + +All existing and newly included formatting validation and regression tests successfully pass without issue. diff --git a/python/interpret-core/interpret/glassbox/_ebm.py b/python/interpret-core/interpret/glassbox/_ebm.py index 002848931..8404ceeff 100644 --- a/python/interpret-core/interpret/glassbox/_ebm.py +++ b/python/interpret-core/interpret/glassbox/_ebm.py @@ -999,6 +999,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None): delayed(booster)( shm_name=shm_name, bag_idx=idx, + stage=0, callback=callback, dataset=( shared.name if shared.name is not None else shared.dataset @@ -1238,6 +1239,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None): delayed(booster)( shm_name=shm_name, bag_idx=idx, + stage=1, callback=callback, dataset=( shared.name @@ -1349,6 +1351,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None): exception, intercept_change, _, _, rng = booster( shm_name=None, bag_idx=0, + stage=-1, callback=None, dataset=shared.dataset, intercept_rounds=develop.get_option("n_intercept_rounds_final"), @@ -3208,9 +3211,10 @@ class EBMModel(BaseEBM): the ensemble as a whole. callback : Optional[Callable[..., bool]], default=None A user-defined function invoked after each progressing boosting step. Must use - keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + keyword-only arguments: ``def my_callback(*, bag, stage, step, term, metric)``. If it returns True, boosting is stopped immediately. 
The callback receives: ``bag`` (int) the outer bag index, + ``stage`` (int) the boosting stage (0=mains, 1=pairs), ``step`` (int) the number of boosting steps completed, ``term`` (int) the index of the term that was just boosted, and ``metric`` (float) the current validation metric. @@ -3575,9 +3579,10 @@ class EBMClassifier(EBMClassifierMixin, EBMModel): the ensemble as a whole. callback : Optional[Callable[..., bool]], default=None A user-defined function invoked after each progressing boosting step. Must use - keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + keyword-only arguments: ``def my_callback(*, bag, stage, step, term, metric)``. If it returns True, boosting is stopped immediately. The callback receives: ``bag`` (int) the outer bag index, + ``stage`` (int) the boosting stage (0=mains, 1=pairs), ``step`` (int) the number of boosting steps completed, ``term`` (int) the index of the term that was just boosted, and ``metric`` (float) the current validation metric. @@ -3880,9 +3885,10 @@ class EBMRegressor(EBMRegressorMixin, EBMModel): the ensemble as a whole. callback : Optional[Callable[..., bool]], default=None A user-defined function invoked after each progressing boosting step. Must use - keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + keyword-only arguments: ``def my_callback(*, bag, stage, step, term, metric)``. If it returns True, boosting is stopped immediately. The callback receives: ``bag`` (int) the outer bag index, + ``stage`` (int) the boosting stage (0=mains, 1=pairs), ``step`` (int) the number of boosting steps completed, ``term`` (int) the index of the term that was just boosted, and ``metric`` (float) the current validation metric. 
diff --git a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py index 5d3e7415f..a789c91e8 100644 --- a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py +++ b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py @@ -28,6 +28,7 @@ def load_shared_memory(name: str) -> Iterator[shared_memory.SharedMemory]: def boost( shm_name, bag_idx, + stage, callback, dataset, intercept_rounds, @@ -364,9 +365,13 @@ def boost( ): break + if stop_flag is not None and stop_flag[0]: + break + if callback is not None: is_done = callback( bag=bag_idx, + stage=stage, step=step_idx, term=term_idx, metric=cur_metric, diff --git a/python/interpret-core/tests/glassbox/ebm/test_callback.py b/python/interpret-core/tests/glassbox/ebm/test_callback.py index 49c9e39b9..720e6e914 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_callback.py +++ b/python/interpret-core/tests/glassbox/ebm/test_callback.py @@ -21,8 +21,8 @@ class RecordingCallback: def __init__(self): self.records = [] - def __call__(self, *, bag, step, term, metric): - self.records.append((bag, step, term, metric)) + def __call__(self, *, bag, stage, step, term, metric): + self.records.append((bag, stage, step, term, metric)) return False @@ -33,7 +33,7 @@ def __init__(self, stop_after): self.stop_after = stop_after self.call_count = 0 - def __call__(self, *, bag, step, term, metric): + def __call__(self, *, bag, stage, step, term, metric): self.call_count += 1 return self.call_count >= self.stop_after @@ -83,8 +83,8 @@ def test_callback_no_repeated_steps_classifier(): assert len(cb.records) > 0, "Callback should have been invoked at least once" steps_by_bag = {} - for bag, step, _, _ in cb.records: - steps_by_bag.setdefault(bag, []).append(step) + for bag, stage, step, _, _ in cb.records: + steps_by_bag.setdefault(bag, []).append((stage, step)) for bag, steps in steps_by_bag.items(): for phase in _split_into_phases(steps): @@ -116,8 
+116,8 @@ def test_callback_no_repeated_steps_regressor(): assert len(cb.records) > 0, "Callback should have been invoked at least once" steps_by_bag = {} - for bag, step, _, _ in cb.records: - steps_by_bag.setdefault(bag, []).append(step) + for bag, stage, step, _, _ in cb.records: + steps_by_bag.setdefault(bag, []).append((stage, step)) for bag, steps in steps_by_bag.items(): for phase in _split_into_phases(steps): @@ -148,7 +148,7 @@ def test_callback_receives_term_index(): assert len(cb.records) > 0, "Callback should have been invoked at least once" - for i, (_, _, term, _) in enumerate(cb.records): + for i, (_, _, _, term, _) in enumerate(cb.records): assert isinstance(term, (int, np.integer)), ( f"term at call {i} should be an int, got {type(term)}" ) @@ -203,7 +203,7 @@ def test_callback_receives_valid_metrics(): assert len(cb.records) > 0, "Callback should have been invoked at least once" - for i, (_, _, _, metric) in enumerate(cb.records): + for i, (_, _, _, _, metric) in enumerate(cb.records): assert np.isfinite(metric), f"Metric at step {i} is not finite: {metric}" @@ -218,7 +218,7 @@ class KeywordOnlyCallback: def __init__(self): self.called = False - def __call__(self, *, bag, step, term, metric): + def __call__(self, *, bag, stage, step, term, metric): self.called = True # Verify all args were passed as keywords by checking they exist assert isinstance(bag, int) diff --git a/python/interpret-core/tests/glassbox/ebm/test_ebm.py b/python/interpret-core/tests/glassbox/ebm/test_ebm.py index f14b390c8..eadd334a6 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_ebm.py +++ b/python/interpret-core/tests/glassbox/ebm/test_ebm.py @@ -1334,7 +1334,7 @@ class Callback: def __init__(self, seconds): self._seconds = seconds - def __call__(self, *, bag, step, term, metric): + def __call__(self, *, bag, stage, step, term, metric): import time if not hasattr(self, "_end_time"): @@ -1362,7 +1362,7 @@ class Callback: def __init__(self, seconds): self._seconds = 
seconds - def __call__(self, *, bag, step, term, metric): + def __call__(self, *, bag, stage, step, term, metric): import time if not hasattr(self, "_end_time"): diff --git a/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py b/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py index d10edd5ed..0307fca3f 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py +++ b/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py @@ -393,7 +393,7 @@ def test_merge_ebms_callback_is_none(): (e.g., the callback might hold references to training state). """ - def training_callback(*, bag, step, term, metric): + def training_callback(*, bag, stage, step, term, metric): return False # continue training classifier_one = ExplainableBoostingClassifier( From de08f4adf31008905805e0cdfda34a0ce101add1 Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Sun, 19 Apr 2026 10:52:29 +0100 Subject: [PATCH 4/4] Remove _split_into_phases and draft files per review Signed-off-by: ugbotueferhire --- COMMENT_DRAFT.md | 11 ----- EMAIL_DRAFT.md | 35 --------------- EMAIL_DRAFT.txt | 24 ---------- PR_DRAFT.md | 45 ------------------- .../tests/glassbox/ebm/test_callback.py | 40 +++++------------ 5 files changed, 10 insertions(+), 145 deletions(-) delete mode 100644 COMMENT_DRAFT.md delete mode 100644 EMAIL_DRAFT.md delete mode 100644 EMAIL_DRAFT.txt delete mode 100644 PR_DRAFT.md diff --git a/COMMENT_DRAFT.md b/COMMENT_DRAFT.md deleted file mode 100644 index 86a029ab0..000000000 --- a/COMMENT_DRAFT.md +++ /dev/null @@ -1,11 +0,0 @@ -@paulbkoch That makes a lot of sense — separating progress callbacks from feature-examination callbacks is a cleaner design. - -Before I start on this, I have a few questions to make sure I get the design exactly right: - -1. **Keyword argument names**: What exact parameter names do you want for each callback type? - - For example, for the progress callback: `def progress_cb(*, bag_idx, n_steps, best_score):`? 
- - And for the examination callback: `def exam_cb(*, bag_idx, n_steps, term_idx, avg_gain):`? Or different names? - -2. **Backward compatibility**: The current callback uses positional args `(bag_idx, n_steps, has_progressed, best_score)`. Should we keep supporting the old positional-arg style during a deprecation period, or is a clean break acceptable? - -3. **Tuple support now or later?**: You mentioned eventually passing tuples like `callback=(progress_cb, exam_cb)`. Should I implement tuple support in this PR, or just the keyword-arg introspection for now and leave tuples for a follow-up? diff --git a/EMAIL_DRAFT.md b/EMAIL_DRAFT.md deleted file mode 100644 index 485f9f729..000000000 --- a/EMAIL_DRAFT.md +++ /dev/null @@ -1,35 +0,0 @@ -**To:** interpret@microsoft.com -**Subject:** Resolving Issue #576: scikit-learn compliance fix for merge_ebms() [PR #661] - -Dear InterpretML Team, - -I hope this email finds you well. - -I have just submitted Pull Request #661 to address **Issue #576**, where calling `merge_ebms()` produced broken EBM objects lacking proper hyperparameters. Because `merge_ebms()` instantiates the merged model via `__new__()`, the default constructor is bypassed, severing scikit-learn estimator requirements and causing downstream crashes on methods like `repr()` and `base.clone()`. 
- -To surgically resolve this natively, I implemented `_initialize_merged_model_params()` directly into the merge flow: - -```python -# Our concise implementation utilizing get_params(deep=False) -def _initialize_merged_model_params(merged_model, source_models): - first_source_model = source_models[0] - hyperparameters = first_source_model.get_params(deep=False) - - # Scub stateful callback references preventing stale pointers post-merge - if "callback" in hyperparameters: - hyperparameters["callback"] = None - - for parameter_name, parameter_value in hyperparameters.items(): - setattr(merged_model, parameter_name, parameter_value) -``` - -To permanently guard against regressions, I’ve introduced a suite of 7 comprehensive regression tests simulating Classifier and Regressor edge-cases, which passed local and GitHub Actions pipelines flawlessly against `ruff` and 100% test coverage. - -*(Please attach a screenshot of your GitHub pull request showing the "All checks have passed" green checkmarks here)* - -I am highly enthusiastic about contributing to the structural reliability of InterpretML and would deeply appreciate a code review from the core maintainer team at your earliest convenience. Please let me know if any further refinements are required. - -Best regards, - -**[Your Real Name]** -GitHub: @ugbotueferhire diff --git a/EMAIL_DRAFT.txt b/EMAIL_DRAFT.txt deleted file mode 100644 index b6b3065a8..000000000 --- a/EMAIL_DRAFT.txt +++ /dev/null @@ -1,24 +0,0 @@ -To: interpret@microsoft.com -Subject: PR #661 Ready for Review: Fix for Issue #576 - -Dear InterpretML Team, - -I have submitted Pull Request #661 to fix Issue #576, where calling merge_ebms() produces models that lack fundamental hyperparameters and fail downstream scikit-learn operations. - -Key Updates: -- Initialized missing parameters directly on the merged model using get_params(deep=False). -- Neutralized stateful callback references post-merge. 
-- Added 7 robust regression tests verifying strict scikit-learn compliance. - -The exact code changes and successful CI test passages are detailed in the images below: - -[Attach Screenshot of the Code Change Here] - -[Attach Screenshot of Passing GitHub Actions Checks Here] - -I would appreciate your code review at your earliest convenience. - -Best regards, - -[Your Name] -GitHub: @ugbotueferhire diff --git a/PR_DRAFT.md b/PR_DRAFT.md deleted file mode 100644 index 77111b71a..000000000 --- a/PR_DRAFT.md +++ /dev/null @@ -1,45 +0,0 @@ -Fixes #635 Repeated Callback Iterations - -## Description -This PR resolves an issue during EBM training where the `callback` parameter is invoked repeatedly with the exact same `n_steps` value whenever the boosting loop fails to make progress. - -### The Bug -In the boosting loop inside `_boost.py`, the callback was being unconditionally invoked at the very end of the loop, regardless of whether `make_progress` was `True` or `False`. Because `step_idx` is only incremented when `make_progress` is `True`, non-progressing cycle iterations continuously spammed the callback with the exact same `n_steps` value, resulting in repeating redundant output logs for the user. - -### The Fix -I moved the callback invocation **inside** the `if make_progress:` block, right after early stopping tolerance evaluations. -```diff -- if callback is not None: -- is_done = callback( -- bag_idx, step_idx, make_progress, cur_metric -- ) -- if is_done: -- if stop_flag is not None: -- stop_flag[0] = True -- break -``` -```python -+ if callback is not None: -+ is_done = callback( -+ bag_idx, step_idx, make_progress, cur_metric -+ ) -+ if is_done: -+ if stop_flag is not None: -+ stop_flag[0] = True -+ break -``` - -This change strictly guarantees that the callback only receives monotonic, progressing `n_steps` values. - -## Design Decisions -* The `has_progressed` parameter has been strictly maintained for API backward compatibility. 
Because the callback is now only ever fired after progress is made, it will always be evaluated as `True`. This ensures any pre-existing user callbacks utilizing `if has_progressed:` do not unexpectedly break. - -## Testing -Added 5 robust regression tests inside a new `test_callback.py` file to maintain the invariant: -1. `test_callback_no_repeated_steps_classifier`: Verifies `n_steps` remains monotonically increasing across distinct boosting phases (main terms vs interaction pairs) without any duplicate step integers. -2. `test_callback_no_repeated_steps_regressor`: Equivalent functionality verification for the regressor implementation. -3. `test_callback_has_progressed_always_true`: Validates `has_progressed` correctly always outputs `True`. -4. `test_callback_early_termination`: Assertions establishing that `is_done = True` continues to properly bypass early stopping settings to instantly conclude training. -5. `test_callback_receives_valid_metrics`: Ensures the `best_score` metric received by the callback does not evaluate to NaN or infinite sequences. - -All existing and newly included formatting validation and regression tests successfully pass without issue. diff --git a/python/interpret-core/tests/glassbox/ebm/test_callback.py b/python/interpret-core/tests/glassbox/ebm/test_callback.py index 720e6e914..b32c79aa7 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_callback.py +++ b/python/interpret-core/tests/glassbox/ebm/test_callback.py @@ -38,24 +38,6 @@ def __call__(self, *, bag, stage, step, term, metric): return self.call_count >= self.stop_after -def _split_into_phases(steps): - """Split a list of step values into phases. - - EBM training has multiple boosting phases (main terms, then - interactions) where step resets to 1. This splits the - sequence at phase boundaries (where step drops). 
- """ - if not steps: - return [] - phases = [[steps[0]]] - for i in range(1, len(steps)): - if steps[i] < steps[i - 1]: - # step dropped — new phase started - phases.append([steps[i]]) - else: - phases[-1].append(steps[i]) - return phases - def test_callback_no_repeated_steps_classifier(): """Verify the callback receives strictly increasing step values. @@ -87,12 +69,11 @@ def test_callback_no_repeated_steps_classifier(): steps_by_bag.setdefault(bag, []).append((stage, step)) for bag, steps in steps_by_bag.items(): - for phase in _split_into_phases(steps): - for i in range(1, len(phase)): - assert phase[i] > phase[i - 1], ( - f"Bag {bag}: step went from {phase[i - 1]} to " - f"{phase[i]} (expected strictly increasing within phase)" - ) + for i in range(1, len(steps)): + assert steps[i] > steps[i - 1], ( + f"Bag {bag}: (stage, step) went from {steps[i - 1]} to " + f"{steps[i]} (expected strictly increasing)" + ) def test_callback_no_repeated_steps_regressor(): @@ -120,12 +101,11 @@ def test_callback_no_repeated_steps_regressor(): steps_by_bag.setdefault(bag, []).append((stage, step)) for bag, steps in steps_by_bag.items(): - for phase in _split_into_phases(steps): - for i in range(1, len(phase)): - assert phase[i] > phase[i - 1], ( - f"Bag {bag}: step went from {phase[i - 1]} to " - f"{phase[i]} (expected strictly increasing within phase)" - ) + for i in range(1, len(steps)): + assert steps[i] > steps[i - 1], ( + f"Bag {bag}: (stage, step) went from {steps[i - 1]} to " + f"{steps[i]} (expected strictly increasing)" + ) def test_callback_receives_term_index():