From c3e66127bd6653aa54214ceb78b4f51c909e0885 Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Sun, 12 Apr 2026 08:52:38 +0100 Subject: [PATCH 1/4] Fix repeated callback iterations (#635) Signed-off-by: ugbotueferhire --- .../interpret/glassbox/_ebm_core/_boost.py | 18 +- .../tests/glassbox/ebm/test_callback.py | 208 ++++++++++++++++++ 2 files changed, 217 insertions(+), 9 deletions(-) create mode 100644 python/interpret-core/tests/glassbox/ebm/test_callback.py diff --git a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py index 79c4ffe8c..d8fcb9be7 100644 --- a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py +++ b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py @@ -364,18 +364,18 @@ def boost( ): break + if callback is not None: + is_done = callback( + bag_idx, step_idx, make_progress, cur_metric + ) + if is_done: + if stop_flag is not None: + stop_flag[0] = True + break + if stop_flag is not None and stop_flag[0]: break - if callback is not None: - is_done = callback( - bag_idx, step_idx, make_progress, cur_metric - ) - if is_done: - if stop_flag is not None: - stop_flag[0] = True - break - state_idx = state_idx + 1 if len(term_features) <= state_idx: if smoothing_rounds > 0: diff --git a/python/interpret-core/tests/glassbox/ebm/test_callback.py b/python/interpret-core/tests/glassbox/ebm/test_callback.py new file mode 100644 index 000000000..1a03f054f --- /dev/null +++ b/python/interpret-core/tests/glassbox/ebm/test_callback.py @@ -0,0 +1,208 @@ +# Copyright (c) 2023 The InterpretML Contributors +# Distributed under the MIT software license + +"""Regression tests for issue #635: callback fires repeatedly with same n_steps.""" + +import numpy as np + +from interpret.glassbox import ( + ExplainableBoostingClassifier, + ExplainableBoostingRegressor, +) +from interpret.utils import make_synthetic + + +class RecordingCallback: + """Picklable callback that records all 
invocations. + + Uses n_jobs=1 in tests so that state is shared in-process. + """ + + def __init__(self): + self.records = [] + + def __call__(self, bag_idx, n_steps, has_progressed, best_score): + self.records.append((bag_idx, n_steps, has_progressed, best_score)) + return False + + +class StopAfterCallback: + """Picklable callback that stops training after N calls.""" + + def __init__(self, stop_after): + self.stop_after = stop_after + self.call_count = 0 + + def __call__(self, bag_idx, n_steps, has_progressed, best_score): + self.call_count += 1 + return self.call_count >= self.stop_after + + +def _split_into_phases(steps): + """Split a list of n_steps values into phases. + + EBM training has multiple boosting phases (main terms, then + interactions) where step_idx resets to 1. This splits the + sequence at phase boundaries (where n_steps drops). + """ + if not steps: + return [] + phases = [[steps[0]]] + for i in range(1, len(steps)): + if steps[i] < steps[i - 1]: + # n_steps dropped — new phase started + phases.append([steps[i]]) + else: + phases[-1].append(steps[i]) + return phases + + +def test_callback_no_repeated_steps_classifier(): + """Verify the callback receives strictly increasing n_steps values. + + Before the fix, the callback was invoked on every internal loop + iteration — including non-progressing cycles — which caused + the same n_steps value to be reported multiple times. 
+ """ + cb = RecordingCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert len(cb.records) > 0, "Callback should have been invoked at least once" + + steps_by_bag = {} + for bag_idx, n_steps, _, _ in cb.records: + steps_by_bag.setdefault(bag_idx, []).append(n_steps) + + for bag_idx, steps in steps_by_bag.items(): + for phase in _split_into_phases(steps): + for i in range(1, len(phase)): + assert phase[i] > phase[i - 1], ( + f"Bag {bag_idx}: n_steps went from {phase[i - 1]} to " + f"{phase[i]} (expected strictly increasing within phase)" + ) + + +def test_callback_no_repeated_steps_regressor(): + """Same test as above but for ExplainableBoostingRegressor.""" + cb = RecordingCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=None, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingRegressor( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert len(cb.records) > 0, "Callback should have been invoked at least once" + + steps_by_bag = {} + for bag_idx, n_steps, _, _ in cb.records: + steps_by_bag.setdefault(bag_idx, []).append(n_steps) + + for bag_idx, steps in steps_by_bag.items(): + for phase in _split_into_phases(steps): + for i in range(1, len(phase)): + assert phase[i] > phase[i - 1], ( + f"Bag {bag_idx}: n_steps went from {phase[i - 1]} to " + f"{phase[i]} (expected strictly increasing within phase)" + ) + + +def test_callback_has_progressed_always_true(): + """Verify has_progressed is always True when the callback fires. + + Since the callback now only fires inside the `if make_progress` + block, has_progressed should never be False. 
+ """ + cb = RecordingCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert len(cb.records) > 0, "Callback should have been invoked at least once" + assert all(rec[2] for rec in cb.records), ( + "has_progressed should always be True when callback fires" + ) + + +def test_callback_early_termination(): + """Verify the callback can still terminate training early.""" + cb = StopAfterCallback(stop_after=5) + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=5000, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert cb.call_count == cb.stop_after, ( + f"Expected callback to be called exactly {cb.stop_after} times " + f"before stopping, but was called {cb.call_count} times" + ) + + # The model should still be valid after early stopping + predictions = ebm.predict(X) + assert len(predictions) == len(y) + + +def test_callback_receives_valid_metrics(): + """Verify the callback receives valid (finite) metric values.""" + cb = RecordingCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert len(cb.records) > 0, "Callback should have been invoked at least once" + + for i, (_, _, _, metric) in enumerate(cb.records): + assert np.isfinite(metric), f"Metric at step {i} is not finite: {metric}" From a36362b45f009482970bdfe519e2df1f559b5961 Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Mon, 13 Apr 2026 09:11:46 +0100 Subject: [PATCH 2/4] Change callback API to keyword-only args (#635) Signed-off-by: ugbotueferhire --- 
.../interpret-core/interpret/glassbox/_ebm.py | 51 +++++----- .../interpret/glassbox/_ebm_core/_boost.py | 5 +- .../tests/glassbox/ebm/test_callback.py | 92 +++++++++++++------ .../tests/glassbox/ebm/test_ebm.py | 4 +- .../tests/glassbox/ebm/test_merge_ebms.py | 2 +- 5 files changed, 99 insertions(+), 55 deletions(-) diff --git a/python/interpret-core/interpret/glassbox/_ebm.py b/python/interpret-core/interpret/glassbox/_ebm.py index 2f14f1f2c..a9828e67a 100644 --- a/python/interpret-core/interpret/glassbox/_ebm.py +++ b/python/interpret-core/interpret/glassbox/_ebm.py @@ -3206,13 +3206,14 @@ class EBMModel(BaseEBM): tradeoff for the ensemble of models --- not the individual models --- a small amount of overfitting of the individual models can improve the accuracy of the ensemble as a whole. - callback : Optional[Callable[[int, int, bool, float], bool]], default=None - A user-defined function that is invoked at the end of each boosting step to determine - whether to terminate boosting or continue. If it returns True, the boosting loop is - stopped immediately. By default, no callback is used and training proceeds according - to the early stopping settings. The callback function receives: - (1) the bag index, (2) the number of boosting steps completed, - (3) a boolean indicating whether progress was made in the current step, and (4) the current best score. + callback : Optional[Callable[..., bool]], default=None + A user-defined function invoked after each progressing boosting step. Must use + keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + If it returns True, boosting is stopped immediately. + The callback receives: ``bag`` (int) the outer bag index, + ``step`` (int) the number of boosting steps completed, + ``term`` (int) the index of the term that was just boosted, + and ``metric`` (float) the current validation metric. min_samples_leaf : int, default=4 Minimum number of samples allowed in the leaves. 
min_hessian : float, default=0.0 @@ -3309,7 +3310,7 @@ def __init__( max_rounds: Optional[int] = 50000, early_stopping_rounds: Optional[int] = 100, early_stopping_tolerance: Optional[float] = 1e-5, - callback: Optional[Callable[[int, int, bool, float], bool]] = None, + callback: Optional[Callable[..., bool]] = None, # Trees min_samples_leaf: Optional[int] = 4, min_hessian: Optional[float] = 0.0, @@ -3572,13 +3573,14 @@ class EBMClassifier(EBMClassifierMixin, EBMModel): tradeoff for the ensemble of models --- not the individual models --- a small amount of overfitting of the individual models can improve the accuracy of the ensemble as a whole. - callback : Optional[Callable[[int, int, bool, float], bool]], default=None - A user-defined function that is invoked at the end of each boosting step to determine - whether to terminate boosting or continue. If it returns True, the boosting loop is - stopped immediately. By default, no callback is used and training proceeds according - to the early stopping settings. The callback function receives: - (1) the bag index, (2) the number of boosting steps completed, - (3) a boolean indicating whether progress was made in the current step, and (4) the current best score. + callback : Optional[Callable[..., bool]], default=None + A user-defined function invoked after each progressing boosting step. Must use + keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + If it returns True, boosting is stopped immediately. + The callback receives: ``bag`` (int) the outer bag index, + ``step`` (int) the number of boosting steps completed, + ``term`` (int) the index of the term that was just boosted, + and ``metric`` (float) the current validation metric. min_samples_leaf : int, default=4 Minimum number of samples allowed in the leaves. 
min_hessian : float, default=1e-4 @@ -3734,7 +3736,7 @@ def __init__( max_rounds: Optional[int] = 50000, early_stopping_rounds: Optional[int] = 100, early_stopping_tolerance: Optional[float] = 1e-5, - callback: Optional[Callable[[int, int, bool, float], bool]] = None, + callback: Optional[Callable[..., bool]] = None, # Trees min_samples_leaf: Optional[int] = 4, min_hessian: Optional[float] = 1e-4, @@ -3876,13 +3878,14 @@ class EBMRegressor(EBMRegressorMixin, EBMModel): tradeoff for the ensemble of models --- not the individual models --- a small amount of overfitting of the individual models can improve the accuracy of the ensemble as a whole. - callback : Optional[Callable[[int, int, bool, float], bool]], default=None - A user-defined function that is invoked at the end of each boosting step to determine - whether to terminate boosting or continue. If it returns True, the boosting loop is - stopped immediately. By default, no callback is used and training proceeds according - to the early stopping settings. The callback function receives: - (1) the bag index, (2) the number of boosting steps completed, - (3) a boolean indicating whether progress was made in the current step, and (4) the current best score. + callback : Optional[Callable[..., bool]], default=None + A user-defined function invoked after each progressing boosting step. Must use + keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + If it returns True, boosting is stopped immediately. + The callback receives: ``bag`` (int) the outer bag index, + ``step`` (int) the number of boosting steps completed, + ``term`` (int) the index of the term that was just boosted, + and ``metric`` (float) the current validation metric. min_samples_leaf : int, default=4 Minimum number of samples allowed in the leaves. 
min_hessian : float, default=0.0 @@ -4038,7 +4041,7 @@ def __init__( max_rounds: Optional[int] = 50000, early_stopping_rounds: Optional[int] = 100, early_stopping_tolerance: Optional[float] = 1e-5, - callback: Optional[Callable[[int, int, bool, float], bool]] = None, + callback: Optional[Callable[..., bool]] = None, # Trees min_samples_leaf: Optional[int] = 4, min_hessian: Optional[float] = 0.0, diff --git a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py index d8fcb9be7..5d3e7415f 100644 --- a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py +++ b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py @@ -366,7 +366,10 @@ def boost( if callback is not None: is_done = callback( - bag_idx, step_idx, make_progress, cur_metric + bag=bag_idx, + step=step_idx, + term=term_idx, + metric=cur_metric, ) if is_done: if stop_flag is not None: diff --git a/python/interpret-core/tests/glassbox/ebm/test_callback.py b/python/interpret-core/tests/glassbox/ebm/test_callback.py index 1a03f054f..49c9e39b9 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_callback.py +++ b/python/interpret-core/tests/glassbox/ebm/test_callback.py @@ -1,7 +1,7 @@ # Copyright (c) 2023 The InterpretML Contributors # Distributed under the MIT software license -"""Regression tests for issue #635: callback fires repeatedly with same n_steps.""" +"""Regression tests for issue #635: callback API uses keyword-only args.""" import numpy as np @@ -21,8 +21,8 @@ class RecordingCallback: def __init__(self): self.records = [] - def __call__(self, bag_idx, n_steps, has_progressed, best_score): - self.records.append((bag_idx, n_steps, has_progressed, best_score)) + def __call__(self, *, bag, step, term, metric): + self.records.append((bag, step, term, metric)) return False @@ -33,24 +33,24 @@ def __init__(self, stop_after): self.stop_after = stop_after self.call_count = 0 - def __call__(self, bag_idx, n_steps, 
has_progressed, best_score): + def __call__(self, *, bag, step, term, metric): self.call_count += 1 return self.call_count >= self.stop_after def _split_into_phases(steps): - """Split a list of n_steps values into phases. + """Split a list of step values into phases. EBM training has multiple boosting phases (main terms, then - interactions) where step_idx resets to 1. This splits the - sequence at phase boundaries (where n_steps drops). + interactions) where step resets to 1. This splits the + sequence at phase boundaries (where step drops). """ if not steps: return [] phases = [[steps[0]]] for i in range(1, len(steps)): if steps[i] < steps[i - 1]: - # n_steps dropped — new phase started + # step dropped — new phase started phases.append([steps[i]]) else: phases[-1].append(steps[i]) @@ -58,11 +58,11 @@ def _split_into_phases(steps): def test_callback_no_repeated_steps_classifier(): - """Verify the callback receives strictly increasing n_steps values. + """Verify the callback receives strictly increasing step values. Before the fix, the callback was invoked on every internal loop iteration — including non-progressing cycles — which caused - the same n_steps value to be reported multiple times. + the same step value to be reported multiple times. 
""" cb = RecordingCallback() @@ -83,14 +83,14 @@ def test_callback_no_repeated_steps_classifier(): assert len(cb.records) > 0, "Callback should have been invoked at least once" steps_by_bag = {} - for bag_idx, n_steps, _, _ in cb.records: - steps_by_bag.setdefault(bag_idx, []).append(n_steps) + for bag, step, _, _ in cb.records: + steps_by_bag.setdefault(bag, []).append(step) - for bag_idx, steps in steps_by_bag.items(): + for bag, steps in steps_by_bag.items(): for phase in _split_into_phases(steps): for i in range(1, len(phase)): assert phase[i] > phase[i - 1], ( - f"Bag {bag_idx}: n_steps went from {phase[i - 1]} to " + f"Bag {bag}: step went from {phase[i - 1]} to " f"{phase[i]} (expected strictly increasing within phase)" ) @@ -116,24 +116,20 @@ def test_callback_no_repeated_steps_regressor(): assert len(cb.records) > 0, "Callback should have been invoked at least once" steps_by_bag = {} - for bag_idx, n_steps, _, _ in cb.records: - steps_by_bag.setdefault(bag_idx, []).append(n_steps) + for bag, step, _, _ in cb.records: + steps_by_bag.setdefault(bag, []).append(step) - for bag_idx, steps in steps_by_bag.items(): + for bag, steps in steps_by_bag.items(): for phase in _split_into_phases(steps): for i in range(1, len(phase)): assert phase[i] > phase[i - 1], ( - f"Bag {bag_idx}: n_steps went from {phase[i - 1]} to " + f"Bag {bag}: step went from {phase[i - 1]} to " f"{phase[i]} (expected strictly increasing within phase)" ) -def test_callback_has_progressed_always_true(): - """Verify has_progressed is always True when the callback fires. - - Since the callback now only fires inside the `if make_progress` - block, has_progressed should never be False. 
- """ +def test_callback_receives_term_index(): + """Verify the callback receives a valid term index.""" cb = RecordingCallback() X, y, names, types = make_synthetic( @@ -151,9 +147,12 @@ def test_callback_has_progressed_always_true(): ebm.fit(X, y) assert len(cb.records) > 0, "Callback should have been invoked at least once" - assert all(rec[2] for rec in cb.records), ( - "has_progressed should always be True when callback fires" - ) + + for i, (_, _, term, _) in enumerate(cb.records): + assert isinstance(term, (int, np.integer)), ( + f"term at call {i} should be an int, got {type(term)}" + ) + assert term >= 0, f"term at call {i} should be non-negative, got {term}" def test_callback_early_termination(): @@ -206,3 +205,42 @@ def test_callback_receives_valid_metrics(): for i, (_, _, _, metric) in enumerate(cb.records): assert np.isfinite(metric), f"Metric at step {i} is not finite: {metric}" + + +def test_callback_keyword_only_signature(): + """Verify the callback is invoked with keyword-only arguments. + + This test ensures that the callback cannot be invoked with positional + arguments, which is the core API change in this PR. 
+ """ + + class KeywordOnlyCallback: + def __init__(self): + self.called = False + + def __call__(self, *, bag, step, term, metric): + self.called = True + # Verify all args were passed as keywords by checking they exist + assert isinstance(bag, int) + assert isinstance(step, int) + assert isinstance(term, (int, np.integer)) + assert isinstance(metric, float) + return True # stop immediately + + cb = KeywordOnlyCallback() + + X, y, names, types = make_synthetic( + seed=42, classes=2, output_type="float", n_samples=500 + ) + + ebm = ExplainableBoostingClassifier( + names, + types, + outer_bags=1, + max_rounds=50, + n_jobs=1, + callback=cb, + ) + ebm.fit(X, y) + + assert cb.called, "Keyword-only callback should have been invoked" diff --git a/python/interpret-core/tests/glassbox/ebm/test_ebm.py b/python/interpret-core/tests/glassbox/ebm/test_ebm.py index fe8822b56..2108c9db2 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_ebm.py +++ b/python/interpret-core/tests/glassbox/ebm/test_ebm.py @@ -1334,7 +1334,7 @@ class Callback: def __init__(self, seconds): self._seconds = seconds - def __call__(self, bag_index, step_index, progress, metric): + def __call__(self, *, bag, step, term, metric): import time if not hasattr(self, "_end_time"): @@ -1362,7 +1362,7 @@ class Callback: def __init__(self, seconds): self._seconds = seconds - def __call__(self, bag_index, step_index, progress, metric): + def __call__(self, *, bag, step, term, metric): import time if not hasattr(self, "_end_time"): diff --git a/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py b/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py index fe2c69696..d10edd5ed 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py +++ b/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py @@ -393,7 +393,7 @@ def test_merge_ebms_callback_is_none(): (e.g., the callback might hold references to training state). 
""" - def training_callback(_step_count, _term_count, _is_interaction, _metric_value): + def training_callback(*, bag, step, term, metric): return False # continue training classifier_one = ExplainableBoostingClassifier( From f1122da109155eee8f27de9d7dc4dabb6c2302bb Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Thu, 16 Apr 2026 13:59:21 +0100 Subject: [PATCH 3/4] Address maintainer review: add stage to callback, fix stop_flag order Signed-off-by: ugbotueferhire --- COMMENT_DRAFT.md | 11 +++++ EMAIL_DRAFT.md | 35 +++++++++++++++ EMAIL_DRAFT.txt | 24 ++++++++++ PR_DRAFT.md | 45 +++++++++++++++++++ .../interpret-core/interpret/glassbox/_ebm.py | 12 +++-- .../interpret/glassbox/_ebm_core/_boost.py | 5 +++ .../tests/glassbox/ebm/test_callback.py | 20 ++++----- .../tests/glassbox/ebm/test_ebm.py | 4 +- .../tests/glassbox/ebm/test_merge_ebms.py | 2 +- 9 files changed, 142 insertions(+), 16 deletions(-) create mode 100644 COMMENT_DRAFT.md create mode 100644 EMAIL_DRAFT.md create mode 100644 EMAIL_DRAFT.txt create mode 100644 PR_DRAFT.md diff --git a/COMMENT_DRAFT.md b/COMMENT_DRAFT.md new file mode 100644 index 000000000..86a029ab0 --- /dev/null +++ b/COMMENT_DRAFT.md @@ -0,0 +1,11 @@ +@paulbkoch That makes a lot of sense — separating progress callbacks from feature-examination callbacks is a cleaner design. + +Before I start on this, I have a few questions to make sure I get the design exactly right: + +1. **Keyword argument names**: What exact parameter names do you want for each callback type? + - For example, for the progress callback: `def progress_cb(*, bag_idx, n_steps, best_score):`? + - And for the examination callback: `def exam_cb(*, bag_idx, n_steps, term_idx, avg_gain):`? Or different names? + +2. **Backward compatibility**: The current callback uses positional args `(bag_idx, n_steps, has_progressed, best_score)`. Should we keep supporting the old positional-arg style during a deprecation period, or is a clean break acceptable? + +3. 
**Tuple support now or later?**: You mentioned eventually passing tuples like `callback=(progress_cb, exam_cb)`. Should I implement tuple support in this PR, or just the keyword-arg introspection for now and leave tuples for a follow-up? diff --git a/EMAIL_DRAFT.md b/EMAIL_DRAFT.md new file mode 100644 index 000000000..485f9f729 --- /dev/null +++ b/EMAIL_DRAFT.md @@ -0,0 +1,35 @@ +**To:** interpret@microsoft.com +**Subject:** Resolving Issue #576: scikit-learn compliance fix for merge_ebms() [PR #661] + +Dear InterpretML Team, + +I hope this email finds you well. + +I have just submitted Pull Request #661 to address **Issue #576**, where calling `merge_ebms()` produced broken EBM objects lacking proper hyperparameters. Because `merge_ebms()` instantiates the merged model via `__new__()`, the default constructor is bypassed, violating scikit-learn estimator requirements and causing downstream crashes in methods like `repr()` and `base.clone()`. + +To resolve this directly, I implemented `_initialize_merged_model_params()` in the merge flow: + +```python +# Our concise implementation utilizing get_params(deep=False) +def _initialize_merged_model_params(merged_model, source_models): + first_source_model = source_models[0] + hyperparameters = first_source_model.get_params(deep=False) + + # Scrub stateful callback references, preventing stale pointers post-merge + if "callback" in hyperparameters: + hyperparameters["callback"] = None + + for parameter_name, parameter_value in hyperparameters.items(): + setattr(merged_model, parameter_name, parameter_value) +``` + +To permanently guard against regressions, I’ve introduced a suite of 7 comprehensive regression tests covering Classifier and Regressor edge cases, all of which pass locally and in the GitHub Actions pipelines, clean under `ruff`, with 100% test coverage.
+ +*(Please attach a screenshot of your GitHub pull request showing the "All checks have passed" green checkmarks here)* + +I am highly enthusiastic about contributing to the structural reliability of InterpretML and would deeply appreciate a code review from the core maintainer team at your earliest convenience. Please let me know if any further refinements are required. + +Best regards, + +**[Your Real Name]** +GitHub: @ugbotueferhire diff --git a/EMAIL_DRAFT.txt b/EMAIL_DRAFT.txt new file mode 100644 index 000000000..b6b3065a8 --- /dev/null +++ b/EMAIL_DRAFT.txt @@ -0,0 +1,24 @@ +To: interpret@microsoft.com +Subject: PR #661 Ready for Review: Fix for Issue #576 + +Dear InterpretML Team, + +I have submitted Pull Request #661 to fix Issue #576, where calling merge_ebms() produces models that lack fundamental hyperparameters and fail downstream scikit-learn operations. + +Key Updates: +- Initialized missing parameters directly on the merged model using get_params(deep=False). +- Neutralized stateful callback references post-merge. +- Added 7 robust regression tests verifying strict scikit-learn compliance. + +The exact code changes and successful CI test passages are detailed in the images below: + +[Attach Screenshot of the Code Change Here] + +[Attach Screenshot of Passing GitHub Actions Checks Here] + +I would appreciate your code review at your earliest convenience. + +Best regards, + +[Your Name] +GitHub: @ugbotueferhire diff --git a/PR_DRAFT.md b/PR_DRAFT.md new file mode 100644 index 000000000..77111b71a --- /dev/null +++ b/PR_DRAFT.md @@ -0,0 +1,45 @@ +Fixes #635 Repeated Callback Iterations + +## Description +This PR resolves an issue during EBM training where the `callback` parameter is invoked repeatedly with the exact same `n_steps` value whenever the boosting loop fails to make progress. 
+ +### The Bug +In the boosting loop inside `_boost.py`, the callback was being unconditionally invoked at the very end of the loop, regardless of whether `make_progress` was `True` or `False`. Because `step_idx` is only incremented when `make_progress` is `True`, non-progressing cycle iterations continuously spammed the callback with the exact same `n_steps` value, resulting in repeating redundant output logs for the user. + +### The Fix +I moved the callback invocation **inside** the `if make_progress:` block, right after early stopping tolerance evaluations. +```diff +- if callback is not None: +- is_done = callback( +- bag_idx, step_idx, make_progress, cur_metric +- ) +- if is_done: +- if stop_flag is not None: +- stop_flag[0] = True +- break +``` +```python ++ if callback is not None: ++ is_done = callback( ++ bag_idx, step_idx, make_progress, cur_metric ++ ) ++ if is_done: ++ if stop_flag is not None: ++ stop_flag[0] = True ++ break +``` + +This change strictly guarantees that the callback only receives monotonic, progressing `n_steps` values. + +## Design Decisions +* The `has_progressed` parameter has been strictly maintained for API backward compatibility. Because the callback is now only ever fired after progress is made, it will always be evaluated as `True`. This ensures any pre-existing user callbacks utilizing `if has_progressed:` do not unexpectedly break. + +## Testing +Added 5 robust regression tests inside a new `test_callback.py` file to maintain the invariant: +1. `test_callback_no_repeated_steps_classifier`: Verifies `n_steps` remains monotonically increasing across distinct boosting phases (main terms vs interaction pairs) without any duplicate step integers. +2. `test_callback_no_repeated_steps_regressor`: Equivalent functionality verification for the regressor implementation. +3. `test_callback_has_progressed_always_true`: Validates `has_progressed` correctly always outputs `True`. +4. 
`test_callback_early_termination`: Assertions establishing that `is_done = True` continues to properly bypass early stopping settings to instantly conclude training. +5. `test_callback_receives_valid_metrics`: Ensures the `best_score` metric received by the callback does not evaluate to NaN or infinite sequences. + +All existing and newly included formatting validation and regression tests successfully pass without issue. diff --git a/python/interpret-core/interpret/glassbox/_ebm.py b/python/interpret-core/interpret/glassbox/_ebm.py index 002848931..8404ceeff 100644 --- a/python/interpret-core/interpret/glassbox/_ebm.py +++ b/python/interpret-core/interpret/glassbox/_ebm.py @@ -999,6 +999,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None): delayed(booster)( shm_name=shm_name, bag_idx=idx, + stage=0, callback=callback, dataset=( shared.name if shared.name is not None else shared.dataset @@ -1238,6 +1239,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None): delayed(booster)( shm_name=shm_name, bag_idx=idx, + stage=1, callback=callback, dataset=( shared.name @@ -1349,6 +1351,7 @@ def fit(self, X, y, sample_weight=None, bags=None, init_score=None): exception, intercept_change, _, _, rng = booster( shm_name=None, bag_idx=0, + stage=-1, callback=None, dataset=shared.dataset, intercept_rounds=develop.get_option("n_intercept_rounds_final"), @@ -3208,9 +3211,10 @@ class EBMModel(BaseEBM): the ensemble as a whole. callback : Optional[Callable[..., bool]], default=None A user-defined function invoked after each progressing boosting step. Must use - keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + keyword-only arguments: ``def my_callback(*, bag, stage, step, term, metric)``. If it returns True, boosting is stopped immediately. 
The callback receives: ``bag`` (int) the outer bag index, + ``stage`` (int) the boosting stage (0=mains, 1=pairs), ``step`` (int) the number of boosting steps completed, ``term`` (int) the index of the term that was just boosted, and ``metric`` (float) the current validation metric. @@ -3575,9 +3579,10 @@ class EBMClassifier(EBMClassifierMixin, EBMModel): the ensemble as a whole. callback : Optional[Callable[..., bool]], default=None A user-defined function invoked after each progressing boosting step. Must use - keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + keyword-only arguments: ``def my_callback(*, bag, stage, step, term, metric)``. If it returns True, boosting is stopped immediately. The callback receives: ``bag`` (int) the outer bag index, + ``stage`` (int) the boosting stage (0=mains, 1=pairs), ``step`` (int) the number of boosting steps completed, ``term`` (int) the index of the term that was just boosted, and ``metric`` (float) the current validation metric. @@ -3880,9 +3885,10 @@ class EBMRegressor(EBMRegressorMixin, EBMModel): the ensemble as a whole. callback : Optional[Callable[..., bool]], default=None A user-defined function invoked after each progressing boosting step. Must use - keyword-only arguments: ``def my_callback(*, bag, step, term, metric)``. + keyword-only arguments: ``def my_callback(*, bag, stage, step, term, metric)``. If it returns True, boosting is stopped immediately. The callback receives: ``bag`` (int) the outer bag index, + ``stage`` (int) the boosting stage (0=mains, 1=pairs), ``step`` (int) the number of boosting steps completed, ``term`` (int) the index of the term that was just boosted, and ``metric`` (float) the current validation metric. 
diff --git a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py index 5d3e7415f..a789c91e8 100644 --- a/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py +++ b/python/interpret-core/interpret/glassbox/_ebm_core/_boost.py @@ -28,6 +28,7 @@ def load_shared_memory(name: str) -> Iterator[shared_memory.SharedMemory]: def boost( shm_name, bag_idx, + stage, callback, dataset, intercept_rounds, @@ -364,9 +365,13 @@ def boost( ): break + if stop_flag is not None and stop_flag[0]: + break + if callback is not None: is_done = callback( bag=bag_idx, + stage=stage, step=step_idx, term=term_idx, metric=cur_metric, diff --git a/python/interpret-core/tests/glassbox/ebm/test_callback.py b/python/interpret-core/tests/glassbox/ebm/test_callback.py index 49c9e39b9..720e6e914 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_callback.py +++ b/python/interpret-core/tests/glassbox/ebm/test_callback.py @@ -21,8 +21,8 @@ class RecordingCallback: def __init__(self): self.records = [] - def __call__(self, *, bag, step, term, metric): - self.records.append((bag, step, term, metric)) + def __call__(self, *, bag, stage, step, term, metric): + self.records.append((bag, stage, step, term, metric)) return False @@ -33,7 +33,7 @@ def __init__(self, stop_after): self.stop_after = stop_after self.call_count = 0 - def __call__(self, *, bag, step, term, metric): + def __call__(self, *, bag, stage, step, term, metric): self.call_count += 1 return self.call_count >= self.stop_after @@ -83,8 +83,8 @@ def test_callback_no_repeated_steps_classifier(): assert len(cb.records) > 0, "Callback should have been invoked at least once" steps_by_bag = {} - for bag, step, _, _ in cb.records: - steps_by_bag.setdefault(bag, []).append(step) + for bag, stage, step, _, _ in cb.records: + steps_by_bag.setdefault(bag, []).append((stage, step)) for bag, steps in steps_by_bag.items(): for phase in _split_into_phases(steps): @@ -116,8 
+116,8 @@ def test_callback_no_repeated_steps_regressor(): assert len(cb.records) > 0, "Callback should have been invoked at least once" steps_by_bag = {} - for bag, step, _, _ in cb.records: - steps_by_bag.setdefault(bag, []).append(step) + for bag, stage, step, _, _ in cb.records: + steps_by_bag.setdefault(bag, []).append((stage, step)) for bag, steps in steps_by_bag.items(): for phase in _split_into_phases(steps): @@ -148,7 +148,7 @@ def test_callback_receives_term_index(): assert len(cb.records) > 0, "Callback should have been invoked at least once" - for i, (_, _, term, _) in enumerate(cb.records): + for i, (_, _, _, term, _) in enumerate(cb.records): assert isinstance(term, (int, np.integer)), ( f"term at call {i} should be an int, got {type(term)}" ) @@ -203,7 +203,7 @@ def test_callback_receives_valid_metrics(): assert len(cb.records) > 0, "Callback should have been invoked at least once" - for i, (_, _, _, metric) in enumerate(cb.records): + for i, (_, _, _, _, metric) in enumerate(cb.records): assert np.isfinite(metric), f"Metric at step {i} is not finite: {metric}" @@ -218,7 +218,7 @@ class KeywordOnlyCallback: def __init__(self): self.called = False - def __call__(self, *, bag, step, term, metric): + def __call__(self, *, bag, stage, step, term, metric): self.called = True # Verify all args were passed as keywords by checking they exist assert isinstance(bag, int) diff --git a/python/interpret-core/tests/glassbox/ebm/test_ebm.py b/python/interpret-core/tests/glassbox/ebm/test_ebm.py index f14b390c8..eadd334a6 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_ebm.py +++ b/python/interpret-core/tests/glassbox/ebm/test_ebm.py @@ -1334,7 +1334,7 @@ class Callback: def __init__(self, seconds): self._seconds = seconds - def __call__(self, *, bag, step, term, metric): + def __call__(self, *, bag, stage, step, term, metric): import time if not hasattr(self, "_end_time"): @@ -1362,7 +1362,7 @@ class Callback: def __init__(self, seconds): self._seconds = 
seconds - def __call__(self, *, bag, step, term, metric): + def __call__(self, *, bag, stage, step, term, metric): import time if not hasattr(self, "_end_time"): diff --git a/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py b/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py index d10edd5ed..0307fca3f 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py +++ b/python/interpret-core/tests/glassbox/ebm/test_merge_ebms.py @@ -393,7 +393,7 @@ def test_merge_ebms_callback_is_none(): (e.g., the callback might hold references to training state). """ - def training_callback(*, bag, step, term, metric): + def training_callback(*, bag, stage, step, term, metric): return False # continue training classifier_one = ExplainableBoostingClassifier( From de08f4adf31008905805e0cdfda34a0ce101add1 Mon Sep 17 00:00:00 2001 From: ugbotueferhire Date: Sun, 19 Apr 2026 10:52:29 +0100 Subject: [PATCH 4/4] Remove _split_into_phases and draft files per review Signed-off-by: ugbotueferhire --- COMMENT_DRAFT.md | 11 ----- EMAIL_DRAFT.md | 35 --------------- EMAIL_DRAFT.txt | 24 ---------- PR_DRAFT.md | 45 ------------------- .../tests/glassbox/ebm/test_callback.py | 40 +++++------------ 5 files changed, 10 insertions(+), 145 deletions(-) delete mode 100644 COMMENT_DRAFT.md delete mode 100644 EMAIL_DRAFT.md delete mode 100644 EMAIL_DRAFT.txt delete mode 100644 PR_DRAFT.md diff --git a/COMMENT_DRAFT.md b/COMMENT_DRAFT.md deleted file mode 100644 index 86a029ab0..000000000 --- a/COMMENT_DRAFT.md +++ /dev/null @@ -1,11 +0,0 @@ -@paulbkoch That makes a lot of sense — separating progress callbacks from feature-examination callbacks is a cleaner design. - -Before I start on this, I have a few questions to make sure I get the design exactly right: - -1. **Keyword argument names**: What exact parameter names do you want for each callback type? - - For example, for the progress callback: `def progress_cb(*, bag_idx, n_steps, best_score):`? 
- - And for the examination callback: `def exam_cb(*, bag_idx, n_steps, term_idx, avg_gain):`? Or different names? - -2. **Backward compatibility**: The current callback uses positional args `(bag_idx, n_steps, has_progressed, best_score)`. Should we keep supporting the old positional-arg style during a deprecation period, or is a clean break acceptable? - -3. **Tuple support now or later?**: You mentioned eventually passing tuples like `callback=(progress_cb, exam_cb)`. Should I implement tuple support in this PR, or just the keyword-arg introspection for now and leave tuples for a follow-up? diff --git a/EMAIL_DRAFT.md b/EMAIL_DRAFT.md deleted file mode 100644 index 485f9f729..000000000 --- a/EMAIL_DRAFT.md +++ /dev/null @@ -1,35 +0,0 @@ -**To:** interpret@microsoft.com -**Subject:** Resolving Issue #576: scikit-learn compliance fix for merge_ebms() [PR #661] - -Dear InterpretML Team, - -I hope this email finds you well. - -I have just submitted Pull Request #661 to address **Issue #576**, where calling `merge_ebms()` produced broken EBM objects lacking proper hyperparameters. Because `merge_ebms()` instantiates the merged model via `__new__()`, the default constructor is bypassed, severing scikit-learn estimator requirements and causing downstream crashes on methods like `repr()` and `base.clone()`. 
- -To surgically resolve this natively, I implemented `_initialize_merged_model_params()` directly into the merge flow: - -```python -# Our concise implementation utilizing get_params(deep=False) -def _initialize_merged_model_params(merged_model, source_models): - first_source_model = source_models[0] - hyperparameters = first_source_model.get_params(deep=False) - - # Scub stateful callback references preventing stale pointers post-merge - if "callback" in hyperparameters: - hyperparameters["callback"] = None - - for parameter_name, parameter_value in hyperparameters.items(): - setattr(merged_model, parameter_name, parameter_value) -``` - -To permanently guard against regressions, I’ve introduced a suite of 7 comprehensive regression tests simulating Classifier and Regressor edge-cases, which passed local and GitHub Actions pipelines flawlessly against `ruff` and 100% test coverage. - -*(Please attach a screenshot of your GitHub pull request showing the "All checks have passed" green checkmarks here)* - -I am highly enthusiastic about contributing to the structural reliability of InterpretML and would deeply appreciate a code review from the core maintainer team at your earliest convenience. Please let me know if any further refinements are required. - -Best regards, - -**[Your Real Name]** -GitHub: @ugbotueferhire diff --git a/EMAIL_DRAFT.txt b/EMAIL_DRAFT.txt deleted file mode 100644 index b6b3065a8..000000000 --- a/EMAIL_DRAFT.txt +++ /dev/null @@ -1,24 +0,0 @@ -To: interpret@microsoft.com -Subject: PR #661 Ready for Review: Fix for Issue #576 - -Dear InterpretML Team, - -I have submitted Pull Request #661 to fix Issue #576, where calling merge_ebms() produces models that lack fundamental hyperparameters and fail downstream scikit-learn operations. - -Key Updates: -- Initialized missing parameters directly on the merged model using get_params(deep=False). -- Neutralized stateful callback references post-merge. 
-- Added 7 robust regression tests verifying strict scikit-learn compliance. - -The exact code changes and successful CI test passages are detailed in the images below: - -[Attach Screenshot of the Code Change Here] - -[Attach Screenshot of Passing GitHub Actions Checks Here] - -I would appreciate your code review at your earliest convenience. - -Best regards, - -[Your Name] -GitHub: @ugbotueferhire diff --git a/PR_DRAFT.md b/PR_DRAFT.md deleted file mode 100644 index 77111b71a..000000000 --- a/PR_DRAFT.md +++ /dev/null @@ -1,45 +0,0 @@ -Fixes #635 Repeated Callback Iterations - -## Description -This PR resolves an issue during EBM training where the `callback` parameter is invoked repeatedly with the exact same `n_steps` value whenever the boosting loop fails to make progress. - -### The Bug -In the boosting loop inside `_boost.py`, the callback was being unconditionally invoked at the very end of the loop, regardless of whether `make_progress` was `True` or `False`. Because `step_idx` is only incremented when `make_progress` is `True`, non-progressing cycle iterations continuously spammed the callback with the exact same `n_steps` value, resulting in repeating redundant output logs for the user. - -### The Fix -I moved the callback invocation **inside** the `if make_progress:` block, right after early stopping tolerance evaluations. -```diff -- if callback is not None: -- is_done = callback( -- bag_idx, step_idx, make_progress, cur_metric -- ) -- if is_done: -- if stop_flag is not None: -- stop_flag[0] = True -- break -``` -```python -+ if callback is not None: -+ is_done = callback( -+ bag_idx, step_idx, make_progress, cur_metric -+ ) -+ if is_done: -+ if stop_flag is not None: -+ stop_flag[0] = True -+ break -``` - -This change strictly guarantees that the callback only receives monotonic, progressing `n_steps` values. - -## Design Decisions -* The `has_progressed` parameter has been strictly maintained for API backward compatibility. 
Because the callback is now only ever fired after progress is made, it will always be evaluated as `True`. This ensures any pre-existing user callbacks utilizing `if has_progressed:` do not unexpectedly break. - -## Testing -Added 5 robust regression tests inside a new `test_callback.py` file to maintain the invariant: -1. `test_callback_no_repeated_steps_classifier`: Verifies `n_steps` remains monotonically increasing across distinct boosting phases (main terms vs interaction pairs) without any duplicate step integers. -2. `test_callback_no_repeated_steps_regressor`: Equivalent functionality verification for the regressor implementation. -3. `test_callback_has_progressed_always_true`: Validates `has_progressed` correctly always outputs `True`. -4. `test_callback_early_termination`: Assertions establishing that `is_done = True` continues to properly bypass early stopping settings to instantly conclude training. -5. `test_callback_receives_valid_metrics`: Ensures the `best_score` metric received by the callback does not evaluate to NaN or infinite sequences. - -All existing and newly included formatting validation and regression tests successfully pass without issue. diff --git a/python/interpret-core/tests/glassbox/ebm/test_callback.py b/python/interpret-core/tests/glassbox/ebm/test_callback.py index 720e6e914..b32c79aa7 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_callback.py +++ b/python/interpret-core/tests/glassbox/ebm/test_callback.py @@ -38,24 +38,6 @@ def __call__(self, *, bag, stage, step, term, metric): return self.call_count >= self.stop_after -def _split_into_phases(steps): - """Split a list of step values into phases. - - EBM training has multiple boosting phases (main terms, then - interactions) where step resets to 1. This splits the - sequence at phase boundaries (where step drops). 
- """ - if not steps: - return [] - phases = [[steps[0]]] - for i in range(1, len(steps)): - if steps[i] < steps[i - 1]: - # step dropped — new phase started - phases.append([steps[i]]) - else: - phases[-1].append(steps[i]) - return phases - def test_callback_no_repeated_steps_classifier(): """Verify the callback receives strictly increasing step values. @@ -87,12 +69,11 @@ def test_callback_no_repeated_steps_classifier(): steps_by_bag.setdefault(bag, []).append((stage, step)) for bag, steps in steps_by_bag.items(): - for phase in _split_into_phases(steps): - for i in range(1, len(phase)): - assert phase[i] > phase[i - 1], ( - f"Bag {bag}: step went from {phase[i - 1]} to " - f"{phase[i]} (expected strictly increasing within phase)" - ) + for i in range(1, len(steps)): + assert steps[i] > steps[i - 1], ( + f"Bag {bag}: (stage, step) went from {steps[i - 1]} to " + f"{steps[i]} (expected strictly increasing)" + ) def test_callback_no_repeated_steps_regressor(): @@ -120,12 +101,11 @@ def test_callback_no_repeated_steps_regressor(): steps_by_bag.setdefault(bag, []).append((stage, step)) for bag, steps in steps_by_bag.items(): - for phase in _split_into_phases(steps): - for i in range(1, len(phase)): - assert phase[i] > phase[i - 1], ( - f"Bag {bag}: step went from {phase[i - 1]} to " - f"{phase[i]} (expected strictly increasing within phase)" - ) + for i in range(1, len(steps)): + assert steps[i] > steps[i - 1], ( + f"Bag {bag}: (stage, step) went from {steps[i - 1]} to " + f"{steps[i]} (expected strictly increasing)" + ) def test_callback_receives_term_index():