Skip to content

Commit b8ecdf1

Browse files
authored
[BUG] fix _score_params + add tests + fix errors 'masked' by the 'try... except'-block (#237)
### Bug fix: `_score_params` passed parameters incorrectly `_score_params` called `experiment(**params)`, unpacking the dict as keyword arguments. `BaseExperiment.__call__` expects a single positional dict, so every call raised a `TypeError` that was silently swallowed by the bare `except` clause. All candidates received `error_score` (NaN) and `np.argmax` always picked index 0, making optimization results appear stable but entirely meaningless. Fixed by changing `experiment(**params)` to `experiment(params)`. The `except` clause now also emits a `warnings.warn` so caught exceptions are no longer silent. ### Fix: deterministic test parameters for `TSCOptCV` The corrected `_score_params` exposed non-determinism in `TSCOptCV.get_test_params`: the `"stratified"` DummyClassifier strategy produces random predictions, and the default CV used `shuffle=True` without a fixed seed. Together these caused `test_fit_idempotent` to fail intermittently. Fixed by replacing `"stratified"` with `"prior"` (deterministic) and pinning `cv=KFold(n_splits=2, shuffle=False)`. ### Enhancement: `_predict_proba` override for `TSCOptCV` `TSCOptCV` overrode `_predict` with a `refit=False` guard but left `_predict_proba` unoverridden. With `refit=False`, `predict` raised `RuntimeError` while `predict_proba` silently delegated to an unfitted estimator. Added the same guard to `_predict_proba`. ### Tests Added unit tests for `_score_params` covering correct dict passing, return type, error score fallback, and warning emission.
1 parent 7e049cb commit b8ecdf1

File tree

4 files changed

+168
-7
lines changed

4 files changed

+168
-7
lines changed

src/hyperactive/integrations/sktime/_classification.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,39 @@ class labels for fitting
258258

259259
return self
260260

261+
def _predict_proba(self, X):
262+
"""Predict class probabilities for sequences in X.
263+
264+
private _predict_proba containing the core logic, called from predict_proba
265+
266+
State required:
267+
Requires state to be "fitted".
268+
269+
Accesses in self:
270+
Fitted model attributes ending in "_"
271+
272+
Parameters
273+
----------
274+
X : guaranteed to be of a type in self.get_tag("X_inner_mtype")
275+
if self.get_tag("X_inner_mtype") = "numpy3D":
276+
3D np.ndarray of shape = [n_instances, n_dimensions, series_length]
277+
if self.get_tag("X_inner_mtype") = "nested_univ":
278+
pd.DataFrame with each column a dimension, each cell a pd.Series
279+
for list of other mtypes, see datatypes.SCITYPE_REGISTER
280+
for specifications, see examples/AA_datatypes_and_datasets.ipynb
281+
282+
Returns
283+
-------
284+
y : 2D array of shape [n_instances, n_classes] - predicted class probabilities
285+
"""
286+
if not self.refit:
287+
raise RuntimeError(
288+
f"In {self.__class__.__name__}, refit must be True to make predictions,"
289+
f" but found refit=False. If refit=False, {self.__class__.__name__} can"
290+
" be used only to tune hyper-parameters, as a parameter estimator."
291+
)
292+
return super()._predict_proba(X=X)
293+
261294
def _predict(self, X):
262295
"""Predict labels for sequences in X.
263296
@@ -317,15 +350,16 @@ def get_test_params(cls, parameter_set="default"):
317350

318351
params_gridsearch = {
319352
"estimator": DummyClassifier(),
353+
"cv": KFold(n_splits=2, shuffle=False),
320354
"optimizer": GridSearchSk(
321-
param_grid={"strategy": ["most_frequent", "stratified"]}
355+
param_grid={"strategy": ["most_frequent", "prior"]}
322356
),
323357
}
324358
params_randomsearch = {
325359
"estimator": DummyClassifier(),
326-
"cv": 2,
360+
"cv": KFold(n_splits=2, shuffle=False),
327361
"optimizer": RandomSearchSk(
328-
param_distributions={"strategy": ["most_frequent", "stratified"]},
362+
param_distributions={"strategy": ["most_frequent", "prior"]},
329363
),
330364
"scoring": accuracy_score,
331365
}

src/hyperactive/opt/_common.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Common functions used by multiple optimizers."""
22

3+
import warnings
4+
35
__all__ = ["_score_params"]
46

57

@@ -14,7 +16,11 @@ def _score_params(params, meta):
1416
error_score = meta["error_score"]
1517

1618
try:
17-
return float(experiment(**params))
18-
except Exception: # noqa: B904
19-
# Catch all exceptions and assign error_score
20-
return error_score
19+
return float(experiment(params))
20+
except Exception as e:
21+
warnings.warn(
22+
f"Experiment raised {type(e).__name__}: {e}. "
23+
f"Assigning error_score={error_score}.",
24+
stacklevel=2,
25+
)
26+
return float(error_score)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Tests for the opt module."""
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
"""Tests for _score_params to guard against parameter passing regressions."""
2+
3+
import numpy as np
4+
import pytest
5+
6+
from hyperactive.opt._common import _score_params
7+
8+
9+
class _DictExperiment:
10+
"""Minimal experiment stub that expects params as a single dict."""
11+
12+
def __call__(self, params):
13+
return params["x"] ** 2 + params["y"] ** 2
14+
15+
16+
class _DictOnlyExperiment:
17+
"""Experiment stub that rejects keyword arguments.
18+
19+
Fails loudly if params are passed as **kwargs instead of a dict,
20+
directly guarding against the ``experiment(**params)`` bug.
21+
"""
22+
23+
def __call__(self, params):
24+
if not isinstance(params, dict):
25+
raise TypeError(
26+
f"Expected a dict, got {type(params).__name__}. "
27+
"Parameters must be passed as a single dict, not as **kwargs."
28+
)
29+
return sum(v**2 for v in params.values())
30+
31+
32+
def _make_meta(experiment, error_score=np.nan):
33+
return {"experiment": experiment, "error_score": error_score}
34+
35+
36+
class TestScoreParams:
    """Unit tests for the _score_params helper function."""

    def test_params_passed_as_dict(self):
        """Params must be passed as a single dict, not unpacked as **kwargs."""
        meta = _make_meta(_DictOnlyExperiment())
        assert _score_params({"x": 3.0, "y": 4.0}, meta) == 25.0

    def test_returns_correct_score(self):
        """Score must match the experiment's return value."""
        meta = _make_meta(_DictExperiment())
        cases = [
            ({"x": 0.0, "y": 0.0}, 0.0),
            ({"x": 1.0, "y": 0.0}, 1.0),
            ({"x": 3.0, "y": 4.0}, 25.0),
        ]
        for params, expected in cases:
            assert _score_params(params, meta) == expected

    def test_returns_python_float(self):
        """Return type must be a Python float, not numpy scalar."""
        meta = _make_meta(_DictExperiment())
        result = _score_params({"x": 1.0, "y": 1.0}, meta)
        assert type(result) is float

    def test_error_score_on_exception(self):
        """When the experiment raises, error_score must be returned."""

        def _raising_experiment(params):
            raise ValueError("intentional failure")

        meta = _make_meta(_raising_experiment, error_score=-999.0)
        with pytest.warns(match="intentional failure"):
            score = _score_params({"x": 1.0}, meta)
        assert score == -999.0

    def test_error_score_emits_warning(self):
        """A caught exception must produce a warning, never be silent."""

        def _raising_experiment(params):
            raise RuntimeError("boom")

        meta = _make_meta(_raising_experiment, error_score=np.nan)
        with pytest.warns(match="RuntimeError"):
            _score_params({"x": 1.0}, meta)

    def test_many_params_passed_as_dict(self):
        """Regression: many keys must not be unpacked as keyword arguments.

        With the old ``experiment(**params)`` bug, this would raise
        TypeError inside __call__ because it only accepts one argument.
        """
        meta = _make_meta(lambda params: sum(params.values()))
        many_params = {f"x{i}": float(i) for i in range(20)}
        assert _score_params(many_params, meta) == float(sum(range(20)))

    def test_with_base_experiment(self):
        """Integration: works with a real BaseExperiment subclass."""
        from hyperactive.experiment.bench import Sphere

        meta = _make_meta(Sphere(n_dim=2))
        # Sphere's minimum sits at the origin with value 0; __call__
        # returns a sign-adjusted (higher-is-better) score, so the origin
        # must outscore any point away from it.
        at_origin = _score_params({"x0": 0.0, "x1": 0.0}, meta)
        off_origin = _score_params({"x0": 3.0, "x1": 4.0}, meta)
        assert at_origin > off_origin

0 commit comments

Comments
 (0)