Fix WithinSession/WithinSubjectSplitter overwriting explicit n_splits in cv_kwargs (#1107)

Copilot · bruAristimunha · web-flow · commit 535a4b306a53 · 2026-06-28T01:14:16.000+02:00
* Initial plan

* Fix cv_kwargs n_splits override in WithinSession/WithinSubject splitters

* fix(evaluations): honour n_splits in Within* evaluations and make WithinSubjectSplitter reproducible

- WithinSessionEvaluation/WithinSubjectEvaluation now map the base-class
  n_splits to the inner n_folds instead of hardcoding 5 folds, matching
  CrossSubjectEvaluation.
- WithinSubjectSplitter.split() reseeds its RNG per call (shared across
  subjects to preserve the legacy fold sequence) so repeated calls with a
  fixed random_state are reproducible.
- Correct the cv_kwargs docstrings/changelog: of the parameters the
  splitters set themselves, only n_splits can be overridden through
  cv_kwargs (shuffle/random_state are named constructor params).

* style: use dict literal in test_within_n_splits_drives_n_folds (ruff C408)

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Bru <b.aristimunha@gmail.com>
diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst
@@ -71,6 +71,7 @@ Bugs
 - Fix Windows download path sanitization that changed absolute paths like ``C:\data`` into relative ``C-\data`` paths (:gh:`1079` by `Anton Andreev`_).
 - Fix missing electrode positions (NaN xyz) in six motor-imagery datasets so topographic maps, interpolation, and spatial methods work: :class:`moabb.datasets.Forenzo2023` and :class:`moabb.datasets.GuttmannFlury2025_MI`/``_ME`` normalize Neuroscan ALL_CAPS labels and apply ``standard_1005`` (CB1/CB2 kept as ``misc``); :class:`moabb.datasets.Dreyer2023` falls back to ``standard_1005`` when the BIDS archive ships no ``electrodes.tsv``; :class:`moabb.datasets.BNCI2003_004` maps its 26 legacy Berlin channel labels to their modern 10-5 equivalents for exact positions; :class:`moabb.datasets.BNCI2014_002` applies an approximate 3x5 grid for its unlabeled small-Laplacian channels; and :class:`moabb.datasets.Zhang2017` applies the ``GSN-HydroCel-32`` montage in EGI sensor order. Adds the shared :func:`moabb.datasets.utils.set_neuroscan_montage` helper (:gh:`1089` by `Bruno Aristimunha`_).
 - Fix ``BaseEvaluation._aggregate_fold_results`` aborting the whole evaluation with ``TypeError: agg function failed [how->mean,dtype->object]`` when a single fold contributes a non-numeric ``score`` (e.g. an error fold). The numeric aggregation columns are now coerced with ``pandas.to_numeric(errors="coerce")`` before ``groupby.agg``, so a bad fold becomes ``NaN`` and is skipped instead of taking down every subject/pipeline (:gh:`1095` by `Bruno Aristimunha`_).
+- Fix :class:`moabb.evaluations.splitters.WithinSessionSplitter` and :class:`moabb.evaluations.splitters.WithinSubjectSplitter` overwriting an explicit ``n_splits`` passed through ``cv_kwargs`` with the ``n_folds`` default; the caller-provided ``n_splits`` now takes precedence, so a single holdout split can be requested directly via ``cv_class=StratifiedShuffleSplit, n_splits=1``. :class:`moabb.evaluations.WithinSessionEvaluation` and :class:`moabb.evaluations.WithinSubjectEvaluation` now honour the ``n_splits`` argument instead of always running 5 folds, and :class:`moabb.evaluations.splitters.WithinSubjectSplitter` now yields reproducible per-subject folds for a fixed ``random_state`` (:gh:`1106` by `Bruno Aristimunha`_).
 Code health
 ~~~~~~~~~~~
 - Install CPU-only PyTorch wheels in CI by setting ``UV_TORCH_BACKEND=cpu`` in the test, braindecode, and docs workflows, so runners no longer download multi-GB CUDA builds of ``torch`` (pulled transitively via the ``deeplearning`` extra / braindecode) (:gh:`1083` by `Bhargav Kowshik`_).
diff --git a/moabb/evaluations/evaluations.py b/moabb/evaluations/evaluations.py
@@ -84,7 +84,7 @@ def _create_splitter(self):
         """Create the WithinSessionSplitter for parallel evaluation."""
         cv_class, cv_kwargs = self._resolve_cv(StratifiedKFold)
         return WithinSessionSplitter(
-            n_folds=5,
+            n_folds=self.n_splits or 5,
             shuffle=True,
             random_state=self.random_state,
             cv_class=cv_class,
@@ -658,7 +658,7 @@ def _create_splitter(self):
         """Create the WithinSubjectSplitter for parallel evaluation."""
         cv_class, cv_kwargs = self._resolve_cv(StratifiedKFold)
         return WithinSubjectSplitter(
-            n_folds=5,
+            n_folds=self.n_splits or 5,
             shuffle=True,
             random_state=self.random_state,
             cv_class=cv_class,
diff --git a/moabb/evaluations/splitters.py b/moabb/evaluations/splitters.py
@@ -59,6 +59,8 @@ class WithinSessionSplitter(BaseCrossValidator):
         Defaults to ``StratifiedKFold``.
     cv_kwargs : dict
         Additional arguments to pass to the inner cross-validation strategy.
+        An explicit ``n_splits`` provided here takes precedence over the
+        ``n_folds`` argument.
 
     """
 
@@ -95,7 +97,7 @@ def __init__(
             ("shuffle", shuffle),
             ("random_state", self._rng),
         ]:
-            if p in params:
+            if p in params and p not in cv_kwargs:
                 self._cv_kwargs[p] = v
         self._last_split_metadata = None
 
@@ -199,6 +201,8 @@ class WithinSubjectSplitter(BaseCrossValidator):
         Defaults to ``StratifiedKFold``.
     cv_kwargs : dict
         Additional arguments to pass to the inner cross-validation strategy.
+        An explicit ``n_splits`` provided here takes precedence over the
+        ``n_folds`` argument.
 
     """
 
@@ -235,7 +239,7 @@ def __init__(
             ("shuffle", shuffle),
             ("random_state", self._rng),
         ]:
-            if p in params:
+            if p in params and p not in cv_kwargs:
                 self._cv_kwargs[p] = v
         self._last_split_metadata = None
 
@@ -280,16 +284,26 @@ def split(self, y, metadata):
         # Shuffle subjects if required
         # Convert to numpy array to avoid ArrowStringArray shuffle warning
         subjects = np.array(metadata["subject"].unique())
+        # Reseed the RNG at each split() call so repeated calls are
+        # reproducible. A single RNG is shared across subjects (instead of a
+        # fresh per-subject one) to keep the fold sequence identical to the
+        # legacy within-subject behaviour.
+        rng = check_random_state(self.random_state) if self.shuffle else None
         if self.shuffle:
-            self._rng.shuffle(subjects)
+            rng.shuffle(subjects)
+
+        cv_kwargs = dict(self._cv_kwargs)
+        params = inspect.signature(self.cv_class).parameters
+        if self.shuffle and "random_state" in params:
+            cv_kwargs["random_state"] = rng
 
         for subject in subjects:
             subject_mask = metadata["subject"] == subject
             subject_indices = all_index[subject_mask]
             y_subject = y[subject_mask]
 
             # Instantiate a new internal splitter for each subject
-            splitter = self.cv_class(**self._cv_kwargs)
+            splitter = self.cv_class(**cv_kwargs)
 
             # Split using the cross-validation strategy across all sessions of the subject
             for train_ix, test_ix in splitter.split(subject_indices, y_subject):
diff --git a/moabb/tests/test_evaluations.py b/moabb/tests/test_evaluations.py
@@ -435,6 +435,26 @@ def setup_method(self):
         )
 
 
+@pytest.mark.parametrize(
+    "klass", [ev.WithinSessionEvaluation, ev.WithinSubjectEvaluation]
+)
+def test_within_n_splits_drives_n_folds(klass):
+    """n_splits sets the inner splitter's n_folds (defaults to 5 when unset)."""
+    kw = {
+        "paradigm": FakeImageryParadigm(),
+        "datasets": [dataset],
+        "hdf5_path": "res_test",
+    }
+    evals = {None: klass(**kw), 3: klass(n_splits=3, **kw)}
+    try:
+        for n, e in evals.items():
+            assert e._create_splitter().n_folds == (n or 5)
+    finally:
+        for e in evals.values():
+            if os.path.isfile(e.results.filepath):
+                os.remove(e.results.filepath)
+
+
 class Test_CrossSubj(TestWithinSess):
     def setup_method(self):
         self.eval = ev.CrossSubjectEvaluation(
diff --git a/moabb/tests/test_splits.py b/moabb/tests/test_splits.py
@@ -487,6 +487,44 @@ def test_within_subject_get_n_splits(data):
     assert n_splits == 5 * 5  # 5 subjects, 5 folds each
 
 
+@pytest.mark.parametrize("splitter", [WithinSessionSplitter, WithinSubjectSplitter])
+def test_cv_kwargs_n_splits_not_overwritten(data, splitter):
+    """Explicit n_splits in cv_kwargs must not be overwritten by n_folds."""
+    _, y, metadata = data
+
+    split = splitter(
+        cv_class=StratifiedShuffleSplit,
+        n_splits=1,
+        test_size=0.25,
+        shuffle=True,
+        random_state=42,
+    )
+
+    # The inner cv should keep the explicitly requested single split.
+    assert split._cv_kwargs["n_splits"] == 1
+
+    if splitter == WithinSessionSplitter:
+        num_groups = metadata.groupby(["subject", "session"]).ngroups
+    else:
+        num_groups = metadata["subject"].nunique()
+
+    splits = list(split.split(y, metadata))
+    assert len(splits) == num_groups  # one split per group, not n_folds per group
+
+
+@pytest.mark.parametrize("splitter", [WithinSessionSplitter, WithinSubjectSplitter])
+def test_within_split_is_reproducible(data, splitter):
+    """Repeated split() calls with a fixed seed must yield identical folds."""
+    _, y, metadata = data
+    split = splitter(shuffle=True, random_state=42)
+    first = list(split.split(y, metadata))
+    second = list(split.split(y, metadata))
+    assert len(first) == len(second)
+    for (train, test), (train_2, test_2) in zip(first, second):
+        assert np.array_equal(train, train_2)
+        assert np.array_equal(test, test_2)
+
+
 @pytest.mark.parametrize(
     "splitter", [CrossSessionSplitter, CrossSubjectSplitter, CrossDatasetSplitter]
 )