Skip to content

Commit f74795a

Browse files
committed
LibriBrain100: drop subjects='new' alias; 'broad' is the only multi-subject token
Matches the paper's deep/broad component naming and removes the redundant 'new' alias. No backwards-compat shim since LibriBrain100 hasn't shipped yet.
1 parent e07cc52 commit f74795a

3 files changed

Lines changed: 18 additions & 17 deletions

File tree

pnpl/datasets/libribrain100/dataset.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ class LibriBrain100(LibriBrain100Base):
3737
``"val"``/``"valid"`` accepted. ``None`` means "no
3838
partition filter — apply only the explicit selectors".
3939
subjects: Subject selector. Accepts ``"all"`` (default),
40-
``"new"`` / ``"broad"`` (sub-1..32), ``"deep"``
41-
(sub-0), an int, a string id (``"0"`` or ``"sub-0"``), or a
42-
list / range of ids.
40+
``"deep"`` (sub-0, the deep single-subject component),
41+
``"broad"`` (sub-1..32, the broad multi-subject
42+
component), an int, a string id (``"0"`` or ``"sub-0"``),
43+
or a list / range of ids.
4344
corpus: Corpus selector. Accepts ``"all"`` (default),
4445
``"sherlock"``, ``"timit"``, ``"mocha"``,
4546
``"podcasts"`` (aliases like ``"mocha-timit"``,
@@ -62,13 +63,13 @@ class LibriBrain100(LibriBrain100Base):
6263
6364
Notes:
6465
- The multi-subject (broad) data has no train partition by
65-
design; ``subjects="new" + partition="train"`` raises
66+
design; ``subjects="broad" + partition="train"`` raises
6667
:class:`ValueError`. For SFT workflows on broad subjects, use
6768
``partition="validation"`` as your fine-tuning training set
6869
and ``partition="test"`` for evaluation.
69-
- Multi-subject data lives only in the Sherlock corpus;
70-
``subjects="new" + corpus="timit"`` (or any non-Sherlock)
71-
raises :class:`ValueError`.
70+
- Multi-subject data was only collected with the Sherlock
71+
stimuli; ``subjects="broad" + corpus="timit"`` (or any
72+
non-Sherlock corpus) raises :class:`ValueError`.
7273
7374
Example:
7475
>>> from pnpl.datasets import LibriBrain100

pnpl/datasets/libribrain100/selectors.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,8 @@ def normalize_subjects(subjects: SubjectsArg) -> set[str]:
5656
5757
Accepted forms:
5858
- ``"all"`` — every subject (sub-0 + sub-1..32)
59-
- ``"new"`` — sub-1..32 only (no sub-0)
60-
- ``"deep"`` — alias for sub-0
61-
- ``"broad"`` — alias for sub-1..32
59+
- ``"deep"`` — sub-0 (the deep single-subject component)
60+
- ``"broad"`` — sub-1..32 (the broad multi-subject component)
6261
- ``0`` / ``"0"`` / ``"sub-0"`` — single subject
6362
- any iterable / range of ints or string ids
6463
"""
@@ -68,7 +67,7 @@ def normalize_subjects(subjects: SubjectsArg) -> set[str]:
6867
token = subjects.strip().lower()
6968
if token == "all":
7069
return set(SUBJECTS)
71-
if token == "new" or token == "broad":
70+
if token == "broad":
7271
return set(NEW_SUBJECTS)
7372
if token == "deep":
7473
return {DEEP_SUBJECT}
@@ -167,7 +166,7 @@ def validate_selector_combination(
167166

168167
if only_new and partition == PARTITION_TRAIN:
169168
raise ValueError(
170-
"subjects='new' (or any selection without subject 0) has no "
169+
"subjects='broad' (or any selection without subject 0) has no "
171170
"train partition by design — sub-1..32 contribute Sherlock1 "
172171
"ses-11 (validation) and ses-12 (test) only. For a "
173172
"supervised-fine-tuning workflow on the broad subjects, "

tests/test_libribrain100.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ def test_libribrain100_mro():
5555
def test_normalize_subjects_aliases():
5656
from pnpl.datasets.libribrain100 import normalize_subjects
5757
assert normalize_subjects("all") == {str(i) for i in range(33)}
58-
assert normalize_subjects("new") == {str(i) for i in range(1, 33)}
5958
assert normalize_subjects("broad") == {str(i) for i in range(1, 33)}
6059
assert normalize_subjects("deep") == {"0"}
6160
assert normalize_subjects(0) == {"0"}
@@ -70,6 +69,8 @@ def test_normalize_subjects_rejects_unknown():
7069
from pnpl.datasets.libribrain100 import normalize_subjects
7170
with pytest.raises(ValueError):
7271
normalize_subjects("not-a-subject")
72+
with pytest.raises(ValueError):
73+
normalize_subjects("new") # was an alias in the first pass; intentionally dropped
7374
with pytest.raises(ValueError):
7475
normalize_subjects(-1)
7576
with pytest.raises(ValueError):
@@ -116,29 +117,29 @@ def test_normalize_partition_aliases():
116117
# Selector validation
117118
# ---------------------------------------------------------------------------
118119

119-
def test_validate_rejects_new_with_train():
120+
def test_validate_rejects_broad_with_train():
120121
from pnpl.datasets.libribrain100 import (
121122
normalize_corpus,
122123
normalize_subjects,
123124
validate_selector_combination,
124125
)
125126
with pytest.raises(ValueError, match="train partition"):
126127
validate_selector_combination(
127-
subjects=normalize_subjects("new"),
128+
subjects=normalize_subjects("broad"),
128129
corpus=normalize_corpus("sherlock"),
129130
partition="train",
130131
)
131132

132133

133-
def test_validate_rejects_new_with_non_sherlock():
134+
def test_validate_rejects_broad_with_non_sherlock():
134135
from pnpl.datasets.libribrain100 import (
135136
normalize_corpus,
136137
normalize_subjects,
137138
validate_selector_combination,
138139
)
139140
with pytest.raises(ValueError, match="non-deep"):
140141
validate_selector_combination(
141-
subjects=normalize_subjects("new"),
142+
subjects=normalize_subjects("broad"),
142143
corpus=normalize_corpus("timit"),
143144
partition=None,
144145
)

0 commit comments

Comments (0)