Skip to content

Commit 463f501

Browse files
ENH: Add overrides= parameter to read_raw_brainvision for BIDS header repair
Adds an opt-in overrides dict parameter to read_raw_brainvision for reading non-spec-compliant .vhdr files whose header contradicts the actual file layout (e.g. renamed BIDS siblings, a missing MarkerFile= entry, or a truncated [Channel Infos] section). The dict accepts six keys: data_fname, marker_fname, n_channels, sfreq, ch_names, and units_fallback. Each applied override emits a RuntimeWarning, and unknown keys raise a ValueError. Passing None or {} keeps the stock behavior. This mirrors the read_raw_edf(units=...) precedent. Closes mne-tools/mne-bids#1598.
1 parent 3455d64 commit 463f501

4 files changed

Lines changed: 295 additions & 26 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add ``overrides`` parameter to :func:`mne.io.read_raw_brainvision` for reading non-spec-compliant ``.vhdr`` files where the header contradicts the actual layout (e.g. renamed BIDS siblings, missing ``MarkerFile=``, truncated ``[Channel Infos]``). Accepts a dict with keys ``data_fname``, ``marker_fname``, ``n_channels``, ``sfreq``, ``ch_names``, and ``units_fallback``; see the function docstring for details, by `Bruno Aristimunha`_.

mne/io/brainvision/brainvision.py

Lines changed: 161 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,16 @@
2020
from ...channels import make_dig_montage
2121
from ...defaults import HEAD_SIZE_DEFAULT
2222
from ...transforms import _sph_to_cart
23-
from ...utils import _DefaultEventParser, fill_doc, logger, verbose, warn
23+
from ...utils import (
24+
_check_dict_keys,
25+
_check_range,
26+
_DefaultEventParser,
27+
_validate_type,
28+
fill_doc,
29+
logger,
30+
verbose,
31+
warn,
32+
)
2433
from ..base import BaseRaw
2534

2635

@@ -49,6 +58,7 @@ class RawBrainVision(BaseRaw):
4958
``False``.
5059
5160
.. versionadded:: 1.8
61+
%(brainvision_overrides)s
5262
%(preload)s
5363
%(verbose)s
5464
@@ -90,9 +100,11 @@ def __init__(
90100
misc="auto",
91101
scale=1.0,
92102
ignore_marker_types=False,
103+
overrides=None,
93104
preload=False,
94105
verbose=None,
95106
): # noqa: D107
107+
overrides = _validate_overrides(overrides)
96108
# Channel info and events
97109
logger.info(f"Extracting parameters from {vhdr_fname}...")
98110
hdr_fname = op.abspath(vhdr_fname)
@@ -107,7 +119,7 @@ def __init__(
107119
mrk_fname,
108120
montage,
109121
orig_units,
110-
) = _get_hdr_info(hdr_fname, eog, misc, scale)
122+
) = _get_hdr_info(hdr_fname, eog, misc, scale, overrides)
111123

112124
with open(data_fname, "rb") as f:
113125
if isinstance(fmt, dict): # ASCII, this will be slow :(
@@ -146,11 +158,11 @@ def __init__(
146158
split_settings = settings.splitlines()
147159
self.impedances = _parse_impedance(split_settings, self.info["meas_date"])
148160

149-
# Get annotations from marker file
150-
annots = read_annotations(
151-
mrk_fname, info["sfreq"], ignore_marker_types=ignore_marker_types
152-
)
153-
self.set_annotations(annots)
161+
if mrk_fname is not None:
162+
annots = read_annotations(
163+
mrk_fname, info["sfreq"], ignore_marker_types=ignore_marker_types
164+
)
165+
self.set_annotations(annots)
154166

155167
# Drop the fake ahdr channel if needed
156168
if ahdr_format:
@@ -391,6 +403,47 @@ def _check_bv_version(header, kind):
391403
warn(_data_err % (kind, header))
392404

393405

406+
_OVERRIDES_VALID_KEYS = frozenset(
407+
{"data_fname", "marker_fname", "n_channels", "sfreq", "ch_names", "units_fallback"}
408+
)
409+
410+
411+
def _validate_overrides(overrides):
    """Check the ``overrides`` argument of ``read_raw_brainvision``.

    Parameters
    ----------
    overrides : dict | None
        User-supplied header overrides. Only keys listed in
        ``_OVERRIDES_VALID_KEYS`` are accepted.

    Returns
    -------
    overrides : dict
        A new empty dict if ``None`` was passed, otherwise the input dict
        itself (it is returned as-is, not copied).

    Notes
    -----
    Checks are applied only to the keys that are present, in this order:

    - ``data_fname``: must be path-like.
    - ``marker_fname``: must be path-like, unless it is ``False``, which is
      let through without a type check.
    - ``n_channels``: must be int-like and is range-checked against a lower
      bound of 1.
    - ``sfreq``: must be numeric and is range-checked against an exclusive
      lower bound of 0.
    - ``ch_names``: must be a list or tuple of ``str`` without duplicates.
    - ``units_fallback``: must be a ``str``.
    """
    _validate_type(overrides, (dict, None), "overrides")
    if overrides is None:
        return {}

    # Reject unknown keys before looking at any individual value.
    _check_dict_keys(
        overrides, _OVERRIDES_VALID_KEYS, "override key(s)", "valid override keys"
    )

    if "data_fname" in overrides:
        data_path = overrides["data_fname"]
        _validate_type(data_path, "path-like", "overrides['data_fname']")

    if "marker_fname" in overrides:
        marker_path = overrides["marker_fname"]
        # ``False`` is a sentinel value and is deliberately not type-checked.
        if marker_path is not False:
            _validate_type(
                marker_path,
                "path-like",
                "overrides['marker_fname']",
                extra="(or False to skip annotation reading)",
            )

    if "n_channels" in overrides:
        n_chan = overrides["n_channels"]
        _validate_type(n_chan, "int-like", "overrides['n_channels']")
        _check_range(n_chan, 1, np.inf, "overrides['n_channels']")

    if "sfreq" in overrides:
        rate = overrides["sfreq"]
        _validate_type(rate, "numeric", "overrides['sfreq']")
        _check_range(rate, 0, np.inf, "overrides['sfreq']", min_inclusive=False)

    if "ch_names" in overrides:
        names = overrides["ch_names"]
        _validate_type(names, (list, tuple), "overrides['ch_names']")
        for idx, ch_name in enumerate(names):
            _validate_type(ch_name, str, f"overrides['ch_names'][{idx}]")
        # A set collapses duplicates, so a shorter set means repeated names.
        if len(names) > len(set(names)):
            raise ValueError("overrides['ch_names'] must contain unique names")

    if "units_fallback" in overrides:
        fallback_unit = overrides["units_fallback"]
        _validate_type(fallback_unit, str, "overrides['units_fallback']")

    return overrides
445+
446+
394447
_orientation_dict = dict(MULTIPLEXED="F", VECTORIZED="C")
395448
_fmt_dict = dict(INT_16="short", INT_32="int", IEEE_FLOAT_32="single")
396449
_fmt_byte_dict = dict(short=2, int=4, single=4)
@@ -488,7 +541,7 @@ def _aux_hdr_info(hdr_fname):
488541

489542

490543
@fill_doc
491-
def _get_hdr_info(hdr_fname, eog, misc, scale):
544+
def _get_hdr_info(hdr_fname, eog, misc, scale, overrides=None):
492545
"""Extract all the information from the header file.
493546
494547
Parameters
@@ -505,6 +558,8 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
505558
scale : float
506559
The scaling factor for EEG data. Unless specified otherwise by header file,
507560
units are in microvolts. Default scale factor is 1.
561+
overrides : dict | None
562+
Validated dict of header overrides (see :func:`read_raw_brainvision`).
508563
509564
Returns
510565
-------
@@ -517,14 +572,16 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
517572
Orientation of the binary data.
518573
n_samples : int
519574
Number of data points in the binary data file.
520-
mrk_fname : str
521-
Path to the marker file.
575+
mrk_fname : str | None
576+
Path to the marker file. ``None`` when ``overrides['marker_fname']`` is
577+
``False``, signalling that the caller should skip annotation reading.
522578
montage : DigMontage
523579
Coordinates of the channels, if present in the header file.
524580
orig_units : dict
525581
Dictionary mapping channel names to their units as specified in the header file.
526582
Example: {'FC1': 'nV'}
527583
"""
584+
overrides = {} if overrides is None else overrides
528585
scale = float(scale)
529586
ext = op.splitext(hdr_fname)[-1]
530587
ahdr_format = ext == ".ahdr"
@@ -537,6 +594,10 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
537594
settings, cfg, cinfostr, info = _aux_hdr_info(hdr_fname)
538595
info._unlocked = True
539596

597+
if "sfreq" in overrides:
598+
info["sfreq"] = overrides["sfreq"]
599+
warn(f"sfreq overridden: {info['sfreq']} Hz")
600+
540601
order = cfg.get(cinfostr, "DataOrientation")
541602
if order not in _orientation_dict:
542603
raise NotImplementedError(f"Data Orientation {order} is not supported")
@@ -558,27 +619,45 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
558619

559620
# locate EEG binary file and marker file for the stim channel
560621
path = op.dirname(hdr_fname)
561-
data_fname = op.join(path, cfg.get(cinfostr, "DataFile"))
562-
mrk_fname = op.join(path, cfg.get(cinfostr, "MarkerFile"))
622+
if "data_fname" in overrides:
623+
data_fname = op.join(path, overrides["data_fname"])
624+
warn(f"data_fname overridden: {data_fname!r}")
625+
else:
626+
data_fname = op.join(path, cfg.get(cinfostr, "DataFile"))
627+
628+
if "marker_fname" in overrides:
629+
mrk_override = overrides["marker_fname"]
630+
if mrk_override is False:
631+
mrk_fname = None
632+
warn("marker_fname overridden: annotation reading skipped")
633+
else:
634+
mrk_fname = op.join(path, mrk_override)
635+
warn(f"marker_fname overridden: {mrk_fname!r}")
636+
else:
637+
mrk_fname = op.join(path, cfg.get(cinfostr, "MarkerFile"))
563638

564639
# Try to get measurement date from marker file
565640
# Usually saved with a marker "New Segment", see BrainVision documentation
566641
regexp = r"^Mk\d+=New Segment,.*,\d+,\d+,-?\d+,(\d{20})$"
567-
with open(mrk_fname) as tmp_mrk_f:
568-
lines = tmp_mrk_f.readlines()
569-
570-
for line in lines:
571-
match = re.findall(regexp, line.strip())
572-
# Always take first measurement date we find
573-
if match:
574-
date_str = match[0]
575-
info["meas_date"] = _str_to_meas_date(date_str)
576-
break
577-
else:
578-
info["meas_date"] = None
642+
info["meas_date"] = None
643+
if mrk_fname is not None:
644+
with open(mrk_fname) as tmp_mrk_f:
645+
lines = tmp_mrk_f.readlines()
646+
647+
for line in lines:
648+
match = re.findall(regexp, line.strip())
649+
# Always take first measurement date we find
650+
if match:
651+
date_str = match[0]
652+
info["meas_date"] = _str_to_meas_date(date_str)
653+
break
579654

580655
# load channel labels
581-
nchan = cfg.getint(cinfostr, "NumberOfChannels")
656+
if "n_channels" in overrides:
657+
nchan = overrides["n_channels"]
658+
warn(f"n_channels overridden: {nchan}")
659+
else:
660+
nchan = cfg.getint(cinfostr, "NumberOfChannels")
582661
if ahdr_format:
583662
# add one fake channel for ahdr format
584663
nchan += 1
@@ -603,8 +682,12 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
603682
ch_dict = dict()
604683
misc_chs = dict()
605684
orig_units = dict()
685+
dropped_ci_rows = 0
606686
for chan, props in cfg.items("Channel Infos"):
607687
n = int(re.findall(r"ch(\d+)", chan)[0]) - 1
688+
if n >= nchan:
689+
dropped_ci_rows += 1
690+
continue
608691
props = props.split(",")
609692

610693
# default to µV, following the BV specs; the unit is only allowed to be
@@ -633,6 +716,12 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
633716
ranges[n] = _unit_dict.get(unit, 1) * scale
634717
if unit not in ("V", "mV", "µV", "uV", "nV"):
635718
misc_chs[name] = FIFF.FIFF_UNIT_CEL if unit == "C" else FIFF.FIFF_UNIT_NONE
719+
if dropped_ci_rows:
720+
warn(
721+
f"n_channels override ({nchan}) dropped {dropped_ci_rows} trailing "
722+
f"[Channel Infos] entry(ies)."
723+
)
724+
636725
if ahdr_format:
637726
ch_dict[_AHDR_CHANNEL_NAME] = _AHDR_CHANNEL_NAME
638727
ch_names[-1] = _AHDR_CHANNEL_NAME
@@ -682,8 +771,33 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
682771
"explicitly."
683772
)
684773

774+
synthesized_chs: set[str] = set()
685775
if np.isnan(cals).any():
686-
raise RuntimeError("Missing channel units")
776+
missing_idx = np.where(np.isnan(cals))[0]
777+
units_fallback = overrides.get("units_fallback")
778+
if units_fallback is None:
779+
raise RuntimeError(
780+
f"Incomplete [Channel Infos]: missing entries at indices "
781+
f"{missing_idx.tolist()}. Pass overrides={{'units_fallback': "
782+
f"'<unit>'}} (e.g. 'µV') to recover with default values."
783+
)
784+
fallback_range = _unit_dict.get(units_fallback, 1) * scale
785+
for n in missing_idx:
786+
if not ch_names[n]:
787+
ch_names[n] = f"Ch{n + 1}"
788+
cals[n] = 1.0
789+
ranges[n] = fallback_range
790+
orig_units[ch_names[n]] = units_fallback
791+
synthesized_chs.add(ch_names[n])
792+
if (
793+
units_fallback not in ("V", "mV", "µV", "uV", "nV")
794+
and ch_names[n] not in misc
795+
):
796+
misc.append(ch_names[n])
797+
warn(
798+
f"units_fallback overridden: filled {len(missing_idx)} entries with "
799+
f"resolution=1.0, unit={units_fallback!r}."
800+
)
687801

688802
# Attempts to extract filtering info from header. If not found, both are set to
689803
# zero.
@@ -748,6 +862,8 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
748862
for i, ch in enumerate(ch_names, 1):
749863
if ahdr_format and i == len(ch_names) and ch == _AHDR_CHANNEL_NAME:
750864
break
865+
if ch in synthesized_chs:
866+
continue
751867
# double check alignment with channel by using the hw settings
752868
if idx == idx_amp:
753869
line_amp = settings[idx + i]
@@ -888,6 +1004,22 @@ def _get_hdr_info(hdr_fname, eog, misc, scale):
8881004
f"Hz{nyquist}) will be stored."
8891005
)
8901006

1007+
if "ch_names" in overrides:
1008+
new_names = list(overrides["ch_names"])
1009+
if ahdr_format and len(new_names) == nchan - 1:
1010+
new_names.append(_AHDR_CHANNEL_NAME)
1011+
if len(new_names) != nchan:
1012+
raise ValueError(
1013+
f"overrides['ch_names'] has length {len(overrides['ch_names'])} "
1014+
f"but the file declares {nchan} channels."
1015+
)
1016+
name_map = dict(zip(ch_names, new_names))
1017+
ch_names = new_names
1018+
orig_units = {name_map.get(k, k): v for k, v in orig_units.items()}
1019+
misc_chs = {name_map.get(k, k): v for k, v in misc_chs.items()}
1020+
misc = [name_map.get(m, m) if isinstance(m, str) else m for m in misc]
1021+
warn(f"ch_names overridden: {nchan} channel(s) renamed")
1022+
8911023
# Creates a list of dicts of eeg channels for raw.info
8921024
logger.info("Setting channel info structure...")
8931025
info["chs"] = []
@@ -939,6 +1071,7 @@ def read_raw_brainvision(
9391071
misc="auto",
9401072
scale=1.0,
9411073
ignore_marker_types=False,
1074+
overrides=None,
9421075
preload=False,
9431076
verbose=None,
9441077
) -> RawBrainVision:
@@ -965,6 +1098,7 @@ def read_raw_brainvision(
9651098
``False``.
9661099
9671100
.. versionadded:: 1.8
1101+
%(brainvision_overrides)s
9681102
%(preload)s
9691103
%(verbose)s
9701104
@@ -1002,6 +1136,7 @@ def read_raw_brainvision(
10021136
misc=misc,
10031137
scale=scale,
10041138
ignore_marker_types=ignore_marker_types,
1139+
overrides=overrides,
10051140
preload=preload,
10061141
verbose=verbose,
10071142
)

0 commit comments

Comments
 (0)