Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
66faeaa
initial test for ica dynamic item inclusion
vmcru Mar 4, 2025
4b62e60
exposed method for ica, used mne BIDSPath, path needs checking for pr…
vmcru Mar 5, 2025
c3ec3dc
Apply suggestions from code review
bruAristimunha Mar 5, 2025
77a8c59
Apply suggestions from code review
bruAristimunha Mar 5, 2025
a222c7c
Merge branch 'redesign/datasets' into redesign/datasets
vmcru Mar 7, 2025
3a78e1d
modifications to ica and validate to pass merge
vmcru Mar 7, 2025
86af6be
removed unused imports from all files and added credits
vmcru Mar 7, 2025
f29636c
updated validate_ica.py
vmcru Mar 7, 2025
30ff4be
Apply suggestions from code review
bruAristimunha Mar 18, 2025
f3330d9
support for ica as process added.
vmcru Mar 23, 2025
4e8bb55
Added hashing, setting check, and fixed caching bug.
vmcru Mar 23, 2025
b592c60
precommit mods
vmcru Mar 23, 2025
e2973aa
formatting fix
vmcru Mar 23, 2025
3f8b646
optional filtering added
vmcru Mar 23, 2025
73d1e93
added hashing to description name.
vmcru Mar 23, 2025
ade6b8d
added python-picard dependency in extra-requirements.txt for ica pica…
vmcru Mar 23, 2025
8b6633e
format fix extra-requirements.txt
vmcru Mar 23, 2025
a1031d2
Update benchmarks/MOABB/dataio/ica.py
vmcru Mar 25, 2025
282c016
Update benchmarks/MOABB/dataio/ica.py
vmcru Mar 25, 2025
ad2e39d
Update benchmarks/MOABB/dataio/datasets.py
vmcru Mar 26, 2025
822dc46
renaming critical to base and removing unnecessary comments
vmcru Mar 26, 2025
c524d11
tests upgrading pytest to see if it fixes breaks
vmcru Mar 26, 2025
a6fb933
added docstrings to process and dynamic items functions.
vmcru Mar 26, 2025
544e638
formatting fixes
vmcru Mar 26, 2025
77daeeb
docstring fix
vmcru Mar 26, 2025
84e09dc
docstring adaptations for validate_ica.py
vmcru Mar 26, 2025
96e2546
precommit fixes
vmcru Mar 26, 2025
87ef955
Merge branch 'develop-eeg' into redesign/datasets
vmcru Mar 28, 2025
6e22fe5
rework of the metadata checking and storing
vmcru Mar 28, 2025
ab42aa9
metadata changes
vmcru Mar 28, 2025
2652128
precommit fixes
vmcru Mar 28, 2025
f2263e4
updates to the test files and minor tweak to ica parameters.
vmcru Mar 28, 2025
210dd9e
adapted hashing for consistency and reproducibility. removed optional…
vmcru Mar 28, 2025
d787c0b
speechbrain changes.
vmcru Mar 28, 2025
4b03501
removed validate_ica.py from tracked files
vmcru Mar 28, 2025
06f873a
changed folder from derivatives to processor
vmcru Apr 1, 2025
ead65b3
precommit action error fix
vmcru Apr 1, 2025
82cdd90
precommit action error fix 2
vmcru Apr 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions benchmarks/MOABB/dataio/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from speechbrain.dataio.dataset import DynamicItemDataset
from speechbrain.utils.data_pipeline import provides, takes

from .ica import ICAProcessor


class RawEEGSample(TypedDict, total=False):
"""Default dictionary keys provided by `~RawEEGDataset`.
Expand Down Expand Up @@ -95,10 +97,12 @@ def __init__(
data,
preload=False,
verbose=None,
ica_processor: Optional[ICAProcessor] = None,
Comment thread
vmcru marked this conversation as resolved.
Outdated
dynamic_items=(),
output_keys=(),
):
self.verbose = verbose
self.ica_processor = ica_processor
dynamic_items = [self._make_load_raw_dynamic_item(preload)] + list(
dynamic_items
)
Expand Down Expand Up @@ -297,6 +301,9 @@ def _make_load_raw_dynamic_item(self, preload: bool):
def _load_raw(fpath: str):
raw = self._read_raw_bids_cached(fpath, preload)

if self.ica_processor is not None:
raw = self.ica_processor.process(raw, fpath)

Comment thread
bruAristimunha marked this conversation as resolved.
Outdated
yield raw.info
yield raw

Expand Down
102 changes: 102 additions & 0 deletions benchmarks/MOABB/dataio/ica.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""Module for handling ICA computation and application for EEG data.
Author
------
Victor Cruz, 2025
"""
from pathlib import Path
from typing import Union, Optional, Dict, Any

import mne
from mne.preprocessing import ICA
from mne_bids import get_bids_path_from_fname


class ICAProcessor:
    """Compute, cache on disk, and apply ICA decompositions for EEG data.

    The fitted solution for a recording is written to the path returned by
    `get_ica_path` and reloaded from there on later calls. That path depends
    only on the recording file name and ``method``; changing any other
    setting (``n_components``, ``random_state``, ...) does NOT invalidate a
    previously saved solution.

    Arguments
    ---------
    n_components : int | float | None
        Number of components to keep during ICA decomposition.
    method : str
        The ICA method to use. Can be 'fastica', 'infomax' or 'picard'.
        Defaults to 'fastica'.
    random_state : int | None
        Random state for reproducibility. Defaults to 42.
    fit_params : dict | None
        Extra keyword arguments unpacked into the ``mne.preprocessing.ICA``
        constructor (for example ``{"max_iter": 500}``).
    filter_params : dict | None
        Keyword arguments forwarded to ``raw.filter`` before fitting ICA.
        Defaults to {'l_freq': 1.0, 'h_freq': None}.
    """

    def __init__(
        self,
        n_components=None,
        method="fastica",
        random_state=42,
        fit_params: Optional[Dict[str, Any]] = None,
        filter_params: Optional[Dict[str, Any]] = None,
    ):
        self.n_components = n_components
        self.method = method
        self.random_state = random_state
        # Copy the dicts so later mutation by the caller cannot silently
        # change this processor's configuration.
        self.fit_params = dict(fit_params) if fit_params else {}
        self.filter_params = (
            dict(filter_params)
            if filter_params
            else {"l_freq": 1.0, "h_freq": None}
        )

    def get_ica_path(self, raw_path: Union[str, Path]) -> Path:
        """Return the file path where the ICA solution for a recording lives.

        The path is built from the BIDS entities parsed out of ``raw_path``,
        rooted at ``<bids_root>/../derivatives/ica-<method>``, with suffix
        ``eeg``, extension ``.fif`` and description entity ``ica``. The
        parent directory is created as a side effect.

        Arguments
        ---------
        raw_path : str | Path
            Path of the raw recording inside a BIDS dataset.

        Returns
        -------
        Path
            Location of the (possibly not yet existing) ICA file.
        """
        bids_path = get_bids_path_from_fname(raw_path)
        # Store solutions in a method-specific derivatives folder that is a
        # sibling of the BIDS root.
        bids_path.root = (
            bids_path.root / ".." / "derivatives" / f"ica-{self.method}"
        )
        # Keep the recording's base entities; only mark the file as ICA.
        bids_path.update(
            suffix="eeg",
            extension=".fif",
            description="ica",  # adds the desc-ica entity
            check=True,  # BIDS validation of the entities stays enabled
        )
        ica_path = bids_path.fpath
        ica_path.parent.mkdir(parents=True, exist_ok=True)

        return ica_path

    def compute_ica(self, raw: mne.io.RawArray, ica_path: Path) -> ICA:
        """Fit ICA on a filtered copy of ``raw`` and save it to ``ica_path``.

        Arguments
        ---------
        raw : mne.io.RawArray
            Recording to decompose. It is not modified.
        ica_path : Path
            Destination file for the fitted solution.

        Returns
        -------
        ICA
            The fitted ICA object.
        """
        # Filtering needs the samples in memory; load_data() is a no-op when
        # the recording was already preloaded.
        raw_filtered = raw.copy().load_data()
        raw_filtered.filter(**self.filter_params)

        ica = ICA(
            n_components=self.n_components,
            method=self.method,
            random_state=self.random_state,
            **self.fit_params,
        )
        ica.fit(raw_filtered)
        ica.save(ica_path)
        return ica

    def process(
        self, raw: mne.io.RawArray, raw_path: Union[str, Path]
    ) -> mne.io.RawArray:
        """Apply ICA to a copy of ``raw``, fitting it first if not cached.

        Arguments
        ---------
        raw : mne.io.RawArray
            Recording to clean. It is not modified.
        raw_path : str | Path
            Path of the recording, used to locate the cached ICA file.

        Returns
        -------
        mne.io.RawArray
            A new recording with the ICA solution applied.
        """
        ica_path = self.get_ica_path(raw_path)

        if ica_path.exists():
            ica = mne.preprocessing.read_ica(ica_path, verbose="ERROR")
        else:
            ica = self.compute_ica(raw, ica_path)

        # Work on a copy so the caller's recording stays untouched, and load
        # it because applying ICA needs the data in memory.
        raw_ica = raw.copy().load_data()
        ica.apply(raw_ica)

        return raw_ica
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def process(
    self, raw: mne.io.RawArray, raw_path: Union[str, Path]
) -> mne.io.RawArray:
    """Apply ICA to a copy of ``raw``, fitting it first if not cached.

    Arguments
    ---------
    raw : mne.io.RawArray
        Recording to clean. It is not modified.
    raw_path : str | Path
        Path of the recording, used to locate the cached ICA file.

    Returns
    -------
    mne.io.RawArray
        A new recording with the ICA solution applied.
    """
    ica_path = self.get_ica_path(raw_path)
    if ica_path.exists():
        ica = mne.preprocessing.read_ica(ica_path, verbose="ERROR")
    else:
        ica = self.compute_ica(raw, ica_path)
    # Work on a copy so the caller's recording stays untouched; load it
    # because applying ICA needs the data in memory (no-op if preloaded).
    raw_ica = raw.copy().load_data()
    ica.apply(raw_ica)
    return raw_ica
@property
def dynamic_item(self):
    """Return the ICA step as a dynamic item for a data pipeline.

    The returned generator function takes the keys ``raw`` and ``fpath``
    and provides ``raw`` (the ICA-cleaned copy) and ``ica_path``.

    NOTE(review): `takes` and `provides` must be in scope in this module
    (they come from ``speechbrain.utils.data_pipeline``). The item both
    takes and provides the key ``raw`` -- confirm the pipeline accepts a
    key being re-provided.
    """

    @takes("raw", "fpath")
    @provides("raw", "ica_path")
    def process(raw: mne.io.RawArray, fpath: Union[str, Path]):
        """Process raw data with ICA, computing or loading from cache."""
        ica_path = self.get_ica_path(fpath)
        if ica_path.exists():
            ica = mne.preprocessing.read_ica(ica_path, verbose="ERROR")
        else:
            ica = self.compute_ica(raw, ica_path)
        # Work on a copy so the incoming recording stays untouched; load
        # it because applying ICA needs the data in memory.
        raw_ica = raw.copy().load_data()
        ica.apply(raw_ica)
        yield raw_ica
        yield ica_path

    return process

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

function inside one function is not super nice

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is necessary though for it to work with the @takes and @provides decorators.

151 changes: 151 additions & 0 deletions benchmarks/MOABB/validate_ica.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""File for testing ICA computation and application for EEG data.
Comment thread
vmcru marked this conversation as resolved.
Outdated
Authors
-------
Victor Cruz, 2025
"""
import time
import mne
import moabb
from moabb.datasets import BNCI2014_001
from memory_profiler import profile

from dataio.datasets import EpochedEEGDataset, InMemoryDataset
from dataio.ica import ICAProcessor

# Set up logging: lower the verbosity of MNE, and restrict MOABB to
# messages of level ERROR.
mne.set_log_level(verbose=False)
moabb.set_log_level(level="ERROR")


def _time_full_pass(dataset) -> float:
    """Iterate once over every item of ``dataset``; return elapsed seconds."""
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    for _ in dataset:
        pass
    return time.perf_counter() - start


def test_ica_method(method: str, n_components: int = 15, **kwargs):
    """Time three full passes over an ICA-enabled epoched dataset.

    Arguments
    ---------
    method : str
        ICA method name forwarded to `ICAProcessor`.
    n_components : int
        Number of ICA components forwarded to `ICAProcessor`.
    **kwargs
        Any further keyword arguments for `ICAProcessor`.

    Returns
    -------
    dict
        Keys ``method``, ``computation_time``, ``cached_time`` and
        ``memory_cached_time`` (times in seconds).
    """
    print(f"\nTesting ICA method: {method}")
    ica_processor = ICAProcessor(
        n_components=n_components, method=method, **kwargs
    )

    dataset = EpochedEEGDataset.from_moabb(
        BNCI2014_001(),
        f"data/MNE-BIDS-bnci2014-001-epoched-{method}.json",
        save_path="data",
        tmin=0,
        tmax=4.0,
        preload=True,
        output_keys=["label", "subject", "session", "epoch"],
        ica_processor=ica_processor,
    )

    # First pass. NOTE(review): this only measures ICA computation when no
    # solution from an earlier invocation is already saved on disk.
    print("First run (computing ICA):")
    computation_time = _time_full_pass(dataset)
    print(f"Time with {method} ICA (first run): {computation_time:.2f}s")

    # Second pass over the same dataset object.
    print("\nSecond run (using cached ICA):")
    cached_time = _time_full_pass(dataset)
    print(f"Time with {method} ICA (cached): {cached_time:.2f}s")

    # Third pass, through the InMemoryDataset wrapper.
    print("\nTesting with InMemoryDataset wrapper:")
    dataset_cached = InMemoryDataset(dataset)
    memory_cached_time = _time_full_pass(dataset_cached)
    print(
        f"Time with {method} ICA (in-memory cache): {memory_cached_time:.2f}s"
    )

    return {
        "method": method,
        "computation_time": computation_time,
        "cached_time": cached_time,
        "memory_cached_time": memory_cached_time,
    }


def compare_ica_methods():
    """Time a baseline dataset without ICA, then the Picard and Infomax runs.

    Prints the baseline iteration time followed by the per-method timings
    returned by `test_ica_method`. Returns nothing.
    """
    # Baseline: the same epoched dataset without any ICA processor.
    print("\nTesting without ICA (baseline):")
    dataset_no_ica = EpochedEEGDataset.from_moabb(
        BNCI2014_001(),
        "data/MNE-BIDS-bnci2014-001-epoched.json",
        save_path="data",
        tmin=0,
        tmax=4.0,
        output_keys=["label", "subject", "session", "epoch"],
    )

    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    for _ in dataset_no_ica:
        pass
    baseline_time = time.perf_counter() - start
    print(f"Time without ICA: {baseline_time:.2f}s")

    # Run the ICA methods in a fixed order: Picard first, then Infomax.
    results = [
        test_ica_method(
            "picard", n_components=15, fit_params={"max_iter": 500}
        ),
        test_ica_method(
            "infomax", n_components=15, fit_params={"max_iter": 1000}
        ),
    ]

    # Print comparison
    print("\nComparison Summary:")
    print("-" * 50)
    print(f"Baseline (no ICA): {baseline_time:.2f}s")
    print("-" * 50)
    for result in results:
        print(f"Method: {result['method']}")
        print(f" Computation time: {result['computation_time']:.2f}s")
        print(f" Cached access time: {result['cached_time']:.2f}s")
        print(f" In-memory cached time: {result['memory_cached_time']:.2f}s")
        print("-" * 50)


@profile
def profile_memory_usage():
    """Build and fully iterate an ICA-enabled dataset for each method.

    Runs under the ``memory_profiler`` ``@profile`` decorator. Picard is
    configured with ``max_iter=500`` and Infomax with ``max_iter=1000``.
    """
    # Insertion order fixes the run order: picard first, then infomax.
    max_iter_by_method = {"picard": 500, "infomax": 1000}

    for method, max_iter in max_iter_by_method.items():
        print(f"\nProfiling {method} ICA:")
        processor = ICAProcessor(
            n_components=15,
            method=method,
            fit_params={"max_iter": max_iter},
        )
        epochs_dataset = EpochedEEGDataset.from_moabb(
            BNCI2014_001(),
            f"data/MNE-BIDS-bnci2014-001-epoched-{method}.json",
            save_path="data",
            tmin=0,
            tmax=4.0,
            preload=True,
            output_keys=["label", "subject", "session", "epoch"],
            ica_processor=processor,
        )

        # Touch every item so loading and ICA processing actually run.
        for _ in epochs_dataset:
            pass


# Script entry point: run the timing comparison first, then the memory
# profile.
if __name__ == "__main__":
    print("Running ICA method comparison...")
    compare_ica_methods()

    print("\nRunning memory profile...")
    profile_memory_usage()