Skip to content

Commit 0af5659

Browse files
author
Gereon Elvers
committed
Quiet chunked preprocess: tqdm bar instead of N×3 step prints
A 1-hour Armeni session with the default 120 s chunks runs the pipeline ~30 times, and each of the three per-step prints ("Applied notch / bp / Downsampled") fires once per chunk — 90+ identical lines that make the job look stuck in Colab. We now redirect stdout while each chunk runs and show a single tqdm bar that ticks once per chunk. mne warnings still surface on stderr.
1 parent feed613 commit 0af5659

2 files changed

Lines changed: 63 additions & 18 deletions

File tree

pnpl/datasets/armeni2022/dataset.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -454,30 +454,55 @@ def _preprocess_raw_to_h5(
454454
duration = float(raw_lazy.times[-1])
455455
boundaries = _chunk_boundaries(duration, chunk_seconds)
456456

457+
# Each chunk runs the same notch + bp + ds steps and the
458+
# pipeline prints a 3-line "Applied / Applied / Downsampled"
459+
# banner per chunk. With ~30 chunks for a 1-hour Armeni
460+
# session that's 90+ identical lines. Hide them behind a
461+
# single tqdm progress bar.
462+
from contextlib import redirect_stdout
463+
import io
464+
465+
try:
466+
from tqdm.auto import tqdm # type: ignore
467+
468+
pbar = tqdm(
469+
total=len(boundaries),
470+
desc=f"Preprocessing {self.preprocessing}",
471+
unit="chunk",
472+
leave=True,
473+
)
474+
except Exception:
475+
pbar = None
476+
457477
processed_chunks: list = []
458478
for start, end in boundaries:
459479
chunk = raw_lazy.copy().crop(tmin=start, tmax=end)
460480
chunk.load_data(verbose=False)
461481
pipeline = Pipeline.from_string(
462482
self.preprocessing, config=resolved.config
463483
)
464-
chunk = pipeline.run(
465-
chunk,
466-
subject=subject,
467-
session=session,
468-
task=task,
469-
run=run,
470-
bids_root=self.data_path,
471-
verbose=False,
472-
)
484+
with redirect_stdout(io.StringIO()):
485+
chunk = pipeline.run(
486+
chunk,
487+
subject=subject,
488+
session=session,
489+
task=task,
490+
run=run,
491+
bids_root=self.data_path,
492+
verbose=False,
493+
)
473494
# Filters are done; downcast each chunk to float32 to halve
474495
# the memory cost of holding all processed chunks before
475496
# concatenation. fif_to_h5 saves float32 anyway, so this is
476497
# lossless relative to the on-disk format.
477498
if chunk._data is not None and chunk._data.dtype != np.float32:
478499
chunk._data = chunk._data.astype(np.float32, copy=False)
479500
processed_chunks.append(chunk)
501+
if pbar is not None:
502+
pbar.update(1)
480503
gc.collect()
504+
if pbar is not None:
505+
pbar.close()
481506

482507
if len(processed_chunks) == 1:
483508
raw = processed_chunks[0]

pnpl/datasets/schoffelen2019/dataset.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -412,26 +412,46 @@ def _preprocess_raw_to_h5(
412412
duration = float(raw_lazy.times[-1])
413413
boundaries = _chunk_boundaries(duration, chunk_seconds)
414414

415+
from contextlib import redirect_stdout
416+
import io
417+
418+
try:
419+
from tqdm.auto import tqdm # type: ignore
420+
421+
pbar = tqdm(
422+
total=len(boundaries),
423+
desc=f"Preprocessing {self.preprocessing}",
424+
unit="chunk",
425+
leave=True,
426+
)
427+
except Exception:
428+
pbar = None
429+
415430
processed_chunks: list = []
416431
for start, end in boundaries:
417432
chunk = raw_lazy.copy().crop(tmin=start, tmax=end)
418433
chunk.load_data(verbose=False)
419434
pipeline = Pipeline.from_string(
420435
self.preprocessing, config=resolved.config
421436
)
422-
chunk = pipeline.run(
423-
chunk,
424-
subject=subject,
425-
session=session,
426-
task=task,
427-
run=run,
428-
bids_root=self.data_path,
429-
verbose=False,
430-
)
437+
with redirect_stdout(io.StringIO()):
438+
chunk = pipeline.run(
439+
chunk,
440+
subject=subject,
441+
session=session,
442+
task=task,
443+
run=run,
444+
bids_root=self.data_path,
445+
verbose=False,
446+
)
431447
if chunk._data is not None and chunk._data.dtype != np.float32:
432448
chunk._data = chunk._data.astype(np.float32, copy=False)
433449
processed_chunks.append(chunk)
450+
if pbar is not None:
451+
pbar.update(1)
434452
gc.collect()
453+
if pbar is not None:
454+
pbar.close()
435455

436456
if len(processed_chunks) == 1:
437457
raw = processed_chunks[0]

0 commit comments

Comments
 (0)