From 8e3dec5abcb6586ec1432cd5c93258dc3cb96f31 Mon Sep 17 00:00:00 2001 From: MaximeBICMTL Date: Sat, 16 May 2026 06:35:55 +0000 Subject: [PATCH 1/2] bids tsv typing --- python/lib/physio/hed.py | 4 +-- .../src/loris_bids_importer/events.py | 11 +++---- .../validation/subjects.py | 30 +++++++++---------- .../loris_bids_reader/files/participants.py | 23 ++++++++++---- .../src/loris_bids_reader/files/scans.py | 21 +++++++------ .../src/loris_bids_reader/tsv.py | 12 ++++++-- 6 files changed, 59 insertions(+), 42 deletions(-) diff --git a/python/lib/physio/hed.py b/python/lib/physio/hed.py index fe4926d3b..96c4fd1c2 100644 --- a/python/lib/physio/hed.py +++ b/python/lib/physio/hed.py @@ -121,7 +121,7 @@ def build_hed_tag_groups(hed_union: Sequence[DbHedSchemaNode], hed_string: str) return tag_groups -def standardize_row_columns(row: dict[str, str | None]) -> dict[str, str | None]: +def standardize_row_columns(row: dict[str, str | None]) -> dict[str, str]: """ Standardizes LORIS-recognized events.tsv columns to their DB column name @@ -130,7 +130,7 @@ def standardize_row_columns(row: dict[str, str | None]) -> dict[str, str | None] :return: Standardized row """ - standardized_row: dict[str, Any] = {} + standardized_row: dict[str, str] = {} recognized_event_fields = [ 'Onset', 'Duration', 'TrialType', 'ResponseTime', 'EventCode', diff --git a/python/loris_bids_importer/src/loris_bids_importer/events.py b/python/loris_bids_importer/src/loris_bids_importer/events.py index 646323aa3..b8884d05d 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/events.py +++ b/python/loris_bids_importer/src/loris_bids_importer/events.py @@ -116,9 +116,9 @@ def insert_bids_events_file( for row in events_file.rows: # has additional fields? additional_fields: dict[str, str] = {} - for field in row.data: - if field not in known_fields and str(row.data[field]).lower() != 'nan': - additional_fields[field] = row.data[field] + for field, value in row.data.items(): + if field not in known_fields and value is not None and value.lower() != 'nan': + additional_fields[field] = value # insert one event and get its db id task_event = insert_physio_task_event( @@ -137,8 +137,9 @@ def insert_bids_events_file( # Insert HED tags after filtering out inherited tags from events.json, so that they are # not "duplicated" - if row.data.get('HED') is not None and len(row.data['HED']) > 0 and row.data['HED'] != 'n/a': - tag_groups = build_hed_tag_groups(hed_union, row.data['HED']) + hed = row.data.get('HED') + if hed is not None and len(hed) > 0 and hed != 'n/a': + tag_groups = build_hed_tag_groups(hed_union, hed) tag_groups_without_inherited = filter_inherited_tags( row.data, tag_groups, dataset_tag_dict, file_tag_dict ) diff --git a/python/loris_bids_importer/src/loris_bids_importer/validation/subjects.py b/python/loris_bids_importer/src/loris_bids_importer/validation/subjects.py index 0dba27423..275fc5bdf 100644 --- a/python/loris_bids_importer/src/loris_bids_importer/validation/subjects.py +++ b/python/loris_bids_importer/src/loris_bids_importer/validation/subjects.py @@ -131,24 +131,24 @@ def get_bids_participant_row_sex(env: Env, participant: BidsParticipantTsvRow) - Raise an exception if a sex is specified but does not exist in LORIS. """ - if 'sex' not in participant.data: + if participant.sex is None: return None - tsv_participant_sex = participant.data['sex'].lower() + participant_sex = participant.sex.lower() - if tsv_participant_sex in ['m', 'male']: + if participant_sex in ['m', 'male']: sex_name = 'Male' - elif tsv_participant_sex in ['f', 'female']: + elif participant_sex in ['f', 'female']: sex_name = 'Female' - elif tsv_participant_sex in ['o', 'other']: + elif participant_sex in ['o', 'other']: sex_name = 'Other' else: - sex_name = participant.data['sex'] + sex_name = participant.sex sex = try_get_sex_with_name(env.db, sex_name) if sex is None: raise Exception( - f"No LORIS sex found for the BIDS participants.tsv sex name or alias '{participant.data['sex']}'." + f"No LORIS sex found for the BIDS participants.tsv sex name or alias '{participant.sex}'." ) return sex.name @@ -160,22 +160,22 @@ def get_bids_participant_row_site(env: Env, participant: BidsParticipantTsvRow) specified or does not exist in LORIS. """ - if 'site' not in participant.data: + if participant.site is None: raise Exception( "No 'site' column found in the BIDS participants.tsv file, this field is required to create candidates or" " sessions. " ) - site = try_get_site_with_name(env.db, participant.data['site']) + site = try_get_site_with_name(env.db, participant.site) if site is not None: return site - site = try_get_site_with_alias(env.db, participant.data['site']) + site = try_get_site_with_alias(env.db, participant.site) if site is not None: return site raise Exception( - f"No site found for the BIDS participants.tsv site name or alias '{participant.data['site']}'." + f"No site found for the BIDS participants.tsv site name or alias '{participant.site}'." ) @@ -185,20 +185,20 @@ def get_bids_participant_row_project(env: Env, participant: BidsParticipantTsvRo specified or does not exist in LORIS. """ - if 'project' not in participant.data: + if participant.project is None: raise Exception( "No 'project' column found in the BIDS participants.tsv file, this field is required to create candidates" " or sessions. " ) - project = try_get_project_with_name(env.db, participant.data['project']) + project = try_get_project_with_name(env.db, participant.project) if project is not None: return project - project = try_get_project_with_alias(env.db, participant.data['project']) + project = try_get_project_with_alias(env.db, participant.project) if project is not None: return project raise Exception( - f"No project found for the BIDS participants.tsv project name or alias '{participant.data['project']}'." + f"No project found for the BIDS participants.tsv project name or alias '{participant.project}'." ) diff --git a/python/loris_bids_reader/src/loris_bids_reader/files/participants.py b/python/loris_bids_reader/src/loris_bids_reader/files/participants.py index 0683bcccc..f29556232 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/files/participants.py +++ b/python/loris_bids_reader/src/loris_bids_reader/files/participants.py @@ -16,14 +16,24 @@ class BidsParticipantTsvRow(BidsTsvRow): """ participant_id: str - birth_date: date | None + project: str | None + site: str | None cohort: str | None + birth_date: date | None + sex: str | None - def __init__(self, data: dict[str, str]): + def __init__(self, data: dict[str, str | None]): super().__init__(data) - self.participant_id = data['participant_id'].removeprefix('sub-') - self.birth_date = self._read_birth_date() + participant_id = self.data.get('participant_id') + if participant_id is None: + raise Exception("Missing participant_id field in `participants.tsv` file.") + + self.participant_id = participant_id.removeprefix('sub-') + self.project = self.data.get('project') + self.site = self.data.get('site') self.cohort = self._read_cohort() + self.birth_date = self._read_birth_date() + self.sex = self.data.get('sex') def _read_birth_date(self) -> date | None: """ @@ -31,9 +41,10 @@ def _read_birth_date(self) -> date | None: """ for birth_date_field_name in ['date_of_birth', 'birth_date', 'dob']: - if birth_date_field_name in self.data: + birth_date_string = self.data.get(birth_date_field_name) + if birth_date_string is not None: try: - return dateutil.parser.parse(self.data[birth_date_field_name]).date() + return dateutil.parser.parse(birth_date_string).date() except ParserError: pass diff --git a/python/loris_bids_reader/src/loris_bids_reader/files/scans.py b/python/loris_bids_reader/src/loris_bids_reader/files/scans.py index b2c396adf..e0e19a892 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/files/scans.py +++ b/python/loris_bids_reader/src/loris_bids_reader/files/scans.py @@ -19,18 +19,16 @@ def get_acquisition_time(self) -> datetime | None: Get the acquisition time of the acquisition file. """ - if 'acq_time' in self.data: - # the variable name could be mri_acq_time, but is eeg originally. - eeg_acq_time = self.data['acq_time'] - - if eeg_acq_time == 'n/a': + acq_time_string = self.data.get('acq_time') + if acq_time_string is not None: + if acq_time_string == 'n/a': return None try: - eeg_acq_time = dateutil.parser.parse(eeg_acq_time) + acq_time = dateutil.parser.parse(acq_time_string) except ValueError as e: - raise Exception(f"Could not convert acquisition time {eeg_acq_time}' to datetime: {e}") - return eeg_acq_time + raise Exception(f"Could not convert acquisition time {acq_time_string}' to datetime: {e}") + return acq_time return None @@ -43,8 +41,9 @@ def get_age_at_scan(self) -> str | None: age_header_list = ['age', 'age_at_scan', 'age_acq_time'] for header_name in age_header_list: - if header_name in self.data: - return self.data[header_name].strip() + age_string = self.data.get(header_name) + if age_string is not None: + return age_string.strip() return None @@ -64,7 +63,7 @@ def get_row(self, file_path: Path) -> BidsScanTsvRow | None: Get the row corresponding to the given file path. """ - return find(self.rows, lambda row: file_path.name in row.data['filename']) + return find(self.rows, lambda row: file_path.name == row.data['filename']) def set_row(self, scan: BidsScanTsvRow): """ diff --git a/python/loris_bids_reader/src/loris_bids_reader/tsv.py b/python/loris_bids_reader/src/loris_bids_reader/tsv.py index 62897bd04..c38cf1e2b 100644 --- a/python/loris_bids_reader/src/loris_bids_reader/tsv.py +++ b/python/loris_bids_reader/src/loris_bids_reader/tsv.py @@ -1,6 +1,6 @@ import csv from pathlib import Path -from typing import Any, Generic, TypeVar +from typing import Generic, TypeVar from loris_utils.parse import nullify_empty_string @@ -11,9 +11,9 @@ class BidsTsvRow: Documentation: https://bids-specification.readthedocs.io/en/stable/common-principles.html#tabular-files """ - data: dict[str, Any] + data: dict[str, str | None] - def __init__(self, data: dict[str, Any]): + def __init__(self, data: dict[str, str | None]): self.data = data @@ -33,9 +33,15 @@ def __init__(self, model: type[T], path: Path): self.path = path self.rows = [] + # The 'utf-8-sig' encoding is used to support some datasets where metadata files may contain + # a byte-order mark (BOM). with open(self.path, encoding='utf-8-sig') as file: reader = csv.DictReader(file, delimiter='\t') for row in reader: + # Skip empty lines (such as trailing newlines). + if row == {}: + continue + row = {key: nullify_empty_string(value) for key, value in row.items()} self.rows.append(model(row)) From 229d957799dd129c1ad9de3fb9ae3ba0f5860c02 Mon Sep 17 00:00:00 2001 From: MaximeBICMTL Date: Thu, 28 May 2026 05:42:15 +0000 Subject: [PATCH 2/2] fix invalid decimal parsing --- python/loris_utils/src/loris_utils/parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/loris_utils/src/loris_utils/parse.py b/python/loris_utils/src/loris_utils/parse.py index 1fbee2dd2..3937e683c 100644 --- a/python/loris_utils/src/loris_utils/parse.py +++ b/python/loris_utils/src/loris_utils/parse.py @@ -1,4 +1,4 @@ -from decimal import Decimal +from decimal import Decimal, InvalidOperation def try_parse_decimal(string: str) -> Decimal | None: @@ -11,7 +11,7 @@ def try_parse_decimal(string: str) -> Decimal | None: try: return Decimal(string) - except ValueError: + except InvalidOperation: return None