From 40be9a669713b3436a61d49854bbf49713be53c6 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Thu, 25 Sep 2025 16:56:06 +0200 Subject: [PATCH 1/3] add defaults to psm_reader.yaml for newer spectronaut version & fix hardcoded charge column name in _pre_process() to instead use the configuration yaml --- alphabase/constants/const_files/psm_reader.yaml | 10 +++++----- alphabase/psm_reader/dia_psm_reader.py | 17 +++++++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/alphabase/constants/const_files/psm_reader.yaml b/alphabase/constants/const_files/psm_reader.yaml index b7cd8ca2..23339ade 100644 --- a/alphabase/constants/const_files/psm_reader.yaml +++ b/alphabase/constants/const_files/psm_reader.yaml @@ -221,7 +221,7 @@ spectronaut_report: 'proteins': ['PG.ProteinNames','PG.ProteinGroups'] 'genes': 'PG.Genes' 'uniprot_ids': 'PG.UniProtIds' - 'charge': 'charge' + 'charge': ['charge'] mod_seq_columns: - 'ModifiedSequence' precursor_id_columns: @@ -233,14 +233,14 @@ spectronaut: rt_unit: irt fixed_C57: False column_mapping: - 'raw_name': 'ReferenceRun' + 'raw_name': ['ReferenceRun', 'R.Label'] 'sequence': ['StrippedPeptide','PeptideSequence'] - 'charge': 'PrecursorCharge' + 'charge': ['PrecursorCharge', 'FG.Charge'] 'rt': ['RT','iRT','Tr_recalibrated','RetentionTime','NormalizedRetentionTime'] 'ccs': 'CCS' 'precursor_mz': 'PrecursorMz' 'mobility': ['Mobility','IonMobility','PrecursorIonMobility'] - 'proteins': ['Protein Name','ProteinId','ProteinID','ProteinName','ProteinGroup','ProteinGroups'] + 'proteins': ['Protein Name','ProteinId','ProteinID','ProteinName','ProteinGroup','ProteinGroups','PG.ProteinGroups'] 'uniprot_ids': ['UniProtIds','UniProtID','UniprotId'] 'genes': ['Genes','Gene','GeneName','GeneNames'] mod_seq_columns: @@ -249,7 +249,7 @@ spectronaut: - 'FullUniModPeptideName' - 'ModifiedPeptideSequence' - 'LabeledSequence' - - 'FullUniModPeptideName' + - "EG.ModifiedSequence" precursor_id_columns: - "EG.PrecursorId" modification_mapping_type: 'maxquant' diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 3e6e8bae..6f6b4d51 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -2,6 +2,7 @@ from typing import List, Optional +from pathlib import Path import numpy as np import pandas as pd @@ -9,6 +10,8 @@ from alphabase.psm_reader.maxquant_reader import ModifiedSequenceReader from alphabase.psm_reader.psm_reader import psm_reader_provider +from alphabase.constants._const import CONST_FILE_FOLDER, PSM_READER_YAML_FILE_NAME +from alphabase.yaml_utils import load_yaml class SpectronautReader(ModifiedSequenceReader): """Reader for Spectronaut's output library TSV/CSV.""" @@ -19,12 +22,22 @@ class SpectronautReader(ModifiedSequenceReader): def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame: """Spectronaut-specific preprocessing of output data.""" + + # Obtain matching charge columns from the psm_reader.yaml + available_charge_columns = load_yaml(Path(CONST_FILE_FOLDER) / PSM_READER_YAML_FILE_NAME)[self._reader_type]["column_mapping"]["charge"] + + self.precursor_charge_column = "PrecursorCharge" + for charge_col in available_charge_columns: + if charge_col in df.columns: + self.precursor_charge_column = charge_col + break + if "ReferenceRun" in df.columns: df.drop_duplicates( - ["ReferenceRun", self.mod_seq_column, "PrecursorCharge"], inplace=True + ["ReferenceRun", self.mod_seq_column, self.precursor_charge_column], inplace=True ) else: - df.drop_duplicates([self.mod_seq_column, "PrecursorCharge"], inplace=True) + df.drop_duplicates([self.mod_seq_column, self.precursor_charge_column], inplace=True) df.reset_index(drop=True, inplace=True) return df From ccb94e164c996d4ef872989ce8fe4cb362f6a8a2 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Thu, 25 Sep 2025 17:30:25 +0200 Subject: [PATCH 2/3] reformat --- alphabase/psm_reader/dia_psm_reader.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 6f6b4d51..734e0582 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -1,16 +1,16 @@ """Readers for Spectronaut's output library and reports, Swath data and DIANN data.""" +from pathlib import Path from typing import List, Optional -from pathlib import Path import numpy as np import pandas as pd +from alphabase.constants._const import CONST_FILE_FOLDER, PSM_READER_YAML_FILE_NAME from alphabase.psm_reader.keys import PsmDfCols from alphabase.psm_reader.maxquant_reader import ModifiedSequenceReader from alphabase.psm_reader.psm_reader import psm_reader_provider -from alphabase.constants._const import CONST_FILE_FOLDER, PSM_READER_YAML_FILE_NAME from alphabase.yaml_utils import load_yaml class SpectronautReader(ModifiedSequenceReader): @@ -24,7 +24,9 @@ def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame: """Spectronaut-specific preprocessing of output data.""" # Obtain matching charge columns from the psm_reader.yaml - available_charge_columns = load_yaml(Path(CONST_FILE_FOLDER) / PSM_READER_YAML_FILE_NAME)[self._reader_type]["column_mapping"]["charge"] + available_charge_columns = load_yaml( + Path(CONST_FILE_FOLDER) / PSM_READER_YAML_FILE_NAME + )[self._reader_type]["column_mapping"]["charge"] self.precursor_charge_column = "PrecursorCharge" for charge_col in available_charge_columns: @@ -34,10 +36,13 @@ def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame: if "ReferenceRun" in df.columns: df.drop_duplicates( - ["ReferenceRun", self.mod_seq_column, self.precursor_charge_column], inplace=True + ["ReferenceRun", self.mod_seq_column, self.precursor_charge_column], + inplace=True ) else: - df.drop_duplicates([self.mod_seq_column, self.precursor_charge_column], inplace=True) + df.drop_duplicates( + [self.mod_seq_column, self.precursor_charge_column], inplace=True + ) df.reset_index(drop=True, inplace=True) return df From 34f41f976be3c6c6daed01dab237a24a799f4c26 Mon Sep 17 00:00:00 2001 From: Vincenth Brennsteiner Date: Thu, 25 Sep 2025 17:35:53 +0200 Subject: [PATCH 3/3] correct formatting --- alphabase/psm_reader/dia_psm_reader.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 734e0582..3c3b5f33 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -10,9 +10,9 @@ from alphabase.psm_reader.keys import PsmDfCols from alphabase.psm_reader.maxquant_reader import ModifiedSequenceReader from alphabase.psm_reader.psm_reader import psm_reader_provider - from alphabase.yaml_utils import load_yaml + class SpectronautReader(ModifiedSequenceReader): """Reader for Spectronaut's output library TSV/CSV.""" @@ -22,13 +22,12 @@ class SpectronautReader(ModifiedSequenceReader): def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame: """Spectronaut-specific preprocessing of output data.""" - # Obtain matching charge columns from the psm_reader.yaml available_charge_columns = load_yaml( Path(CONST_FILE_FOLDER) / PSM_READER_YAML_FILE_NAME )[self._reader_type]["column_mapping"]["charge"] - - self.precursor_charge_column = "PrecursorCharge" + + self.precursor_charge_column = "PrecursorCharge" for charge_col in available_charge_columns: if charge_col in df.columns: self.precursor_charge_column = charge_col @@ -36,8 +35,8 @@ def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame: if "ReferenceRun" in df.columns: df.drop_duplicates( - ["ReferenceRun", self.mod_seq_column, self.precursor_charge_column], - inplace=True + ["ReferenceRun", self.mod_seq_column, self.precursor_charge_column], + inplace=True, ) else: df.drop_duplicates(