diff --git a/alphabase/constants/const_files/psm_reader.yaml b/alphabase/constants/const_files/psm_reader.yaml index b7cd8ca2..23339ade 100644 --- a/alphabase/constants/const_files/psm_reader.yaml +++ b/alphabase/constants/const_files/psm_reader.yaml @@ -221,7 +221,7 @@ spectronaut_report: 'proteins': ['PG.ProteinNames','PG.ProteinGroups'] 'genes': 'PG.Genes' 'uniprot_ids': 'PG.UniProtIds' - 'charge': 'charge' + 'charge': ['charge'] mod_seq_columns: - 'ModifiedSequence' precursor_id_columns: @@ -233,14 +233,14 @@ spectronaut: rt_unit: irt fixed_C57: False column_mapping: - 'raw_name': 'ReferenceRun' + 'raw_name': ['ReferenceRun', 'R.Label'] 'sequence': ['StrippedPeptide','PeptideSequence'] - 'charge': 'PrecursorCharge' + 'charge': ['PrecursorCharge', 'FG.Charge'] 'rt': ['RT','iRT','Tr_recalibrated','RetentionTime','NormalizedRetentionTime'] 'ccs': 'CCS' 'precursor_mz': 'PrecursorMz' 'mobility': ['Mobility','IonMobility','PrecursorIonMobility'] - 'proteins': ['Protein Name','ProteinId','ProteinID','ProteinName','ProteinGroup','ProteinGroups'] + 'proteins': ['Protein Name','ProteinId','ProteinID','ProteinName','ProteinGroup','ProteinGroups','PG.ProteinGroups'] 'uniprot_ids': ['UniProtIds','UniProtID','UniprotId'] 'genes': ['Genes','Gene','GeneName','GeneNames'] mod_seq_columns: @@ -249,7 +249,7 @@ spectronaut: - 'FullUniModPeptideName' - 'ModifiedPeptideSequence' - 'LabeledSequence' - - 'FullUniModPeptideName' + - "EG.ModifiedSequence" precursor_id_columns: - "EG.PrecursorId" modification_mapping_type: 'maxquant' diff --git a/alphabase/psm_reader/dia_psm_reader.py b/alphabase/psm_reader/dia_psm_reader.py index 3e6e8bae..3c3b5f33 100644 --- a/alphabase/psm_reader/dia_psm_reader.py +++ b/alphabase/psm_reader/dia_psm_reader.py @@ -1,13 +1,16 @@ """Readers for Spectronaut's output library and reports, Swath data and DIANN data.""" +from pathlib import Path from typing import List, Optional import numpy as np import pandas as pd +from alphabase.constants._const import 
def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame:
    """Drop duplicated precursor rows from a Spectronaut table.

    The charge column is resolved from the ``charge`` entry of this reader
    type's ``column_mapping`` in the PSM reader YAML: the first configured
    name that is present in ``df`` is used, falling back to
    ``"PrecursorCharge"`` when none matches. The resolved name is stored on
    ``self.precursor_charge_column``.

    Rows are then de-duplicated on the modified-sequence column and the
    charge column (plus ``"ReferenceRun"`` when that column exists), and the
    index is reset.

    Parameters
    ----------
    df
        Table to clean up. It is modified in place.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object, de-duplicated and re-indexed.

    Raises
    ------
    KeyError
        If the YAML has no ``charge`` mapping for ``self._reader_type``.

    """
    # NOTE(review): the YAML is re-read from disk on every call; if the reader
    # already holds its resolved column mapping, prefer that -- TODO confirm.
    charge_candidates = load_yaml(
        Path(CONST_FILE_FOLDER) / PSM_READER_YAML_FILE_NAME
    )[self._reader_type]["column_mapping"]["charge"]

    # Single-column mappings may be written as a scalar in the YAML. Iterating
    # a bare string would test its characters against the column names, never
    # match, and silently fall through to the default.
    if isinstance(charge_candidates, str):
        charge_candidates = [charge_candidates]

    # First configured name found in the table wins; otherwise use the default.
    self.precursor_charge_column = next(
        (name for name in charge_candidates if name in df.columns),
        "PrecursorCharge",
    )

    dedup_columns = [self.mod_seq_column, self.precursor_charge_column]
    # NOTE(review): only "ReferenceRun" is treated as the run column here; other
    # raw-name columns (e.g. "R.Label") are not part of the key, so identical
    # precursors from different runs would be collapsed -- confirm intent.
    if "ReferenceRun" in df.columns:
        dedup_columns.insert(0, "ReferenceRun")

    df.drop_duplicates(subset=dedup_columns, inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df