Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions alphabase/constants/const_files/psm_reader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ spectronaut_report:
'proteins': ['PG.ProteinNames','PG.ProteinGroups']
'genes': 'PG.Genes'
'uniprot_ids': 'PG.UniProtIds'
'charge': 'charge'
'charge': ['charge']
mod_seq_columns:
- 'ModifiedSequence'
precursor_id_columns:
Expand All @@ -233,14 +233,14 @@ spectronaut:
rt_unit: irt
fixed_C57: False
column_mapping:
'raw_name': 'ReferenceRun'
'raw_name': ['ReferenceRun', 'R.Label']
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also here: please indicate the version each of these column names belongs to (at least for those columns you know by heart right now ;-))

'sequence': ['StrippedPeptide','PeptideSequence']
'charge': 'PrecursorCharge'
'charge': ['PrecursorCharge', 'FG.Charge']
'rt': ['RT','iRT','Tr_recalibrated','RetentionTime','NormalizedRetentionTime']
'ccs': 'CCS'
'precursor_mz': 'PrecursorMz'
'mobility': ['Mobility','IonMobility','PrecursorIonMobility']
'proteins': ['Protein Name','ProteinId','ProteinID','ProteinName','ProteinGroup','ProteinGroups']
'proteins': ['Protein Name','ProteinId','ProteinID','ProteinName','ProteinGroup','ProteinGroups','PG.ProteinGroups']
'uniprot_ids': ['UniProtIds','UniProtID','UniprotId']
'genes': ['Genes','Gene','GeneName','GeneNames']
mod_seq_columns:
Expand All @@ -249,7 +249,7 @@ spectronaut:
- 'FullUniModPeptideName'
- 'ModifiedPeptideSequence'
- 'LabeledSequence'
- 'FullUniModPeptideName'
- "EG.ModifiedSequence"
precursor_id_columns:
- "EG.PrecursorId"
modification_mapping_type: 'maxquant'
Expand Down
21 changes: 19 additions & 2 deletions alphabase/psm_reader/dia_psm_reader.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
"""Readers for Spectronaut's output library and reports, Swath data and DIANN data."""

from pathlib import Path
from typing import List, Optional

import numpy as np
import pandas as pd

from alphabase.constants._const import CONST_FILE_FOLDER, PSM_READER_YAML_FILE_NAME
from alphabase.psm_reader.keys import PsmDfCols
from alphabase.psm_reader.maxquant_reader import ModifiedSequenceReader
from alphabase.psm_reader.psm_reader import psm_reader_provider
from alphabase.yaml_utils import load_yaml


class SpectronautReader(ModifiedSequenceReader):
Expand All @@ -19,12 +22,26 @@ class SpectronautReader(ModifiedSequenceReader):

def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame:
"""Spectronaut-specific preprocessing of output data."""
# Obtain matching charge columns from the psm_reader.yaml
available_charge_columns = load_yaml(
Path(CONST_FILE_FOLDER) / PSM_READER_YAML_FILE_NAME
)[self._reader_type]["column_mapping"]["charge"]

self.precursor_charge_column = "PrecursorCharge"
for charge_col in available_charge_columns:
if charge_col in df.columns:
self.precursor_charge_column = charge_col
break
Comment on lines +26 to +34
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this extra logic required? Isn't this already covered by `_translate_columns`?


if "ReferenceRun" in df.columns:
df.drop_duplicates(
["ReferenceRun", self.mod_seq_column, "PrecursorCharge"], inplace=True
["ReferenceRun", self.mod_seq_column, self.precursor_charge_column],
inplace=True,
)
else:
df.drop_duplicates([self.mod_seq_column, "PrecursorCharge"], inplace=True)
df.drop_duplicates(
[self.mod_seq_column, self.precursor_charge_column], inplace=True
)
df.reset_index(drop=True, inplace=True)

return df
Expand Down
Loading