From 69f7f5c45d8cb129e053070bf8694c4df2555e76 Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Fri, 13 Feb 2026 13:24:13 +0100 Subject: [PATCH 1/7] Add missing value testing level determination --- alphaquant/cluster/cluster_missingval.py | 56 ++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/alphaquant/cluster/cluster_missingval.py b/alphaquant/cluster/cluster_missingval.py index f1d2763a..3f4f7c07 100644 --- a/alphaquant/cluster/cluster_missingval.py +++ b/alphaquant/cluster/cluster_missingval.py @@ -8,6 +8,44 @@ PVALUE_THRESHOLD_FOR_INTENSITY_BASED_COUNTING = 0.1 +# Determines at which level missing value testing is performed. +# Set once based on tree structure, then reused. +MISSINGVAL_TEST_LEVEL = None + + +def determine_missingval_test_level(root_node): + """Determine the appropriate level for missing value statistical testing. + + Scenarios: + 1) "mod_seq_charge" exists in tree -> test at mod_seq_charge level + 2) "mod_seq" is one level above leaves -> test at base ion level + 3) "seq" is one level above leaves -> test at base ion level + 4) "gene" is one level above leaves -> test at base ion level + """ + global MISSINGVAL_TEST_LEVEL + + # Check if mod_seq_charge nodes exist (fragment-level data) + mod_seq_charge_nodes = anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") + if len(mod_seq_charge_nodes) > 0: + MISSINGVAL_TEST_LEVEL = "mod_seq_charge" + return + + # For all other cases, check what's one level above leaves + leaf_parent_type = root_node.leaves[0].parent.type + + if leaf_parent_type == "mod_seq": + # Scenario 2: charged peptides without fragments + MISSINGVAL_TEST_LEVEL = "base" + elif leaf_parent_type == "seq": + # Scenario 3: peptides without charge info + MISSINGVAL_TEST_LEVEL = "base" + elif leaf_parent_type == "gene": + # Scenario 4: simplest hierarchy, leaves directly under gene + MISSINGVAL_TEST_LEVEL = "base" + else: + raise ValueError(f"Unexpected tree structure: leaf parent type is '{leaf_parent_type}'. " + f"Expected one of: 'mod_seq', 'seq', 'gene', or tree with 'mod_seq_charge' nodes.") + def create_protnode_from_missingval_ions(gene_name,diffions, normed_c1, normed_c2): return MissingValProtNodeCreator(gene_name, diffions, normed_c1, normed_c2).prot_node @@ -76,11 +114,21 @@ def _assign_properties_to_missingval_base_ions(self, root_node): @staticmethod - def _get_nodes_to_test(root_node): #get the nodes in the lowest level that is relevant for the binomial test - if root_node.leaves[0].parent.type == "mod_seq": #when AlphaQuant works with precursors only (not fragments), the precursors themselves are the "base ions" and the "mod_seq_charge" node does not exist - return root_node.children - else: + def _get_nodes_to_test(root_node): + """Get the nodes at which to perform the missing value statistical test. + + Uses MISSINGVAL_TEST_LEVEL which is set once based on tree structure. + """ + global MISSINGVAL_TEST_LEVEL + + # Set the test level if not already determined + if MISSINGVAL_TEST_LEVEL is None: + determine_missingval_test_level(root_node) + + if MISSINGVAL_TEST_LEVEL == "mod_seq_charge": return anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") + else: # "base" + return root_node.leaves def _propagate_properties_to_nodes_to_test(self,nodes_to_test): #goes through each node to test and merges the properties from it's base to the node itself From 989e3e783ec04e98391aa8ab328b2653d7520f8e Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Fri, 13 Feb 2026 13:24:51 +0100 Subject: [PATCH 2/7] add fragpipe config that includes charge state --- alphaquant/config/quant_reader_config.yaml | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/alphaquant/config/quant_reader_config.yaml b/alphaquant/config/quant_reader_config.yaml index 0b253cda..050149b6 100644 --- a/alphaquant/config/quant_reader_config.yaml +++ b/alphaquant/config/quant_reader_config.yaml @@ -1342,8 +1342,25 @@ diaumpire_precursor_ms1: ion_cols: - Peptide Key +fragpipe_precursor: + format: widetable + quant_pre_or_suffix: " Intensity" + protein_cols: + - Protein + ion_hierarchy: + sequence_int: + order: [SEQ, MOD, CHARGE] + mapping: + SEQ: + - Peptide Sequence + MOD: + - Modified Sequence + CHARGE: + - Charge + use_iontree: False + ml_level: SEQ -fragpipe_precursors: +fragpipe_modseq: format: widetable quant_pre_or_suffix: " Intensity" protein_cols: @@ -1358,3 +1375,6 @@ fragpipe_precursors: - Modified Sequence use_iontree: False ml_level: SEQ + + + From 3254cffe6c836211a3d9a41f533e519313d3a5c5 Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Mon, 25 May 2026 11:32:15 +0200 Subject: [PATCH 3/7] Remove unused quant_reader_config_lightweight.yaml File has no references anywhere in the codebase. Co-Authored-By: Claude Sonnet 4.6 --- .../quant_reader_config_lightweight.yaml | 481 ------------------ 1 file changed, 481 deletions(-) delete mode 100644 alphaquant/config/quant_reader_config_lightweight.yaml diff --git a/alphaquant/config/quant_reader_config_lightweight.yaml b/alphaquant/config/quant_reader_config_lightweight.yaml deleted file mode 100644 index 74704faa..00000000 --- a/alphaquant/config/quant_reader_config_lightweight.yaml +++ /dev/null @@ -1,481 +0,0 @@ ---- -#this file determines the parameters used to convert long format tables as e.g. produced by Spectronaut or DIA-NN into a wide table format -alphadia_precursor_protein: - format: longtable - sample_ID: run - quant_ID: - precursor: mean_overlapping_intensity - protein_cols: - - pg_master - ion_hierarchy: - precursor: - order: [SEQ, MOD, CHARGE] - mapping: - SEQ: - - sequence - MOD: - - mods - CHARGE: - - charge - use_iontree: True - ml_level: CHARGE - filters: - protein_qval: - param: pg_qval - comparator: "<=" - value: 0.01 - - -alphapept_peptides: - format: longtable - sample_ID: shortname - quant_ID: - precursor_intensity: int_sum - protein_cols: - - protein_group - ion_hierarchy: - precursor_intensity: - order: [SEQ, CHARGE] - mapping: - SEQ: - - naked_sequence - CHARGE: - - charge - - use_iontree: False - -maxquant_peptides: - format: widetable - quant_pre_or_suffix: "Intensity " - protein_cols: - - Gene names - ion_cols: - - Sequence - ion_hierarchy: - sequence_int: - order: [SEQ, MOD] - mapping: - SEQ: - - Sequence - MOD: - - Mass - filters: - reverse: - param: Reverse - comparator: "!=" - value: "+" - contaminant: - param: Potential contaminant - comparator: "!=" - value: "+" - amino_acid: - param: Amino acid before - comparator: "!=" - value: "XYZ" - ml_level: SEQ - use_iontree: False - - -maxquant_evidence: - format: longtable - sample_ID: Experiment #Raw file - quant_ID: Intensity - protein_cols: - - Gene names - ion_cols: - - Modified sequence - - Charge - - -diann_fragion_ms1_corrected: - format: longtable - sample_ID: Run - quant_ID: - fragion: Fragment.Quant.Corrected - #Fragment.Quant.Raw - ms1iso: Ms1.Area - protein_cols: - - Genes - split_cols: - Fragment.Quant.Corrected: ";" - ion_hierarchy: - fragion: - order: [SEQ, MOD, CHARGE, FRGION] - mapping: - SEQ: - - Stripped.Sequence - MOD: - - Modified.Sequence - CHARGE: - - Precursor.Charge - FRGION: - - Fragment.Quant.Corrected - ms1iso: - order: [SEQ, MOD, CHARGE, MS1ISOTOPES] - mapping: - SEQ: - - Stripped.Sequence - MOD: - - Modified.Sequence - CHARGE: - - Precursor.Charge - MS1ISOTOPES: - - Precursor.Charge - use_iontree: True - ml_level: CHARGE - filters: - protein_qval: - param: Lib.PG.Q.Value - comparator: "<=" - value: 0.01 - - -diann_precursor_fragion_ms1: - format: longtable - sample_ID: Run - quant_ID: - fragion: Fragment.Quant.Raw - ms1iso: Ms1.Area - precursor: Precursor.Normalised - protein_cols: - - Genes - split_cols: - Fragment.Quant.Raw: ";" - ion_hierarchy: - fragion: - order: [SEQ, MOD, CHARGE, FRGION] - mapping: - SEQ: - - Stripped.Sequence - MOD: - - Modified.Sequence - CHARGE: - - Precursor.Charge - FRGION: - - Fragment.Quant.Raw - ms1iso: - order: [SEQ, MOD, CHARGE, MS1ISOTOPES] - mapping: - SEQ: - - Stripped.Sequence - MOD: - - Modified.Sequence - CHARGE: - - Precursor.Charge - MS1ISOTOPES: - - Precursor.Charge - precursor: - order: [SEQ, MOD, CHARGE, PRECURSOR] - mapping: - SEQ: - - Stripped.Sequence - MOD: - - Modified.Sequence - CHARGE: - - Precursor.Charge - PRECURSOR: - - Precursor.Charge - use_iontree: True - ml_level: CHARGE - filters: - protein_qval: - param: Lib.PG.Q.Value - comparator: "<=" - value: 0.01 - - -diann_precursor: - format: longtable - sample_ID: Run - quant_ID: - precursor: Precursor.Normalised - protein_cols: - - Genes - ion_hierarchy: - precursor: - order: [SEQ, MOD, CHARGE] - mapping: - SEQ: - - Stripped.Sequence - MOD: - - Modified.Sequence - CHARGE: - - Precursor.Charge - use_iontree: True - ml_level: CHARGE - filters: - protein_qval: - param: Lib.PG.Q.Value - comparator: "<=" - value: 0.01 - - - -spectronaut_ptm_fragion: - format: longtable - sample_ID: R.Label - quant_ID: - fragion: F.PeakArea - protein_cols: - - ptm_id - ion_cols: - - FG.Id - - F.FrgIon - - F.FrgLossType - - F.Charge - ion_hierarchy: - fragion: - order: [SEQ, MOD, CHARGE, FRGION] - mapping: - SEQ: - - PEP.StrippedSequence - MOD: - - ptm_mapped_modseq - CHARGE: - - FG.Charge - FRGION: - - F.FrgIon - - F.FrgLossType - - F.Charge - filters: - fragion_intensity: - param: F.PeakArea - comparator: ">" - value: 5.0 - use_iontree: True - ml_level: CHARGE - annotation_columns: - - PEP.StrippedSequence - - -spectronaut_fragion_ms1_gene: - format: longtable - sample_ID: R.Label - quant_ID: - fragion: F.PeakArea - ms1iso: FG.MS1IsotopeIntensities (Measured) - protein_cols: - - PG.Genes - ion_cols: - - FG.Id - - F.FrgIon - - F.FrgLossType - - F.Charge - split_cols: - FG.MS1IsotopeIntensities (Measured): ";" - ion_hierarchy: - fragion: - order: [SEQ, MOD, CHARGE, FRGION] - mapping: - SEQ: - - PEP.StrippedSequence - MOD: - - EG.ModifiedSequence - CHARGE: - - FG.Charge - FRGION: - - F.FrgIon - - F.FrgLossType - - F.Charge - ms1iso: - order: [SEQ, MOD, CHARGE, MS1ISOTOPES] - mapping: - SEQ: - - PEP.StrippedSequence - MOD: - - EG.ModifiedSequence - CHARGE: - - FG.Charge - MS1ISOTOPES: - - FG.MS1IsotopeIntensities (Measured) - filters: - fragion_intensity: - param: F.PeakArea - comparator: ">" - value: 5.0 - gene_unique: - param: PEP.IsGeneSpecific - comparator: "==" - value: "True" - use_iontree: True - ml_level: CHARGE - - -spectronaut_fragion_ms1_gene: - format: longtable - sample_ID: R.Label - quant_ID: - fragion: F.PeakArea - ms1iso: FG.MS1IsotopeIntensities (Measured) - protein_cols: - - PG.Genes - ion_cols: - - FG.Id - - F.FrgIon - - F.FrgLossType - - F.Charge - split_cols: - FG.MS1IsotopeIntensities (Measured): ";" - ion_hierarchy: - fragion: - order: [SEQ, MOD, CHARGE, FRGION] - mapping: - SEQ: - - PEP.StrippedSequence - MOD: - - EG.ModifiedSequence - CHARGE: - - FG.Charge - FRGION: - - F.FrgIon - - F.FrgLossType - - F.Charge - ms1iso: - order: [SEQ, MOD, CHARGE, MS1ISOTOPES] - mapping: - SEQ: - - PEP.StrippedSequence - MOD: - - EG.ModifiedSequence - CHARGE: - - FG.Charge - MS1ISOTOPES: - - FG.MS1IsotopeIntensities (Measured) - filters: - fragion_intensity: - param: F.PeakArea - comparator: ">" - value: 5.0 - use_iontree: True - ml_level: CHARGE - - -spectronaut_precursor_gene: - format: longtable - sample_ID: R.Label - quant_ID: - precursor: FG.Quantity - protein_cols: - - PG.Genes - ion_hierarchy: - precursor: - order: [SEQ, MOD, CHARGE] - mapping: - SEQ: - - PEP.StrippedSequence - MOD: - - EG.ModifiedSequence - CHARGE: - - FG.Charge - filters: - gene_unique: - param: PEP.IsGeneSpecific - comparator: "==" - value: "True" - use_iontree: True - ml_level: CHARGE - - -spectronaut_precursor_gene: - format: longtable - sample_ID: R.Label - quant_ID: - precursor: FG.Quantity - protein_cols: - - PG.Genes - ion_hierarchy: - precursor: - order: [SEQ, MOD, CHARGE] - mapping: - SEQ: - - PEP.StrippedSequence - MOD: - - EG.ModifiedSequence - CHARGE: - - FG.Charge - use_iontree: True - ml_level: CHARGE - - -openswath_precursor_aligned: - format: longtable - sample_ID: run_id - quant_ID: Intensity - protein_cols: - - ProteinName - ion_cols: - - peptide_group_label - filters: - decoy: - param: decoy - comparator: "==" - value: 0 - -openswath_pyprophet: - format: longtable - sample_ID: filename - quant_ID: Intensity - protein_cols: - - ProteinName - ion_cols: - - FullPeptideName - - Charge - filters: - decoy: - param: decoy - comparator: "==" - value: 0 - -skyline_precursor: - format: longtable - sample_ID: ReplicateName - quant_ID: TotalAreaFragment - protein_cols: - - ProteinName - ion_cols: - - PeptideModifiedSequence - - PrecursorCharge - -diaumpire_precursor_ms1: - format: widetable - protein_cols: - - Proteins - ion_cols: - - Peptide Key - - -diann_wideformat: - format: widetable - protein_cols: - - Protein.Group - ion_cols: - - Stripped.Sequence - - Modified.Sequence - - Precursor.Charge - ion_hierarchy: - sequence_int: - order: [SEQ, MOD] - mapping: - SEQ: - - Stripped.Sequence - MOD: - - Modified.Sequence - CH: - - Precursor.Charge - ml_level: SEQ - use_iontree: False - -fragpipe_precursors: - format: widetable - quant_pre_or_suffix: " Intensity" - protein_cols: - - Protein - ion_hierarchy: - sequence_int: - order: [SEQ, MOD] - mapping: - SEQ: - - Peptide Sequence - MOD: - - Modified Sequence - use_iontree: False From 31630caaac1f7f0ce7e94ce1d63e3c1c8dded37f Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Mon, 25 May 2026 11:32:28 +0200 Subject: [PATCH 4/7] Make determine_missingval_test_level private Not part of the public API; only called internally from _get_nodes_to_test. Co-Authored-By: Claude Sonnet 4.6 --- alphaquant/cluster/cluster_missingval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alphaquant/cluster/cluster_missingval.py b/alphaquant/cluster/cluster_missingval.py index 3f4f7c07..a3979bc3 100644 --- a/alphaquant/cluster/cluster_missingval.py +++ b/alphaquant/cluster/cluster_missingval.py @@ -13,7 +13,7 @@ MISSINGVAL_TEST_LEVEL = None -def determine_missingval_test_level(root_node): +def _determine_missingval_test_level(root_node): """Determine the appropriate level for missing value statistical testing. Scenarios: @@ -123,7 +123,7 @@ def _get_nodes_to_test(root_node): # Set the test level if not already determined if MISSINGVAL_TEST_LEVEL is None: - determine_missingval_test_level(root_node) + _determine_missingval_test_level(root_node) if MISSINGVAL_TEST_LEVEL == "mod_seq_charge": return anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") From 2c0b9691c463295a4212ebf8670e82c6295afd2b Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Mon, 25 May 2026 11:32:39 +0200 Subject: [PATCH 5/7] Add Scenario 1 comment to _determine_missingval_test_level Scenario 1 was missing an inline comment explaining why mod_seq_charge is the test level when fragment data is present. Co-Authored-By: Claude Sonnet 4.6 --- alphaquant/cluster/cluster_missingval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/alphaquant/cluster/cluster_missingval.py b/alphaquant/cluster/cluster_missingval.py index a3979bc3..223b902f 100644 --- a/alphaquant/cluster/cluster_missingval.py +++ b/alphaquant/cluster/cluster_missingval.py @@ -27,6 +27,7 @@ def _determine_missingval_test_level(root_node): # Check if mod_seq_charge nodes exist (fragment-level data) mod_seq_charge_nodes = anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") if len(mod_seq_charge_nodes) > 0: + # Scenario 1: fragment-level data — everything below mod_seq_charge is collapsed to mod_seq_charge as the lowest identification level MISSINGVAL_TEST_LEVEL = "mod_seq_charge" return From ea4d7de660135d92096153caa129c12e7297bf6d Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Mon, 25 May 2026 11:33:05 +0200 Subject: [PATCH 6/7] Eliminate global mutation in _determine_missingval_test_level Return the level value instead of setting the global inside the function; assign at the call site in _get_nodes_to_test. Co-Authored-By: Claude Sonnet 4.6 --- alphaquant/cluster/cluster_missingval.py | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/alphaquant/cluster/cluster_missingval.py b/alphaquant/cluster/cluster_missingval.py index 223b902f..983d788b 100644 --- a/alphaquant/cluster/cluster_missingval.py +++ b/alphaquant/cluster/cluster_missingval.py @@ -15,34 +15,31 @@ def _determine_missingval_test_level(root_node): """Determine the appropriate level for missing value statistical testing. - + Scenarios: 1) "mod_seq_charge" exists in tree -> test at mod_seq_charge level 2) "mod_seq" is one level above leaves -> test at base ion level - 3) "seq" is one level above leaves -> test at base ion level + 3) "seq" is one level above leaves -> test at base ion level 4) "gene" is one level above leaves -> test at base ion level """ - global MISSINGVAL_TEST_LEVEL - # Check if mod_seq_charge nodes exist (fragment-level data) mod_seq_charge_nodes = anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") if len(mod_seq_charge_nodes) > 0: # Scenario 1: fragment-level data — everything below mod_seq_charge is collapsed to mod_seq_charge as the lowest identification level - MISSINGVAL_TEST_LEVEL = "mod_seq_charge" - return - + return "mod_seq_charge" + # For all other cases, check what's one level above leaves leaf_parent_type = root_node.leaves[0].parent.type - + if leaf_parent_type == "mod_seq": # Scenario 2: charged peptides without fragments - MISSINGVAL_TEST_LEVEL = "base" + return "base" elif leaf_parent_type == "seq": # Scenario 3: peptides without charge info - MISSINGVAL_TEST_LEVEL = "base" + return "base" elif leaf_parent_type == "gene": # Scenario 4: simplest hierarchy, leaves directly under gene - MISSINGVAL_TEST_LEVEL = "base" + return "base" else: raise ValueError(f"Unexpected tree structure: leaf parent type is '{leaf_parent_type}'. " f"Expected one of: 'mod_seq', 'seq', 'gene', or tree with 'mod_seq_charge' nodes.") @@ -117,15 +114,15 @@ def _assign_properties_to_missingval_base_ions(self, root_node): @staticmethod def _get_nodes_to_test(root_node): """Get the nodes at which to perform the missing value statistical test. - + Uses MISSINGVAL_TEST_LEVEL which is set once based on tree structure. """ global MISSINGVAL_TEST_LEVEL - + # Set the test level if not already determined if MISSINGVAL_TEST_LEVEL is None: - _determine_missingval_test_level(root_node) - + MISSINGVAL_TEST_LEVEL = _determine_missingval_test_level(root_node) + if MISSINGVAL_TEST_LEVEL == "mod_seq_charge": return anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") else: # "base" From 05acc41199d5e95171e42d653671cfe4c9bef75a Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Mon, 25 May 2026 11:33:15 +0200 Subject: [PATCH 7/7] Clarify why root_node.leaves is returned in the base case In short trees (no fragment data), the leaves are the precursors themselves, making base-level testing the correct granularity. Co-Authored-By: Claude Sonnet 4.6 --- alphaquant/cluster/cluster_missingval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/alphaquant/cluster/cluster_missingval.py b/alphaquant/cluster/cluster_missingval.py index 983d788b..6dfa12b4 100644 --- a/alphaquant/cluster/cluster_missingval.py +++ b/alphaquant/cluster/cluster_missingval.py @@ -126,6 +126,7 @@ def _get_nodes_to_test(root_node): if MISSINGVAL_TEST_LEVEL == "mod_seq_charge": return anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") else: # "base" + # In short trees (no fragments), leaves are the precursors themselves — the right level to test return root_node.leaves