Skip to content

Commit 5e22dea

Browse files
authored
Merge pull request #125 from MannLabs/fix_counting_stats_w_shorter_trees
Fix counting statistics with shorter trees
2 parents 42bd262 + 07806ae commit 5e22dea

3 files changed

Lines changed: 72 additions & 486 deletions

File tree

alphaquant/cluster/cluster_missingval.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,42 @@
88

99
PVALUE_THRESHOLD_FOR_INTENSITY_BASED_COUNTING = 0.1

# Determines at which level missing value testing is performed.
# Set once based on tree structure, then reused: the value stays None until the
# first call to _get_nodes_to_test, which stores the result of
# _determine_missingval_test_level ("mod_seq_charge" or "base") here and reads
# it back on every later call.
MISSINGVAL_TEST_LEVEL = None
14+
15+
16+
def _determine_missingval_test_level(root_node):
    """Determine the appropriate level for missing value statistical testing.

    Scenarios:
        1) "mod_seq_charge" exists in tree -> test at mod_seq_charge level
        2) "mod_seq" is one level above leaves -> test at base ion level
        3) "seq" is one level above leaves -> test at base ion level
        4) "gene" is one level above leaves -> test at base ion level

    For scenarios 2-4 only the parent of the first leaf is inspected.

    Args:
        root_node: Root node of the tree. The root must expose ``type``,
            ``descendants`` and ``leaves``; every node must expose ``type``
            and every leaf must expose ``parent``.

    Returns:
        "mod_seq_charge" for scenario 1, "base" for scenarios 2-4.

    Raises:
        ValueError: If the root node has no children, or if the parent of the
            first leaf is not of type "mod_seq", "seq" or "gene".
    """
    # Scenario 1: fragment-level data — everything below mod_seq_charge is
    # collapsed to mod_seq_charge as the lowest identification level.
    tree_nodes = (root_node, *root_node.descendants)
    if any(node.type == "mod_seq_charge" for node in tree_nodes):
        return "mod_seq_charge"

    # For all other cases, check what's one level above the leaves.
    leaf_parent = root_node.leaves[0].parent
    if leaf_parent is None:
        # A childless root is its own (only) leaf and has no parent; fail with
        # the documented exception type instead of an AttributeError on None.
        raise ValueError("Unexpected tree structure: the root node has no children, "
                         "so there is no level above the leaves to inspect.")

    leaf_parent_type = leaf_parent.type
    # Scenarios 2-4: charged peptides without fragments ("mod_seq"), peptides
    # without charge info ("seq"), or leaves directly under "gene".
    if leaf_parent_type in ("mod_seq", "seq", "gene"):
        return "base"

    raise ValueError(f"Unexpected tree structure: leaf parent type is '{leaf_parent_type}'. "
                     f"Expected one of: 'mod_seq', 'seq', 'gene', or tree with 'mod_seq_charge' nodes.")
46+
1147
def create_protnode_from_missingval_ions(gene_name, diffions, normed_c1, normed_c2):
    """Return the ``prot_node`` of a freshly built MissingValProtNodeCreator.

    All four arguments are forwarded unchanged, in order, to
    MissingValProtNodeCreator.
    """
    node_creator = MissingValProtNodeCreator(gene_name, diffions, normed_c1, normed_c2)
    return node_creator.prot_node
1349

@@ -76,11 +112,22 @@ def _assign_properties_to_missingval_base_ions(self, root_node):
76112

77113

78114
@staticmethod
def _get_nodes_to_test(root_node):
    """Return the nodes on which the missing value statistical test is run.

    The level is read from the module-level MISSINGVAL_TEST_LEVEL. While that
    is still None, it is computed from ``root_node`` with
    _determine_missingval_test_level and stored, so later calls reuse the
    level found for the first tree.

    Returns:
        Every node of type "mod_seq_charge" under ``root_node`` when the level
        is "mod_seq_charge"; otherwise ``root_node.leaves``.
    """
    global MISSINGVAL_TEST_LEVEL

    # NOTE(review): the level is cached for the whole process, so a tree with a
    # different structure handled later would still use the first tree's level
    # — confirm this is intended.
    test_level = MISSINGVAL_TEST_LEVEL
    if test_level is None:
        test_level = _determine_missingval_test_level(root_node)
        MISSINGVAL_TEST_LEVEL = test_level

    if test_level != "mod_seq_charge":
        # "base": in short trees (no fragments) the leaves are the precursors
        # themselves — the right level to test.
        return root_node.leaves

    return anytree.search.findall(root_node, filter_=lambda candidate: candidate.type == "mod_seq_charge")
84131

85132

86133
def _propagate_properties_to_nodes_to_test(self,nodes_to_test): #goes through each node to test and merges the properties from its base to the node itself

alphaquant/config/quant_reader_config.yaml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1342,8 +1342,25 @@ diaumpire_precursor_ms1:
13421342
ion_cols:
13431343
- Peptide Key
13441344

1345+
fragpipe_precursor:
1346+
format: widetable
1347+
quant_pre_or_suffix: " Intensity"
1348+
protein_cols:
1349+
- Protein
1350+
ion_hierarchy:
1351+
sequence_int:
1352+
order: [SEQ, MOD, CHARGE]
1353+
mapping:
1354+
SEQ:
1355+
- Peptide Sequence
1356+
MOD:
1357+
- Modified Sequence
1358+
CHARGE:
1359+
- Charge
1360+
use_iontree: False
1361+
ml_level: SEQ
13451362

1346-
fragpipe_precursors:
1363+
fragpipe_modseq:
13471364
format: widetable
13481365
quant_pre_or_suffix: " Intensity"
13491366
protein_cols:
@@ -1358,3 +1375,6 @@ fragpipe_precursors:
13581375
- Modified Sequence
13591376
use_iontree: False
13601377
ml_level: SEQ
1378+
1379+
1380+

0 commit comments

Comments
 (0)