|
8 | 8 |
|
PVALUE_THRESHOLD_FOR_INTENSITY_BASED_COUNTING = 0.1

# Level at which the missing-value statistical test is performed.
# Starts as None; _get_nodes_to_test fills it in on its first call with the
# result of _determine_missingval_test_level ("mod_seq_charge" or "base") and
# reuses that value for every later call, whichever tree is passed in.
MISSINGVAL_TEST_LEVEL = None
| 14 | + |
| 15 | + |
def _determine_missingval_test_level(root_node):
    """Determine the appropriate level for missing value statistical testing.

    Scenarios:
        1) "mod_seq_charge" exists in tree -> test at mod_seq_charge level
        2) "mod_seq" is one level above leaves -> test at base ion level
        3) "seq" is one level above leaves -> test at base ion level
        4) "gene" is one level above leaves -> test at base ion level

    Args:
        root_node: Root of an anytree tree whose nodes expose a ``type``
            attribute.

    Returns:
        ``"mod_seq_charge"`` for scenario 1, ``"base"`` for scenarios 2-4.

    Raises:
        ValueError: If the tree contains no "mod_seq_charge" node and the
            parent of its first leaf is missing or has a type other than
            "mod_seq", "seq" or "gene".
    """
    # Scenario 1: fragment-level data — everything below mod_seq_charge is
    # collapsed to mod_seq_charge as the lowest identification level.
    mod_seq_charge_nodes = anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge")
    if mod_seq_charge_nodes:
        return "mod_seq_charge"

    # For all other cases, check what's one level above the leaves.
    # Only the first leaf is inspected.
    leaf_parent = root_node.leaves[0].parent
    if leaf_parent is None:
        # A root without children is its own (only) leaf and has no parent;
        # fail with the documented error type instead of an AttributeError.
        raise ValueError("Unexpected tree structure: the root node has no children, "
                         "so there is no level above the leaves to inspect.")

    leaf_parent_type = leaf_parent.type
    # Scenarios 2-4 (charged peptides without fragments, peptides without
    # charge info, leaves directly under gene) all test at the base ion level.
    if leaf_parent_type in ("mod_seq", "seq", "gene"):
        return "base"

    raise ValueError(f"Unexpected tree structure: leaf parent type is '{leaf_parent_type}'. "
                     f"Expected one of: 'mod_seq', 'seq', 'gene', or tree with 'mod_seq_charge' nodes.")
| 46 | + |
def create_protnode_from_missingval_ions(gene_name, diffions, normed_c1, normed_c2):
    """Build a MissingValProtNodeCreator from the arguments and return its ``prot_node``."""
    node_creator = MissingValProtNodeCreator(gene_name, diffions, normed_c1, normed_c2)
    return node_creator.prot_node
13 | 49 |
|
@@ -76,11 +112,22 @@ def _assign_properties_to_missingval_base_ions(self, root_node): |
76 | 112 |
|
77 | 113 |
|
@staticmethod
def _get_nodes_to_test(root_node):
    """Return the nodes on which the missing value statistical test is run.

    The level is read from the module-level MISSINGVAL_TEST_LEVEL. If that is
    still None, it is first computed from ``root_node`` via
    _determine_missingval_test_level and stored for all later calls.

    Returns:
        All nodes of type "mod_seq_charge" under ``root_node`` when the level
        is "mod_seq_charge"; otherwise the leaves of ``root_node``.
    """
    global MISSINGVAL_TEST_LEVEL

    # NOTE(review): the level is decided by the first tree seen and then kept
    # for the lifetime of the process — confirm all trees share one structure.
    if MISSINGVAL_TEST_LEVEL is None:
        MISSINGVAL_TEST_LEVEL = _determine_missingval_test_level(root_node)

    if MISSINGVAL_TEST_LEVEL != "mod_seq_charge":
        # "base": in short trees (no fragments), the leaves are the
        # precursors themselves — the right level to test.
        return root_node.leaves

    def _is_mod_seq_charge(node):
        return node.type == "mod_seq_charge"

    return anytree.search.findall(root_node, filter_=_is_mod_seq_charge)
84 | 131 |
|
85 | 132 |
|
86 | 133 | def _propagate_properties_to_nodes_to_test(self,nodes_to_test): #goes through each node to test and merges the properties from it's base to the node itself |
|
0 commit comments