Skip to content

Commit 5bd3826

Browse files
committed
Undo separate handling of autogen trees in uncertainty
Earlier attempts tried to fit the autogenerated trees into this uncertainty framework by retrieving information from the tree nodes about the number of training reactions and leave-one-out variance. This caused major problems when trying to compute covariance matrices because you had to choose between using the BM node variance or the uncertainty framework's Delta lnk = 0.5 assumption. The new plan is to compute an empirical covariance matrix of the BM nodes and integrate this into a new, more generalized framework. But this separate handling of the trees has to be undone first to validate that the two frameworks produce identical results.
1 parent 2b516ee commit 5bd3826

2 files changed

Lines changed: 17 additions & 33 deletions

File tree

rmgpy/tools/uncertainty.py

Lines changed: 16 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -198,39 +198,23 @@ def get_uncertainty_value(self, source):
198198
varlnk += self.dlnk_family * self.dlnk_family
199199

200200
N = len(rule_weights) + len(training_weights)
201-
if 'node_std_dev' in source_dict:
202-
# Handle autogen BM trees
203-
if source_dict['node_std_dev'] < 0:
204-
raise ValueError('Invalid value for std dev of kinetics family rule node')
205-
varlnk += np.float_power(source_dict['node_std_dev'], 2.0)
206-
if source_dict['node_n_train'] is None:
207-
raise ValueError('Invalid number of training reactions for kinetics family rule node')
208-
N = source_dict['node_n_train']
209-
210-
# Technically every lookup in the autogenerated trees is an "exact" match because
211-
# every node template has its own fitted rate rule by definition, but here we use the
212-
# number of training reactions as an approximation of the node's specificity/generality
213-
# and add a penalty for being too general (large # of training reactions)
214-
varlnk += (np.log10(N + 1) * self.dlnk_nonexact) ** 2
215-
else:
216-
# Handle hand-made trees
217-
if not exact:
218-
# nonexactness contribution increases as N increases
219-
varlnk += (np.log10(N + 1) * self.dlnk_nonexact) ** 2
201+
if not exact:
202+
# nonexactness contribution increases as N increases
203+
varlnk += (np.log10(N + 1) * self.dlnk_nonexact) * (np.log10(N + 1) * self.dlnk_nonexact)
220204

221-
if 'surface' in family_label.lower():
222-
varlnk += np.sum([weight * weight * self.dlnk_surf_rule * self.dlnk_surf_rule for weight in rule_weights])
223-
varlnk += np.sum([weight * weight * self.dlnk_surf_training * self.dlnk_surf_training for weight in training_weights])
224-
else:
225-
# Add the contributions from rules
226-
varlnk += np.sum([weight * weight * self.dlnk_rule * self.dlnk_rule for weight in rule_weights])
227-
# Add the contributions from training
228-
# Even though these are sourced from training reactions, we actually
229-
# use the uncertainty for rate rules, since these are now approximations
230-
# of the original reaction. We consider these to be independent of the original training
231-
# parameters because the rate rules may be reversing the training reactions,
232-
# which leads to more complicated dependence
233-
varlnk += np.sum([weight * weight * self.dlnk_rule * self.dlnk_rule for weight in training_weights])
205+
if 'surface' in family_label.lower():
206+
varlnk += np.sum([weight * weight * self.dlnk_surf_rule * self.dlnk_surf_rule for weight in rule_weights])
207+
varlnk += np.sum([weight * weight * self.dlnk_surf_training * self.dlnk_surf_training for weight in training_weights])
208+
else:
209+
# Add the contributions from rules
210+
varlnk += np.sum([weight * weight * self.dlnk_rule * self.dlnk_rule for weight in rule_weights])
211+
# Add the contributions from training
212+
# Even though these are sourced from training reactions, we actually
213+
# use the uncertainty for rate rules, since these are now approximations
214+
# of the original reaction. We consider these to be independent of the original training
215+
# parameters because the rate rules may be reversing the training reactions,
216+
# which leads to more complicated dependence
217+
varlnk += np.sum([weight * weight * self.dlnk_rule * self.dlnk_rule for weight in training_weights])
234218

235219
return np.sqrt(varlnk)
236220

test/rmgpy/tools/uncertaintyTest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def test_uncertainty_assignment(self):
174174
)
175175
np.testing.assert_allclose(
176176
kinetic_unc,
177-
[0.5, 1.118, 1.9783, 1.9783, 1.5363, 0.5, 2.0, 5.9369, 5.9369, 0.5],
177+
[0.5, 1.118, 1.9783, 1.9783, 1.5363, 0.5, 2.0, 1.5363, 1.5363, 0.5],
178178
rtol=1e-4
179179
)
180180

0 commit comments

Comments
 (0)