Skip to content

Commit 5bd3826

Browse files
committed
Undo separate handling of autogen trees in uncertainty
Earlier attempts tried to fit the autogenerated trees into this uncertainty framework by retrieving information from the tree nodes about the number of training reactions and leave-one-out variance. This caused major problems when trying to compute covariance matrices because you had to choose between using the BM node variance or the uncertainty framework's Delta lnk = 0.5 assumption. The new plan is to compute an empirical covariance matrix of the BM nodes and integrate this into a new, more generalized framework. But this separate handling of the trees has to be undone first to validate that the two frameworks produce identical results.
1 parent 2b516ee commit 5bd3826

2 files changed

Lines changed: 17 additions & 33 deletions

File tree

rmgpy/tools/uncertainty.py

Lines changed: 16 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -198,39 +198,23 @@ def get_uncertainty_value(self, source):
198198
varlnk += self.dlnk_family * self.dlnk_family
199199

200200
N = len(rule_weights) + len(training_weights)
201-
if 'node_std_dev' in source_dict:
202-
# Handle autogen BM trees
203-
if source_dict['node_std_dev'] < 0:
204-
raise ValueError('Invalid value for std dev of kinetics family rule node')
205-
varlnk += np.float_power(source_dict['node_std_dev'], 2.0)
206-
if source_dict['node_n_train'] is None:
207-
raise ValueError('Invalid number of training reactions for kinetics family rule node')
208-
N = source_dict['node_n_train']
209-
210-
# Technically every lookup in the autogenerated trees is an "exact" match because
211-
# every node template has its own fitted rate rule by definition, but here we use the
212-
# number of training reactions as an approximation of the node's specificity/generality
213-
# and add a penalty for being too general (large # of training reactions)
214-
varlnk += (np.log10(N + 1) * self.dlnk_nonexact) ** 2
215-
else:
216-
# Handle hand-made trees
217-
if not exact:
218-
# nonexactness contribution increases as N increases
219-
varlnk += (np.log10(N + 1) * self.dlnk_nonexact) ** 2
201+
if not exact:
202+
# nonexactness contribution increases as N increases
203+
varlnk += (np.log10(N + 1) * self.dlnk_nonexact) * (np.log10(N + 1) * self.dlnk_nonexact)
220204

221-
if 'surface' in family_label.lower():
222-
varlnk += np.sum([weight * weight * self.dlnk_surf_rule * self.dlnk_surf_rule for weight in rule_weights])
223-
varlnk += np.sum([weight * weight * self.dlnk_surf_training * self.dlnk_surf_training for weight in training_weights])
224-
else:
225-
# Add the contributions from rules
226-
varlnk += np.sum([weight * weight * self.dlnk_rule * self.dlnk_rule for weight in rule_weights])
227-
# Add the contributions from training
228-
# Even though these are sourced from training reactions, we actually
229-
# use the uncertainty for rate rules, since these are now approximations
230-
# of the original reaction. We consider these to be independent of the original training
231-
# parameters because the rate rules may be reversing the training reactions,
232-
# which leads to more complicated dependence
233-
varlnk += np.sum([weight * weight * self.dlnk_rule * self.dlnk_rule for weight in training_weights])
205+
if 'surface' in family_label.lower():
206+
varlnk += np.sum([weight * weight * self.dlnk_surf_rule * self.dlnk_surf_rule for weight in rule_weights])
207+
varlnk += np.sum([weight * weight * self.dlnk_surf_training * self.dlnk_surf_training for weight in training_weights])
208+
else:
209+
# Add the contributions from rules
210+
varlnk += np.sum([weight * weight * self.dlnk_rule * self.dlnk_rule for weight in rule_weights])
211+
# Add the contributions from training
212+
# Even though these are sourced from training reactions, we actually
213+
# use the uncertainty for rate rules, since these are now approximations
214+
# of the original reaction. We consider these to be independent of the original training
215+
# parameters because the rate rules may be reversing the training reactions,
216+
# which leads to more complicated dependence
217+
varlnk += np.sum([weight * weight * self.dlnk_rule * self.dlnk_rule for weight in training_weights])
234218

235219
return np.sqrt(varlnk)
236220

test/rmgpy/tools/uncertaintyTest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def test_uncertainty_assignment(self):
174174
)
175175
np.testing.assert_allclose(
176176
kinetic_unc,
177-
[0.5, 1.118, 1.9783, 1.9783, 1.5363, 0.5, 2.0, 5.9369, 5.9369, 0.5],
177+
[0.5, 1.118, 1.9783, 1.9783, 1.5363, 0.5, 2.0, 1.5363, 1.5363, 0.5],
178178
rtol=1e-4
179179
)
180180

0 commit comments

Comments
 (0)