Skip to content

Commit 6ebc632

Browse files
committed
uncertainty: separate family and non-exact error
The previous implementation combined family and non-exact error into a total estimation error. But this requires assuming that family and non-exact error are perfectly correlated. I don't think that was the paper's original intention, nor do I think it makes sense, but more importantly the log10(N+1) doesn't play nicely with the new uncertainty formulation if you write it this way. If you look at the local analysis example in uncertaintyTest.py, you'll see that the correlated variance goes from about 3 to 1.8, which makes sense because we are no longer stacking non-exact on top of family, but considering them as unrelated.
1 parent 95a6e27 commit 6ebc632

2 files changed

Lines changed: 32 additions & 24 deletions

File tree

rmgpy/tools/uncertainty.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non
223223
Obtain the partial uncertainty dlnk/dlnk_corr*dlnk_corr, where dlnk_corr is the correlated parameter
224224
225225
`corr_param` is the parameter identifier itself, which is the string identifier of the rate rule
226-
`corr_source_type` is a string, being either 'Rate Rules', 'Library', 'PDep', 'Training' or 'Estimation'
226+
`corr_source_type` is a string, being either 'Rate Rules', 'Library', 'PDep', 'Training', 'Estimation Nonexact', or 'Estimation Family'
227227
`corr_family` is a string used only when the source type is 'Rate Rules' and indicates the family
228228
"""
229229

@@ -275,24 +275,22 @@ def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=Non
275275
else:
276276
return self.dlnk_training
277277

278-
elif corr_source_type == 'Estimation':
279-
# Return all the uncorrelated uncertainty associated with using an estimation scheme
280278

279+
elif corr_source_type == 'Estimation Nonexact':
280+
# Return the uncorrelated uncertainty associated with using a non-exact rate rule
281281
if 'Rate Rules' in source:
282282
source_dict = source['Rate Rules'][1]
283283
exact = source_dict['exact']
284-
285-
family_label = source['Rate Rules'][0]
286-
dlnk = self.dlnk_family # Base uncorrelated uncertainty just from using rate rule estimation
287-
288-
# Additional uncertainty from using non-exact rate rule
289284
N = len(source_dict['rules']) + len(source_dict['training'])
290285
if not exact:
291286
# nonexactness contribution increases as N increases
292-
dlnk += np.log10(N + 1) * self.dlnk_nonexact
293-
return dlnk
287+
return np.log10(N + 1) * self.dlnk_nonexact
288+
elif corr_source_type == 'Estimation Family':
289+
# Return the uncertainty associated with using a family decision tree
290+
if 'Rate Rules' in source:
291+
return self.dlnk_family
294292
else:
295-
raise Exception('Kinetics correlated source must be Rate Rules, Library, PDep, Training, or Estimation')
293+
raise Exception('Kinetics correlated source must be Rate Rules, Library, PDep, Training, Estimation Nonexact, or Estimation Family')
296294

297295
# If we get here, it means that we did not find the correlated parameter in the source
298296
return None
@@ -729,11 +727,16 @@ def assign_parameter_uncertainties(self, g_param_engine=None, k_param_engine=Non
729727
label = '{} {}'.format(family, ruleEntry)
730728
dlnk[label] = dplnk
731729

732-
# There is also estimation error if rate rules are used
733-
est_dplnk = k_param_engine.get_partial_uncertainty_value(source, 'Estimation')
734-
if est_dplnk:
735-
label = 'Estimation {}'.format(reaction.to_chemkin(self.species_list, kinetics=False))
736-
dlnk[label] = est_dplnk
730+
# There is also estimation error if rate rules are used (nonexact and family contribute to this)
731+
nonexact_dplnk = k_param_engine.get_partial_uncertainty_value(source, 'Estimation Nonexact', corr_family=family)
732+
if nonexact_dplnk:
733+
label = 'Estimation Nonexact {}'.format(reaction.to_chemkin(self.species_list, kinetics=False))
734+
dlnk[label] = nonexact_dplnk
735+
736+
family_dplnk = k_param_engine.get_partial_uncertainty_value(source, 'Estimation Family', corr_family=family)
737+
if family_dplnk:
738+
label = 'Estimation Family {}'.format(reaction.to_chemkin(self.species_list, kinetics=False))
739+
dlnk[label] = family_dplnk
737740

738741
elif 'PDep' in source:
739742
dplnk = k_param_engine.get_partial_uncertainty_value(source, 'PDep', source['PDep'])

test/rmgpy/tools/uncertaintyTest.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ def test_local_analysis(self):
188188
expected_uncorrelated_total_variance = 1.8329056941266446
189189
expected_uncorrelated_thermo_variances = np.array([0.09781627, 0.00041446, 9.953e-05, 0.06186124, 0.17092419, 0.04856985, 0.00306632, 0.00391013])
190190
expected_uncorrelated_kinetics_variances = np.array([0.00080811, 0.01687337, 0.02022449, 0.15888459, 0.085189, 0.01588366, 0.00012957, 0.01605351, 1.1311145, 0.0010829])
191-
expected_correlated_total_variance = 3.006637059881831
191+
expected_correlated_total_variance = 1.7732795017083922
192192
expected_correlated_thermo_variances = np.array([0.04573167, 0.00014685, 3.263e-05, 0.03236887, 0.07672388, 0.0013764, 0.0001338, 0.00143231, 0.00028968, 0.01098087, 0.00031352, 0.01747643, 0.04856985, 0.09145902])
193-
expected_correlated_kinetics_variances = np.array([0.00011471, 0.11981721, 0.15516713, 2.0212174, 0.28939964, 0.00193506, 0.02838292, 0.0161796, 0.0040449, 0.00136045, 0.00168253, 0.00253735, 0.00253735, 0.00136045, 0.01687337, 0.01605351, 0.00080811, 0.00012957])
193+
expected_correlated_kinetics_variances = np.array([0.00011471, 0.11981721, 0.02176716, 0.47926886, 0.04059757, 0.00045884, 0.00673013, 0.0161796, 0.06070094, 0.53202843, 0.11321232, 0.00050935, 0.007471, 0.0040449, 0.00136045, 0.00168253, 0.00253735, 0.00253735, 0.00136045, 0.01687337, 0.01605351, 0.00080811, 0.00012957])
194194
expected_uncorrelated_thermo_labels = {
195195
'dln[C2H6(18)]/dG[C2H3(20)]',
196196
'dln[C2H6(18)]/dG[C2H4(11)]',
@@ -232,12 +232,17 @@ def test_local_analysis(self):
232232
expected_correlated_kinetics_labels = {
233233
'Disproportionation Root_Ext-1R!H-R_N-4R->O_N-Sp-5R!H=1R!H_Ext-4CHNS-R_N-6R!H->S_4CHNS->C_N-Sp-6BrBrBrCCCClClClFFFIIINNNOOOPPPSiSiSi#4C_6BrCClFINOPSi->C_N-1R!H-inRing_N-Sp-6C-4C',
234234
'Disproportionation Root_Ext-2R!H-R_2R!H->C_4R->C',
235-
'Estimation C2H3(20)+C3H8(19)<=>C2H4(11)+PC3H7(15)',
236-
'Estimation C2H5(12)+CH3CHCH3(21)<=>C2H6(18)+C3H6(22)',
237-
'Estimation C2H6(18)+PC3H7(15)<=>C2H5(12)+C3H8(19)',
238-
'Estimation C3H5(24)+CH2CH2CH2(17)<=>C3H5(23)+C3H6(22)',
239-
'Estimation CH3(14)+C3H8(19)<=>CH4(16)+PC3H7(15)',
240-
'Estimation CH3(14)+PC3H7(15)<=>CH4(16)+CH2CH2CH2(17)',
235+
'Estimation Family C2H3(20)+C3H8(19)<=>C2H4(11)+PC3H7(15)',
236+
'Estimation Family C2H5(12)+CH3CHCH3(21)<=>C2H6(18)+C3H6(22)',
237+
'Estimation Family C2H6(18)+PC3H7(15)<=>C2H5(12)+C3H8(19)',
238+
'Estimation Family C3H5(24)+CH2CH2CH2(17)<=>C3H5(23)+C3H6(22)',
239+
'Estimation Family CH3(14)+C3H8(19)<=>CH4(16)+PC3H7(15)',
240+
'Estimation Family CH3(14)+PC3H7(15)<=>CH4(16)+CH2CH2CH2(17)',
241+
'Estimation Nonexact C2H3(20)+C3H8(19)<=>C2H4(11)+PC3H7(15)',
242+
'Estimation Nonexact C2H5(12)+CH3CHCH3(21)<=>C2H6(18)+C3H6(22)',
243+
'Estimation Nonexact C2H6(18)+PC3H7(15)<=>C2H5(12)+C3H8(19)',
244+
'Estimation Nonexact C3H5(24)+CH2CH2CH2(17)<=>C3H5(23)+C3H6(22)',
245+
'Estimation Nonexact CH3(14)+C3H8(19)<=>CH4(16)+PC3H7(15)',
241246
'H_Abstraction C/H3/Cs;C_methyl',
242247
'H_Abstraction C/H3/Cs\\H2\\Cs|O;Cd_Cd\\H2_rad/Cs',
243248
'H_Abstraction C/H3/Cs\\H2\\O;C_methyl',

0 commit comments

Comments
 (0)