Skip to content

Commit dadbbf2

Browse files
sevyharrisrwest
authored andcommitted
make uncertainties add in quadrature
1 parent e31cc99 commit dadbbf2

2 files changed

Lines changed: 46 additions & 37 deletions

File tree

rmgpy/tools/uncertainty.py

Lines changed: 37 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class ThermoParameterUncertainty(object):
4343
This class is an engine that generates the species uncertainty based on its thermo sources.
4444
"""
4545

46-
def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.1, dG_ADS_correction=6.918, dG_surf_lib=6.918):
46+
def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.316, dG_ADS_correction=6.918, dG_surf_lib=6.918):
4747
"""
4848
Initialize the different uncertainties dG_library, dG_QM, dG_GAV, and dG_other with set values
4949
in units of kcal/mol.
@@ -62,22 +62,22 @@ def get_uncertainty_value(self, source):
6262
"""
6363
Retrieve the uncertainty value in kcal/mol when the source of the thermo of a species is given.
6464
"""
65-
dG = 0.0
65+
varG = 0.0
6666
if 'Library' in source:
67-
dG += self.dG_library
67+
varG += self.dG_library ** 2
6868
if 'Surface_Library' in source:
69-
dG += self.dG_surf_lib
69+
varG += self.dG_surf_lib ** 2
7070
if 'QM' in source:
71-
dG += self.dG_QM
71+
varG += self.dG_QM ** 2
7272
if 'GAV' in source:
73-
dG += self.dG_GAV # Add a fixed uncertainty for the GAV method
73+
varG += self.dG_GAV ** 2 # Add a fixed uncertainty for the GAV method
7474
for group_type, group_entries in source['GAV'].items():
7575
group_weights = [groupTuple[-1] for groupTuple in group_entries]
76-
dG += np.sum([weight * self.dG_group for weight in group_weights])
76+
varG += np.sum([weight ** 2 * self.dG_group ** 2 for weight in group_weights])
7777
if 'ADS' in source:
78-
dG += self.dG_ADS_correction # Add adsorption correction uncertainty
78+
varG += self.dG_ADS_correction ** 2 # Add adsorption correction uncertainty
7979

80-
return dG
80+
return np.sqrt(varG)
8181

8282
def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=None, corr_group_type=None):
8383
"""
@@ -170,39 +170,39 @@ def get_uncertainty_value(self, source):
170170
"""
171171
Retrieve the dlnk uncertainty when the source of the reaction kinetics are given
172172
"""
173-
dlnk = 0.0
173+
varlnk = 0.0
174174
if 'Library' in source:
175175
# Should be a single library reaction source
176-
dlnk += self.dlnk_library
176+
varlnk += self.dlnk_library ** 2
177177
elif 'Surface_Library' in source:
178178
# Should be a single library reaction source
179-
dlnk += self.dlnk_surf_library
179+
varlnk += self.dlnk_surf_library ** 2
180180
elif 'PDep' in source:
181181
# Should be a single pdep reaction source
182-
dlnk += self.dlnk_pdep
182+
varlnk += self.dlnk_pdep ** 2
183183
elif 'Training' in source:
184184
# Should be a single training reaction
185185
# Although some training entries may be used in reverse,
186-
# We still consider the kinetics to be directly dependent
186+
# We still consider the kinetics to be directly dependent
187187
if 'surface' in source['Training'][0].lower():
188-
dlnk += self.dlnk_surf_training
188+
varlnk += self.dlnk_surf_training ** 2
189189
else:
190-
dlnk += self.dlnk_training
190+
varlnk += self.dlnk_training ** 2
191191
elif 'Rate Rules' in source:
192192
family_label = source['Rate Rules'][0]
193193
source_dict = source['Rate Rules'][1]
194194
exact = source_dict['exact']
195195
rule_weights = [ruleTuple[-1] for ruleTuple in source_dict['rules']]
196196
training_weights = [trainingTuple[-1] for trainingTuple in source_dict['training']]
197197

198-
dlnk += self.dlnk_family
198+
varlnk += self.dlnk_family ** 2
199199

200200
N = len(rule_weights) + len(training_weights)
201201
if 'node_std_dev' in source_dict:
202202
# Handle autogen BM trees
203203
if source_dict['node_std_dev'] < 0:
204204
raise ValueError('Invalid value for std dev of kinetics family rule node')
205-
dlnk += source_dict['node_std_dev']
205+
varlnk += np.float_power(source_dict['node_std_dev'], 2.0)
206206
if source_dict['node_n_train'] is None:
207207
raise ValueError('Invalid number of training reactions for kinetics family rule node')
208208
N = source_dict['node_n_train']
@@ -211,28 +211,28 @@ def get_uncertainty_value(self, source):
211211
# every node template has its own fitted rate rule by definition, but here we use the
212212
# number of training reactions as an approximation of the node's specificity/generality
213213
# and add a penalty for being too general (large # of training reactions)
214-
dlnk += np.log10(N + 1) * self.dlnk_nonexact
214+
varlnk += (np.log10(N + 1) * self.dlnk_nonexact) ** 2
215215
else:
216216
# Handle hand-made trees
217217
if not exact:
218218
# nonexactness contribution increases as N increases
219-
dlnk += np.log10(N + 1) * self.dlnk_nonexact
219+
varlnk += (np.log10(N + 1) * self.dlnk_nonexact) ** 2
220220

221221
if 'surface' in family_label.lower():
222-
dlnk += np.sum([weight * self.dlnk_surf_rule for weight in rule_weights])
223-
dlnk += np.sum([weight * self.dlnk_surf_training for weight in training_weights])
222+
varlnk += np.sum([weight ** 2 * self.dlnk_surf_rule ** 2 for weight in rule_weights])
223+
varlnk += np.sum([weight ** 2 * self.dlnk_surf_training ** 2 for weight in training_weights])
224224
else:
225225
# Add the contributions from rules
226-
dlnk += np.sum([weight * self.dlnk_rule for weight in rule_weights])
226+
varlnk += np.sum([weight ** 2 * self.dlnk_rule ** 2 for weight in rule_weights])
227227
# Add the contributions from training
228228
# Even though these source from training reactions, we actually
229229
# use the uncertainty for rate rules, since these are now approximations
230230
# of the original reaction. We consider these to be independent of original the training
231231
# parameters because the rate rules may be reversing the training reactions,
232232
# which leads to more complicated dependence
233-
dlnk += np.sum([weight * self.dlnk_rule for weight in training_weights])
233+
varlnk += np.sum([weight ** 2 * self.dlnk_rule ** 2 for weight in training_weights])
234234

235-
return dlnk
235+
return np.sqrt(varlnk)
236236

237237
def get_partial_uncertainty_value(self, source, corr_source_type, corr_param=None, corr_family=None):
238238
"""
@@ -442,10 +442,16 @@ def extract_sources_from_model(self):
442442
"""
443443
self.species_sources_dict = {}
444444
self.extra_species = []
445+
allowed_source_keys = {'Library', 'QM', 'GAV', 'ADS'}
445446
for species in self.species_list:
446447
if species not in self.extra_species:
447448
source = self.database.thermo.extract_source_from_comments(species)
448-
assert source.keys() <= {'Library', 'QM', 'GAV', 'ADS'}, 'Source of thermo must be either Library, QM, GAV, or ADS'
449+
unexpected_source_keys = set(source.keys()) - allowed_source_keys
450+
if unexpected_source_keys:
451+
raise ValueError(
452+
f'Source of thermo must be either Library, QM, GAV, or ADS; '
453+
f'got unexpected source keys {unexpected_source_keys} for species {species.label}'
454+
)
449455

450456
# Now prep the source data
451457
# Do not alter the GAV information, but reassign QM and Library sources to the species indices that they came from
@@ -501,7 +507,11 @@ def extract_sources_from_model(self):
501507
elif len(source) == 3:
502508
# combination of adsorption correction, GAV (radical), and Library/ML
503509

504-
assert species.contains_surface_site(), 'only surface species should have 3 sources: adsorption correction, GAV, library/ML'
510+
if not species.contains_surface_site():
511+
raise ValueError(
512+
f'Only surface species should have 3 thermo sources (adsorption correction, GAV, and library/QM); '
513+
f'got species={species.label}, source={source}'
514+
)
505515

506516
# retrieve the desorbed version of the surface species-- the thing the adsorption correction was applied to during thermo estimation
507517
dummy_gas_species = Species()

test/rmgpy/tools/uncertaintyTest.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,12 @@ def test_uncertainty_assignment(self):
169169

170170
np.testing.assert_allclose(
171171
thermo_unc,
172-
[1.5, 1.5, 2.0, 1.9, 3.1, 1.5, 1.9, 2.0, 2.0, 1.9, 2.2, 1.9, 2.0, 1.5, 3.1, 1.9, 1.5, 2.0, 1.7, 1.8, 1.8, 1.9, 1.8, 1.9, 1.9],
172+
[1.5, 1.5, 1.7745, 1.7461, 2.1447, 1.5, 1.6879, 1.7173, 1.7745, 1.7461, 1.7745, 1.7461, 1.7745, 1.5, 2.1447, 1.6879, 1.5, 1.7745, 1.6277, 1.6581, 1.6581, 1.6879, 1.5967, 1.6277, 1.6277],
173173
rtol=1e-4,
174174
)
175175
np.testing.assert_allclose(
176176
kinetic_unc,
177-
# the 7.81 value comes from the SIDT tree node uncertainty: 5.756 + non-exact penalty for N=1: log10(1+1) * 3.5 + family uncertainty: 1.0
178-
[0.5, 1.5, 3.169924, 3.169924, 2.553605, 0.5, 2.0, 7.81, 7.81, 0.5],
177+
[0.5, 1.118, 1.9783, 1.9783, 1.5363, 0.5, 2.0, 5.9369, 5.9369, 0.5],
179178
rtol=1e-4
180179
)
181180

@@ -185,13 +184,13 @@ def test_specific_species_uncertainties(self):
185184
"""
186185

187186
expected_results = { # order is (total_uncertainty, [group_names], [group_counts])
188-
'CCCC': (1.9, ['Cs-CsCsHH', 'Cs-CsHHH'], [2, 2]),
189-
'CCCCCCCCCC': (2.5, ['Cs-CsCsHH', 'Cs-CsHHH'], [8, 2]),
190-
'CC(OO)CC': (2.1, ['O2s-OsCs', 'O2s-OsH', 'Cs-CsCsOsH', 'Cs-CsCsHH', 'Cs-CsHHH'], [1, 1, 1, 1, 2]),
191-
'C=NCC': (1.9, ['N3d-CdCs', 'Cs-(N3dCd)CsHH', 'Cs-CsHHH', 'Cd-N3dHH'], [1, 1, 1, 1]),
192-
'C=C': (1.7, ['Cds-CdsHH'], [2]),
193-
'C*': (10.018, ['CH3'], [1]), # Gas library + radical + adsorption correction
194-
'O=[CH]*': (8.618, ['Cds-OdHH', 'HCdsJO'], [1, 1]), # GAV + radical + adsorption correction
187+
'CCCC': (1.7460950718675086, ['Cs-CsCsHH', 'Cs-CsHHH'], [2, 2]),
188+
'CCCCCCCCCC': (3.006693865361088, ['Cs-CsCsHH', 'Cs-CsHHH'], [8, 2]),
189+
'CC(OO)CC': (1.7460950718675086, ['O2s-OsCs', 'O2s-OsH', 'Cs-CsCsOsH', 'Cs-CsCsHH', 'Cs-CsHHH'], [1, 1, 1, 1, 2]),
190+
'C=NCC': (1.6277051330016747, ['N3d-CdCs', 'Cs-(N3dCd)CsHH', 'Cs-CsHHH', 'Cd-N3dHH'], [1, 1, 1, 1]),
191+
'C=C': (1.6277051330016747, ['Cds-CdsHH'], [2]),
192+
'C*': (7.242829557569335, ['CH3'], [1]), # Gas library + radical + adsorption correction
193+
'O=[CH]*': (7.0928439994123655, ['Cds-OdHH', 'HCdsJO'], [1, 1]), # GAV + radical + adsorption correction
195194
}
196195

197196
uncertainty = rmgpy.tools.uncertainty.Uncertainty()

0 commit comments

Comments
 (0)