Skip to content

Commit 33e1e95

Browse files
authored
Merge pull request #2917 to change Blowers-Masel fitting algorithm (when training kinetics trees)
Fix Blowers-Masel fitting: initial guesses, negative E0 handling, and spurious fit rejection Collected and rebased fixes from David Farina and Nora Khalil (some dating to 2021) for ArrheniusBM and ArrheniusChargeTransferBM, with additional refactoring Main behavioral change is that it now reject BM fits where abs(n) > 5.0 in _make_rule, as these indicate reactions not following BM enthalpy-dependence. Other changes are mostly optimizations and simplifications and edge cases.
2 parents 9cc2cfd + d9ab313 commit 33e1e95

3 files changed

Lines changed: 195 additions & 118 deletions

File tree

rmgpy/data/kinetics/family.py

Lines changed: 88 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -3678,19 +3678,22 @@ def cross_validate(self, folds=5, template_rxn_map=None, test_rxn_inds=None, T=1
36783678

36793679
uncertainties[rxn] = self.rules.entries[entry.label][0].data.uncertainty
36803680

3681-
36823681
L = list(set(template_rxn_map[entry.label]) - set(rxns_test))
36833682

36843683
if L != []:
36853684
if isinstance(L[0].kinetics, Arrhenius):
36863685
kinetics = ArrheniusBM().fit_to_reactions(L, recipe=self.forward_recipe.actions)
3687-
if kinetics.E0.value_si < 0.0 or len(L) == 1:
3686+
# These conditions (eg. n>5) for rejecting the BM fit should correspond
3687+
# to those in _marke_rule, and those just below:
3688+
if len(L) == 1 or kinetics.E0.value_si < 0.0 or abs(kinetics.n.value_si) > 5.0:
36883689
kinetics = average_kinetics([r.kinetics for r in L])
36893690
else:
36903691
kinetics = kinetics.to_arrhenius(rxn.get_enthalpy_of_reaction(298.))
36913692
else:
36923693
kinetics = ArrheniusChargeTransferBM().fit_to_reactions(L, recipe=self.forward_recipe.actions)
3693-
if kinetics.E0.value_si < 0.0 or len(L) == 1:
3694+
# These conditions (eg. n>5) for rejecting the BM fit should correspond
3695+
# to those in _marke_rule, and those just above:
3696+
if len(L) == 1 or kinetics.E0.value_si < 0.0 or abs(kinetics.n.value_si) > 5.0:
36943697
kinetics = average_kinetics([r.kinetics for r in L])
36953698
else:
36963699
kinetics = kinetics.to_arrhenius_charge_transfer(rxn.get_enthalpy_of_reaction(298.))
@@ -4596,79 +4599,93 @@ def _make_rule(rr):
45964599
for i, rxn in enumerate(rxns):
45974600
rxn.rank = ranks[i]
45984601
rxns = np.array(rxns)
4599-
rs = np.array([r for r in rxns if type(r.kinetics) != KineticsModel])
4602+
rs = np.array([r for r in rxns if type(r.kinetics) is not KineticsModel]) # KineticsModel is the base class with no data.
46004603
n = len(rs)
4601-
if n > 0 and isinstance(rs[0].kinetics, Marcus):
4604+
if n == 0:
4605+
return None
4606+
4607+
if isinstance(rs[0].kinetics, Marcus):
46024608
kin = average_kinetics([r.kinetics for r in rs])
46034609
return kin
46044610
data_mean = np.mean(np.log([r.kinetics.get_rate_coefficient(Tref) for r in rs]))
4605-
if n > 0:
4606-
if isinstance(rs[0].kinetics, Arrhenius):
4607-
arr = ArrheniusBM
4608-
else:
4609-
arr = ArrheniusChargeTransferBM
4610-
if n > 1:
4611-
kin = arr().fit_to_reactions(rs, recipe=recipe)
4612-
if n == 1 or kin.E0.value_si < 0.0:
4613-
kin = average_kinetics([r.kinetics for r in rs])
4614-
#kin.comment = "Only one reaction or Arrhenius BM fit bad. Instead averaged from {} reactions.".format(n)
4615-
if n == 1:
4616-
kin.uncertainty = RateUncertainty(mu=0.0, var=(np.log(fmax) / 2.0) ** 2, N=1, Tref=Tref, data_mean=data_mean, correlation=label)
4611+
4612+
if isinstance(rs[0].kinetics, Arrhenius):
4613+
arr = ArrheniusBM
4614+
else:
4615+
arr = ArrheniusChargeTransferBM
4616+
if n > 1:
4617+
kin = arr().fit_to_reactions(rs, recipe=recipe)
4618+
4619+
# If you update the following conditions, also update the cross_validate function.
4620+
if n == 1 or kin.E0.value_si < 0.0 or abs(kin.n.value_si) > 5.0:
4621+
if n > 1: #we have an ArrheniusBM
4622+
if kin.E0.value_si < 0.0:
4623+
reason = "E0<0"
4624+
elif abs(kin.n.value_si) > 5.0:
4625+
reason = "abs(n)>5"
46174626
else:
4627+
reason = "?"
4628+
4629+
# still run it through the averaging function when n=1 to standardize the units and run checks
4630+
kin = average_kinetics([r.kinetics for r in rs])
4631+
4632+
if n == 1:
4633+
kin.uncertainty = RateUncertainty(mu=0.0, var=(np.log(fmax) / 2.0) ** 2, N=1, Tref=Tref, data_mean=data_mean, correlation=label)
4634+
else:
4635+
kin.comment = f"Blowers-Masel fit was bad ({reason}) so instead averaged from {n} reactions."
4636+
dlnks = np.array([
4637+
np.log(
4638+
average_kinetics([r.kinetics for r in np.delete(rs, i)]).get_rate_coefficient(T=Tref) / rxn.get_rate_coefficient(T=Tref)
4639+
) for i, rxn in enumerate(rs)
4640+
]) # 1) fit to set of reactions without the current reaction (k) 2) compute log(kfit/kactual) at Tref
4641+
varis = (np.array([rank_accuracy_map[rxn.rank].value_si for rxn in rs]) / (2.0 * constants.R * Tref)) ** 2
4642+
# weighted average calculations
4643+
ws = 1.0 / varis
4644+
V1 = ws.sum()
4645+
V2 = (ws ** 2).sum()
4646+
mu = np.dot(ws, dlnks) / V1
4647+
s = np.sqrt(np.dot(ws, (dlnks - mu) ** 2) / (V1 - V2 / V1))
4648+
kin.uncertainty = RateUncertainty(mu=mu, var=s ** 2, N=n, Tref=Tref, data_mean=data_mean, correlation=label)
4649+
else:
4650+
if n == 1:
4651+
kin.uncertainty = RateUncertainty(mu=0.0, var=(np.log(fmax) / 2.0) ** 2, N=1, Tref=Tref, data_mean=data_mean, correlation=label)
4652+
else:
4653+
if isinstance(rs[0].kinetics, Arrhenius):
46184654
dlnks = np.array([
46194655
np.log(
4620-
average_kinetics([r.kinetics for r in rs[list(set(range(len(rs))) - {i})]]).get_rate_coefficient(T=Tref) / rxn.get_rate_coefficient(T=Tref)
4621-
) for i, rxn in enumerate(rs)
4622-
]) # 1) fit to set of reactions without the current reaction (k) 2) compute log(kfit/kactual) at Tref
4623-
varis = (np.array([rank_accuracy_map[rxn.rank].value_si for rxn in rs]) / (2.0 * 8.314 * Tref)) ** 2
4624-
# weighted average calculations
4625-
ws = 1.0 / varis
4626-
V1 = ws.sum()
4627-
V2 = (ws ** 2).sum()
4628-
mu = np.dot(ws, dlnks) / V1
4629-
s = np.sqrt(np.dot(ws, (dlnks - mu) ** 2) / (V1 - V2 / V1))
4630-
kin.uncertainty = RateUncertainty(mu=mu, var=s ** 2, N=n, Tref=Tref, data_mean=data_mean, correlation=label)
4631-
else:
4632-
if n == 1:
4633-
kin.uncertainty = RateUncertainty(mu=0.0, var=(np.log(fmax) / 2.0) ** 2, N=1, Tref=Tref, data_mean=data_mean, correlation=label)
4656+
arr().fit_to_reactions(np.delete(rs, i), recipe=recipe)
4657+
.to_arrhenius(rxn.get_enthalpy_of_reaction(Tref))
4658+
.get_rate_coefficient(T=Tref) / rxn.get_rate_coefficient(T=Tref)
4659+
) for i, rxn in enumerate(rs)
4660+
]) # 1) fit to set of reactions without the current reaction (k) 2) compute log(kfit/kactual) at Tref
46344661
else:
4635-
if isinstance(rs[0].kinetics, Arrhenius):
4636-
dlnks = np.array([
4637-
np.log(
4638-
arr().fit_to_reactions(rs[list(set(range(len(rs))) - {i})], recipe=recipe)
4639-
.to_arrhenius(rxn.get_enthalpy_of_reaction(Tref))
4640-
.get_rate_coefficient(T=Tref) / rxn.get_rate_coefficient(T=Tref)
4641-
) for i, rxn in enumerate(rs)
4642-
]) # 1) fit to set of reactions without the current reaction (k) 2) compute log(kfit/kactual) at Tref
4643-
else:
4644-
dlnks = np.array([
4645-
np.log(
4646-
arr().fit_to_reactions(rs[list(set(range(len(rs))) - {i})], recipe=recipe)
4647-
.to_arrhenius_charge_transfer(rxn.get_enthalpy_of_reaction(Tref))
4648-
.get_rate_coefficient(T=Tref) / rxn.get_rate_coefficient(T=Tref)
4649-
) for i, rxn in enumerate(rs)
4650-
]) # 1) fit to set of reactions without the current reaction (k) 2) compute log(kfit/kactual) at Tref
4651-
varis = (np.array([rank_accuracy_map[rxn.rank].value_si for rxn in rs]) / (2.0 * 8.314 * Tref)) ** 2
4652-
# weighted average calculations
4653-
ws = 1.0 / varis
4654-
V1 = ws.sum()
4655-
V2 = (ws ** 2).sum()
4656-
mu = np.dot(ws, dlnks) / V1
4657-
s = np.sqrt(np.dot(ws, (dlnks - mu) ** 2) / (V1 - V2 / V1))
4658-
kin.uncertainty = RateUncertainty(mu=mu, var=s ** 2, N=n, Tref=Tref, data_mean=data_mean, correlation=label)
4659-
4660-
#site solute parameters
4661-
site_datas = [get_site_solute_data(rxn) for rxn in rxns]
4662-
site_datas = [sdata for sdata in site_datas if sdata is not None]
4663-
if len(site_datas) > 0:
4664-
site_data = SoluteTSData()
4665-
for sdata in site_datas:
4666-
site_data += sdata
4667-
site_data = site_data * (1.0/len(site_datas))
4668-
kin.solute = site_data
4669-
return kin
4670-
else:
4671-
return None
4662+
dlnks = np.array([
4663+
np.log(
4664+
arr().fit_to_reactions(np.delete(rs, i), recipe=recipe)
4665+
.to_arrhenius_charge_transfer(rxn.get_enthalpy_of_reaction(Tref))
4666+
.get_rate_coefficient(T=Tref) / rxn.get_rate_coefficient(T=Tref)
4667+
) for i, rxn in enumerate(rs)
4668+
]) # 1) fit to set of reactions without the current reaction (k) 2) compute log(kfit/kactual) at Tref
4669+
varis = (np.array([rank_accuracy_map[rxn.rank].value_si for rxn in rs]) / (2.0 * constants.R * Tref)) ** 2
4670+
# weighted average calculations
4671+
ws = 1.0 / varis
4672+
V1 = ws.sum()
4673+
V2 = (ws ** 2).sum()
4674+
mu = np.dot(ws, dlnks) / V1
4675+
s = np.sqrt(np.dot(ws, (dlnks - mu) ** 2) / (V1 - V2 / V1))
4676+
kin.uncertainty = RateUncertainty(mu=mu, var=s ** 2, N=n, Tref=Tref, data_mean=data_mean, correlation=label)
4677+
4678+
#site solute parameters
4679+
site_datas = [get_site_solute_data(rxn) for rxn in rxns]
4680+
site_datas = [sdata for sdata in site_datas if sdata is not None]
4681+
if len(site_datas) > 0:
4682+
site_data = SoluteTSData()
4683+
for sdata in site_datas:
4684+
site_data += sdata
4685+
site_data = site_data * (1.0/len(site_datas))
4686+
kin.solute = site_data
4687+
return kin
4688+
46724689

46734690

46744691
def _spawn_tree_process(family, template_rxn_map, obj, T, nprocs, depth, min_splitable_entry_num, min_rxns_to_spawn, extension_iter_max, extension_iter_item_cap):
@@ -4772,7 +4789,7 @@ def average_kinetics(kinetics_list):
47724789
beta=(beta,"1/m"),
47734790
wr=(wr * 0.001, "kJ/mol"),
47744791
wp=(wp * 0.001, "kJ/mol"),
4775-
comment="Averaged from {} reactions.".format(len(kinetics_list)),
4792+
comment=f"Averaged from {len(kinetics_list)} rate expressions.",
47764793
)
47774794
elif isinstance(kinetics, SurfaceChargeTransfer):
47784795
averaged_kinetics = SurfaceChargeTransfer(
@@ -4782,6 +4799,7 @@ def average_kinetics(kinetics_list):
47824799
alpha=alpha,
47834800
V0=(V0,'V'),
47844801
Ea=(Ea * 0.001, "kJ/mol"),
4802+
comment=f"Averaged from {len(kinetics_list)} rate expressions.",
47854803
)
47864804
elif isinstance(kinetics, ArrheniusChargeTransfer):
47874805
averaged_kinetics = ArrheniusChargeTransfer(
@@ -4791,6 +4809,7 @@ def average_kinetics(kinetics_list):
47914809
alpha=alpha,
47924810
V0=(V0,'V'),
47934811
Ea=(Ea * 0.001, "kJ/mol"),
4812+
comment=f"Averaged from {len(kinetics_list)} rate expressions.",
47944813
)
47954814
else:
47964815
averaged_kinetics = Arrhenius(

rmgpy/display.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,18 @@
2929

3030
"""
3131
This module contains only a display() function, which, if you are running in
32-
the IPython --pylab mode, will render things inline in pretty SVG or PNG graphics.
33-
If you are NOT running in IPython --pylab mode, it will do nothing.
32+
a Jupyter notebook or IPython session, will render things inline in pretty SVG or PNG graphics.
33+
If you are NOT running in such an environment, it will do nothing.
3434
"""
3535

3636
try:
37-
from IPython.core.interactiveshell import InteractiveShell
37+
from IPython import get_ipython
38+
_ipython = get_ipython()
3839
except ImportError:
40+
_ipython = None
41+
42+
if _ipython is not None:
43+
from IPython.display import display
44+
else:
3945
def display(obj):
4046
pass
41-
else:
42-
if InteractiveShell.initialized():
43-
from IPython.core.display import display
44-
else:
45-
def display(obj):
46-
pass

0 commit comments

Comments
 (0)