Skip to content

Commit 78e7505

Browse files
authored
Enhances Arkane QM correction matching (#858)
2 parents e782431 + 606bbe2 commit 78e7505

6 files changed

Lines changed: 282 additions & 60 deletions

File tree

arc/main.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from arc.species.converter import str_to_xyz
4242
from arc.species.species import ARCSpecies
4343
from arc.statmech.adapter import StatmechEnum
44-
from arc.statmech.arkane import check_arkane_bacs
44+
from arc.statmech.arkane import check_arkane_aec, check_arkane_bacs
4545
from arc.utils.scale import determine_scaling_factors
4646

4747

@@ -1201,13 +1201,26 @@ def check_arkane_level_of_theory(self):
12011201
"""
12021202
Check that the level of theory has AEC in Arkane.
12031203
"""
1204+
explicitly_set = self.arkane_level_of_theory is not None
12041205
if self.arkane_level_of_theory is None:
12051206
self.arkane_level_of_theory = self.composite_method if self.composite_method is not None \
12061207
else self.sp_level if self.sp_level is not None else None
12071208
if self.arkane_level_of_theory is None:
12081209
logger.warning('Could not determine a level of theory to be used for Arkane!')
1209-
elif self.bac_type is not None:
1210-
check_arkane_bacs(sp_level=self.arkane_level_of_theory, bac_type=self.bac_type, raise_error=self.compute_thermo)
1210+
else:
1211+
if explicitly_set:
1212+
source = ''
1213+
elif self.composite_method is not None:
1214+
source = ' (from composite method)'
1215+
else:
1216+
source = ' (from sp level)'
1217+
logger.info(f'Arkane level of theory:{source} {self.arkane_level_of_theory}')
1218+
if self.bac_type is not None:
1219+
check_arkane_bacs(sp_level=self.arkane_level_of_theory, bac_type=self.bac_type,
1220+
raise_error=self.compute_thermo)
1221+
else:
1222+
check_arkane_aec(sp_level=self.arkane_level_of_theory,
1223+
raise_error=self.compute_thermo)
12111224

12121225
def backup_restart(self):
12131226
"""

arc/output.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@
2020
from arc.job.local import execute_command
2121
from arc.parser.parser import parse_1d_scan_energies, parse_e_elect, parse_ess_version, parse_opt_steps, parse_zpe_correction
2222
from arc.species.converter import xyz_to_str
23-
from arc.statmech.arkane import get_arkane_model_chemistry
23+
from arc.statmech.arkane import (
24+
AEC_SECTION_START, AEC_SECTION_END,
25+
MBAC_SECTION_START, MBAC_SECTION_END,
26+
PBAC_SECTION_START, PBAC_SECTION_END,
27+
find_best_across_files, get_qm_corrections_files,
28+
)
2429

2530

2631
logger = get_logger()
@@ -282,23 +287,31 @@ def _get_energy_corrections(arkane_level_of_theory, bac_type: Optional[str]) ->
282287
Look up the AEC (per-atom, Hartree) and BAC (per-bond, kJ/mol) values
283288
that Arkane used from the RMG database for the given level of theory.
284289
285-
Uses ``get_arkane_model_chemistry`` to find the matched key, then calls
286-
``arc/scripts/get_qm_corrections.py`` as a subprocess to parse the
287-
actual correction dicts from the RMG database.
290+
Finds the AEC and BAC keys independently via fuzzy matching in their
291+
respective database sections, then calls ``arc/scripts/get_qm_corrections.py``
292+
as a subprocess to extract the actual correction dicts.
288293
289294
Returns:
290295
(aec_dict_or_None, bac_dict_or_None)
291296
"""
292297
if arkane_level_of_theory is None:
293298
return None, None
294299
try:
295-
matched_key = get_arkane_model_chemistry(
296-
sp_level=arkane_level_of_theory,
297-
freq_scale_factor=1.0, # dummy — we only need the energy key
298-
)
299-
if matched_key is None:
300+
qm_corr_files = get_qm_corrections_files()
301+
302+
aec_key = find_best_across_files(arkane_level_of_theory, qm_corr_files,
303+
AEC_SECTION_START, AEC_SECTION_END)
304+
if aec_key is None:
300305
return None, None
301306

307+
bac_key = None
308+
if bac_type in ('p', 'm'):
309+
if bac_type == 'm':
310+
bac_start, bac_end = MBAC_SECTION_START, MBAC_SECTION_END
311+
else:
312+
bac_start, bac_end = PBAC_SECTION_START, PBAC_SECTION_END
313+
bac_key = find_best_across_files(arkane_level_of_theory, qm_corr_files, bac_start, bac_end)
314+
302315
script_path = os.path.join(ARC_PATH, 'arc', 'scripts', 'get_qm_corrections.py')
303316
rmg_env = settings.get('RMG_ENV_NAME', 'rmg_env')
304317

@@ -308,7 +321,8 @@ def _get_energy_corrections(arkane_level_of_theory, bac_type: Optional[str]) ->
308321
os.close(fd_in)
309322
os.close(fd_out)
310323
save_yaml_file(path=tmp_in, content={
311-
'matched_key': matched_key,
324+
'aec_key': aec_key,
325+
'bac_key': bac_key,
312326
'bac_type': bac_type,
313327
})
314328

arc/output_test.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,40 @@ def test_no_bac_when_type_none(self):
479479
aec, bac = _get_energy_corrections(lot, None)
480480
self.assertIsNone(bac)
481481

482+
def test_independent_aec_and_bac_keys(self):
483+
"""AEC and BAC keys should be resolved independently, not reusing the AEC key for BAC."""
484+
lot = Level(method='wb97xd', basis='def2tzvp', software='gaussian')
485+
aec_key = "LevelOfTheory(method='wb97xd',basis='def2tzvp',software='gaussian')"
486+
bac_key = "LevelOfTheory(method='wb97xd',basis='def2tzvp')" # different key (no software)
487+
488+
calls = []
489+
def mock_find_best(level, files, start, end):
490+
calls.append(start)
491+
if 'atom_energies' in start:
492+
return aec_key
493+
elif 'pbac' in start:
494+
return bac_key
495+
return None
496+
497+
with patch('arc.output.find_best_across_files', side_effect=mock_find_best), \
498+
patch('arc.output.get_qm_corrections_files', return_value=['/fake/data.py']), \
499+
patch('arc.output.execute_command', return_value=('', '')), \
500+
patch('arc.output.read_yaml_file', return_value={'aec': {'H': -0.5}, 'bac': {'C-H': -0.06}}), \
501+
patch('arc.output.save_yaml_file') as mock_save:
502+
aec, bac = _get_energy_corrections(lot, 'p')
503+
504+
# Verify both sections were searched independently
505+
self.assertTrue(any('atom_energies' in c for c in calls))
506+
self.assertTrue(any('pbac' in c for c in calls))
507+
# Verify the script received separate keys
508+
save_call = mock_save.call_args
509+
saved_content = save_call[1].get('content') or save_call[0][1]
510+
self.assertEqual(saved_content['aec_key'], aec_key)
511+
self.assertEqual(saved_content['bac_key'], bac_key)
512+
# Verify results returned
513+
self.assertIsNotNone(aec)
514+
self.assertIsNotNone(bac)
515+
482516

483517
class TestGetTsImagFreqFromFreqs(unittest.TestCase):
484518
"""Tests for _get_ts_imag_freq using spc.freqs as primary source."""

arc/scripts/get_qm_corrections.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,24 +44,24 @@ def _lot_from_string(lot_str):
4444
def main(input_path, output_path):
4545
"""Look up AEC and BAC for the given level of theory key."""
4646
params = read_yaml_file(input_path) or {}
47-
matched_key = params.get('matched_key')
4847
bac_type = params.get('bac_type')
4948

5049
result = {'aec': None, 'bac': None}
5150

52-
if not matched_key:
53-
save_yaml_file(output_path, result)
54-
return
51+
# Support both old format (single matched_key) and new format (separate aec_key/bac_key)
52+
aec_key = params.get('aec_key') or params.get('matched_key')
53+
bac_key = params.get('bac_key') or params.get('matched_key')
5554

56-
lot = _lot_from_string(matched_key)
55+
if aec_key:
56+
lot = _lot_from_string(aec_key)
57+
aec = atom_energies.get(lot)
58+
if aec is not None:
59+
result['aec'] = {str(k): float(v) for k, v in aec.items()}
5760

58-
aec = atom_energies.get(lot)
59-
if aec is not None:
60-
result['aec'] = {str(k): float(v) for k, v in aec.items()}
61-
62-
if bac_type in ('p', 'm'):
61+
if bac_key and bac_type in ('p', 'm'):
62+
bac_lot = _lot_from_string(bac_key)
6363
bac_dict = pbac if bac_type == 'p' else mbac
64-
bac = bac_dict.get(lot)
64+
bac = bac_dict.get(bac_lot)
6565
if bac is not None:
6666
result['bac'] = {str(k): float(v) for k, v in bac.items()}
6767

arc/statmech/arkane.py

Lines changed: 78 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,15 @@
2828
RMG_ENV_NAME = settings.get('RMG_ENV_NAME', 'rmg_env')
2929
logger = get_logger()
3030

31+
# Section boundary markers in the RMG quantum_corrections/data.py file.
32+
AEC_SECTION_START = "atom_energies = {"
33+
AEC_SECTION_END = "pbac = {"
34+
PBAC_SECTION_START = "pbac = {"
35+
PBAC_SECTION_END = "mbac = {"
36+
MBAC_SECTION_START = "mbac = {"
37+
MBAC_SECTION_END = "freq_dict ="
38+
FREQ_SECTION_START = "freq_dict = {"
39+
3140

3241
main_input_template = """#!/usr/bin/env python
3342
# -*- coding: utf-8 -*-
@@ -699,7 +708,7 @@ def _extract_section(file_path: str, section_start: str, section_end: Optional[s
699708

700709

701710

702-
def _get_qm_corrections_files() -> List[str]:
711+
def get_qm_corrections_files() -> List[str]:
703712
"""
704713
Return quantum corrections data.py paths from the RMG database.
705714
"""
@@ -834,7 +843,7 @@ def _format_years(years: List[Optional[int]]) -> str:
834843
return ", ".join("none" if y is None else str(y) for y in years)
835844

836845

837-
def _find_best_across_files(level: "Level",
846+
def find_best_across_files(level: "Level",
838847
qm_corr_files: List[str],
839848
section_start: str,
840849
section_end: Optional[str],
@@ -886,6 +895,10 @@ def _warn_no_match(level: "Level",
886895
f"available years: {_format_years(years)}. "
887896
f"Specify a year to select a matching entry."
888897
)
898+
else:
899+
logger.warning(
900+
f"No Arkane {label} entry found for {level.simple()} in the RMG database."
901+
)
889902

890903

891904
def _find_best_level_key_for_sp_level(level: "Level",
@@ -1015,33 +1028,28 @@ def get_arkane_model_chemistry(sp_level: 'Level',
10151028
Returns:
10161029
Optional[str]: Arkane-compatible model chemistry string.
10171030
"""
1018-
qm_corr_files = _get_qm_corrections_files()
1019-
1020-
aec_start = "atom_energies = {"
1021-
aec_end = "pbac = {"
1022-
freq_start = "freq_dict = {"
1023-
freq_end = None # freq_dict is the last section in data.py; read to EOF
1031+
qm_corr_files = get_qm_corrections_files()
10241032

10251033
# Composite methods and user-supplied freq_scale_factor both only need an AEC entry.
10261034
if sp_level.method_type == 'composite' or freq_scale_factor is not None:
1027-
best_energy = _find_best_across_files(sp_level, qm_corr_files, aec_start, aec_end)
1035+
best_energy = find_best_across_files(sp_level, qm_corr_files, AEC_SECTION_START, AEC_SECTION_END)
10281036
if best_energy is None:
1029-
_warn_no_match(sp_level, qm_corr_files, aec_start, aec_end, label="AEC")
1037+
_warn_no_match(sp_level, qm_corr_files, AEC_SECTION_START, AEC_SECTION_END, label="AEC")
10301038
return None
10311039
return best_energy
10321040

10331041
# CompositeLevelOfTheory: need both energy (AEC) and frequency entries.
10341042
if freq_level is None:
10351043
raise ValueError("freq_level required when freq_scale_factor isn't provided")
10361044

1037-
best_energy = _find_best_across_files(sp_level, qm_corr_files, aec_start, aec_end)
1038-
best_freq = _find_best_across_files(freq_level, qm_corr_files, freq_start, freq_end)
1045+
best_energy = find_best_across_files(sp_level, qm_corr_files, AEC_SECTION_START, AEC_SECTION_END)
1046+
best_freq = find_best_across_files(freq_level, qm_corr_files, FREQ_SECTION_START, None)
10391047

10401048
if best_energy is None or best_freq is None:
10411049
if best_energy is None:
1042-
_warn_no_match(sp_level, qm_corr_files, aec_start, aec_end, label="AEC")
1050+
_warn_no_match(sp_level, qm_corr_files, AEC_SECTION_START, AEC_SECTION_END, label="AEC")
10431051
if best_freq is None:
1044-
_warn_no_match(freq_level, qm_corr_files, freq_start, freq_end, label="frequency correction")
1052+
_warn_no_match(freq_level, qm_corr_files, FREQ_SECTION_START, None, label="frequency correction")
10451053
return None
10461054

10471055
return (
@@ -1052,6 +1060,33 @@ def get_arkane_model_chemistry(sp_level: 'Level',
10521060
)
10531061

10541062

1063+
def check_arkane_aec(sp_level: 'Level', raise_error: bool = False) -> bool:
1064+
"""
1065+
Check that Arkane has AEC for the given sp level of theory (no BAC check).
1066+
Used when bac_type is None but we still want to verify AEC availability.
1067+
1068+
Args:
1069+
sp_level (Level): Level of theory for energy.
1070+
raise_error (bool): Whether to raise an error if AEC is missing.
1071+
1072+
Returns:
1073+
bool: True if AEC is available, False otherwise.
1074+
"""
1075+
try:
1076+
qm_corr_files = get_qm_corrections_files()
1077+
except InputError as e:
1078+
logger.warning(f'Could not load Arkane quantum corrections data: {e}')
1079+
return False
1080+
best_aec_key = find_best_across_files(sp_level, qm_corr_files, AEC_SECTION_START, AEC_SECTION_END)
1081+
if best_aec_key is not None:
1082+
logger.info(f'Arkane atom energy corrections (AEC) matched for {best_aec_key} (BAC disabled)')
1083+
else:
1084+
_warn_no_match(sp_level, qm_corr_files, AEC_SECTION_START, AEC_SECTION_END, label="AEC")
1085+
if raise_error:
1086+
raise ValueError(f'Arkane has no atom energy corrections (AEC) for {_level_to_str(sp_level)}.')
1087+
return best_aec_key is not None
1088+
1089+
10551090
def check_arkane_bacs(sp_level: 'Level',
10561091
bac_type: str = 'p',
10571092
raise_error: bool = False,
@@ -1071,19 +1106,19 @@ def check_arkane_bacs(sp_level: 'Level',
10711106
Returns:
10721107
bool: True if both AECs and BACs are available, False otherwise.
10731108
"""
1074-
qm_corr_files = _get_qm_corrections_files()
1109+
try:
1110+
qm_corr_files = get_qm_corrections_files()
1111+
except InputError as e:
1112+
logger.warning(f'Could not load Arkane quantum corrections data: {e}')
1113+
return False
10751114

1076-
aec_start = "atom_energies = {"
1077-
aec_end = "pbac = {"
10781115
if bac_type.lower() == 'm':
1079-
bac_start = "mbac = {"
1080-
bac_end = "freq_dict ="
1116+
bac_start, bac_end = MBAC_SECTION_START, MBAC_SECTION_END
10811117
else:
1082-
bac_start = "pbac = {"
1083-
bac_end = "mbac = {"
1118+
bac_start, bac_end = PBAC_SECTION_START, PBAC_SECTION_END
10841119

1085-
best_aec_key = _find_best_across_files(sp_level, qm_corr_files, aec_start, aec_end)
1086-
best_bac_key = _find_best_across_files(sp_level, qm_corr_files, bac_start, bac_end)
1120+
best_aec_key = find_best_across_files(sp_level, qm_corr_files, AEC_SECTION_START, AEC_SECTION_END)
1121+
best_bac_key = find_best_across_files(sp_level, qm_corr_files, bac_start, bac_end)
10871122

10881123
has_aec = best_aec_key is not None
10891124
has_bac = best_bac_key is not None
@@ -1092,7 +1127,7 @@ def check_arkane_bacs(sp_level: 'Level',
10921127
if not has_encorr:
10931128
repr_level = best_aec_key if best_aec_key is not None else _level_to_str(sp_level)
10941129
year_note = ""
1095-
aec_years = _all_available_years(sp_level, qm_corr_files, aec_start, aec_end)
1130+
aec_years = _all_available_years(sp_level, qm_corr_files, AEC_SECTION_START, AEC_SECTION_END)
10961131
bac_years = _all_available_years(sp_level, qm_corr_files, bac_start, bac_end)
10971132
if sp_level.year is not None:
10981133
year_note = (
@@ -1105,14 +1140,29 @@ def check_arkane_bacs(sp_level: 'Level',
11051140
f"available BAC years: {_format_years(bac_years)}. "
11061141
f"Specify a year to select a matching entry."
11071142
)
1108-
mssg = (
1109-
f"Arkane does not have the required energy corrections for {repr_level} "
1110-
f"(AEC: {has_aec}, BAC: {has_bac}).{year_note}"
1111-
)
1143+
if has_aec and not has_bac:
1144+
mssg = (
1145+
f"Arkane atom energy corrections (AEC) matched for {repr_level}, "
1146+
f"but bond additivity corrections (BAC) were NOT found in the RMG database. "
1147+
f"Thermo/kinetics results will use AEC but lack BAC.{year_note}"
1148+
)
1149+
elif has_bac and not has_aec:
1150+
mssg = (
1151+
f"Arkane {bac_type.upper()}BAC matched for {best_bac_key}, "
1152+
f"but atom energy corrections (AEC) were NOT found in the RMG database. "
1153+
f"Energy corrections will be disabled.{year_note}"
1154+
)
1155+
else:
1156+
mssg = (
1157+
f"Arkane does not have atom energy corrections (AEC) or bond additivity corrections (BAC) "
1158+
f"for {repr_level}.{year_note}"
1159+
)
11121160
if raise_error:
11131161
raise ValueError(mssg)
11141162
else:
11151163
logger.warning(mssg)
1164+
else:
1165+
logger.info(f'Arkane energy corrections matched for {best_aec_key} (AEC and {bac_type.upper()}BAC)')
11161166
return has_encorr
11171167

11181168

0 commit comments

Comments
 (0)