Skip to content

Commit b30e977

Browse files
committed
Docs: sp_composite user guide, examples, and parseability tests
docs/source/advanced.rst grows a "Composite single-point protocols (sp_composite)" subsection covering all four YAML forms (preset / preset+override / explicit recipe with CBS / per-species override), interactions with sp_level, composite_method, adaptive_levels, and conformer_sp_level, AEC routing + BAC-skipped-with-warning policy, restart behavior, the provenance notebook + Run-All workflow, units, and limitations. References with DOIs for HEAT, Helgaker/Halkier CBS, Martin 3-pt, and Dunning basis-set families. examples/Composite/ ships a README and four runnable inputs: * heat345q_preset — preset by name * heat345q_partial_override — preset with overrides * explicit_fpa — explicit recipe incl. CBS term * per_species_override — mixed inherit/null/explicit The README flags HEAT-style post-(T) examples as illustrative and calls out explicit_fpa as the affordable demo. Tests: arc/level/examples_test.py YAML-parses every shipped example and builds every sp_composite block via CompositeProtocol.from_user_input, and asserts that all four forms appear so docs and examples stay in sync.
1 parent e98d211 commit b30e977

7 files changed

Lines changed: 436 additions & 0 deletions

File tree

arc/level/examples_test.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/usr/bin/env python3
2+
# encoding: utf-8
3+
4+
"""
5+
Tests that every ``examples/Composite/*/input.yml`` example is valid YAML and
6+
that its ``sp_composite`` block (or per-species ``sp_composite`` entries)
7+
builds a valid :class:`CompositeProtocol` via
8+
:meth:`CompositeProtocol.from_user_input`. Keeps the docs + examples honest.
9+
"""
10+
11+
import glob
12+
import os
13+
import unittest
14+
15+
import yaml
16+
17+
from arc.common import ARC_PATH
18+
from arc.level.protocol import CompositeProtocol
19+
20+
21+
# Directory under the ARC checkout that holds the shipped ``sp_composite``
# examples; the tests below glob one ``input.yml`` per sub-directory of it.
EXAMPLES_DIR = os.path.join(ARC_PATH, "examples", "Composite")
22+
23+
24+
class TestCompositeExamples(unittest.TestCase):
    """
    Parse every shipped example and validate its ``sp_composite`` payload.

    An "example" is any ``input.yml`` found exactly one directory level below
    ``EXAMPLES_DIR``. The tests check that each file is loadable YAML, that
    every project-level and per-species ``sp_composite`` entry can be turned
    into a :class:`CompositeProtocol`, and that the four documented YAML forms
    are all demonstrated by at least one example.
    """

    def _example_files(self):
        """Return the sorted paths of all ``<EXAMPLES_DIR>/*/input.yml`` files."""
        pattern = os.path.join(EXAMPLES_DIR, "*", "input.yml")
        return sorted(glob.glob(pattern))

    @staticmethod
    def _load_example(path):
        """
        Load one example input file with ``yaml.safe_load``.

        The file is opened explicitly as UTF-8: the shipped inputs contain
        non-ASCII characters in their comments, so relying on the platform
        default encoding makes the tests locale-dependent.

        Args:
            path (str): Path to an ``input.yml`` file.

        Returns:
            The parsed YAML document (a ``dict`` for a well-formed ARC input).
        """
        with open(path, "r", encoding="utf-8") as fh:
            return yaml.safe_load(fh)

    def test_examples_directory_ships_at_least_four_inputs(self):
        """At least four example inputs are shipped (one per documented form)."""
        self.assertGreaterEqual(len(self._example_files()), 4)

    def test_examples_readme_exists(self):
        """The examples directory carries a ``README.md``."""
        self.assertTrue(os.path.isfile(os.path.join(EXAMPLES_DIR, "README.md")))

    def test_every_example_is_valid_yaml(self):
        """Every example parses to a mapping with ``project`` and ``species`` keys."""
        for path in self._example_files():
            with self.subTest(path=path):
                data = self._load_example(path)
                self.assertIsInstance(data, dict)
                self.assertIn("project", data)
                self.assertIn("species", data)

    def test_every_project_level_sp_composite_builds(self):
        """Project-level ``sp_composite`` (if present) is parseable."""
        for path in self._example_files():
            with self.subTest(path=path):
                sp = self._load_example(path).get("sp_composite")
                if sp is None:
                    # Key absent or explicitly null: nothing to build.
                    continue
                protocol = CompositeProtocol.from_user_input(sp)
                self.assertIsInstance(protocol, CompositeProtocol)

    def test_every_species_sp_composite_builds_if_explicit(self):
        """Per-species ``sp_composite`` (string/dict, not null) is parseable."""
        for path in self._example_files():
            data = self._load_example(path)
            # ``or []`` also tolerates an explicit ``species: null``.
            for spc in data.get("species") or []:
                if "sp_composite" not in spc:
                    # Key absent: the species inherits the project-level protocol.
                    continue
                sp = spc["sp_composite"]
                if sp is None:
                    # Explicit null: the species opts out, nothing to build.
                    continue
                with self.subTest(path=path, label=spc.get("label")):
                    protocol = CompositeProtocol.from_user_input(sp)
                    self.assertIsInstance(protocol, CompositeProtocol)

    def test_all_four_forms_covered(self):
        """Each of the four documented YAML forms must appear at least once."""
        form1 = form2 = form3 = form4 = False
        for path in self._example_files():
            data = self._load_example(path)
            sp = data.get("sp_composite")
            if isinstance(sp, str):
                form1 = True
            elif isinstance(sp, dict) and "preset" in sp:
                form2 = True
            elif isinstance(sp, dict) and "base" in sp:
                form3 = True
            # Any species carrying the key (even with a null value) counts as
            # a per-species override.
            if any("sp_composite" in spc for spc in data.get("species") or []):
                form4 = True
        self.assertTrue(form1, "Form 1 (preset by name) not demonstrated.")
        self.assertTrue(form2, "Form 2 (preset + override) not demonstrated.")
        self.assertTrue(form3, "Form 3 (fully explicit recipe) not demonstrated.")
        self.assertTrue(form4, "Form 4 (per-species override) not demonstrated.")

    def test_explicit_recipe_example_includes_cbs_extrapolation(self):
        """The ``explicit_fpa`` example lists a ``cbs_extrapolation`` correction."""
        path = os.path.join(EXAMPLES_DIR, "explicit_fpa", "input.yml")
        data = self._load_example(path)
        corrections = data["sp_composite"]["corrections"]
        term_types = {c["type"] for c in corrections}
        self.assertIn("cbs_extrapolation", term_types)
101+
102+
103+
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

docs/source/advanced.rst

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,183 @@ ARC extracts active space parameters from Molpro CCSD output files to guide subs
248248
The method returns a dictionary containing the ``'e_o'`` tuple (electrons, orbitals) alongside lists of occupied (``'occ'``) and closed-shell (``'closed'``) orbitals per irreducible representation.
249249

250250

251+
Composite single-point protocols (``sp_composite``)
252+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
253+
254+
``sp_composite`` expresses the final electronic energy of each stationary point
255+
as a sum of contributions computed at *different* levels of theory — a
256+
HEAT-style focal-point analysis. This is distinct from the legacy
257+
``composite_method`` (which means a Gaussian-style single-job composite like
258+
``CBS-QB3``); the two are mutually exclusive at the project level.
259+
260+
**When is it for you?**
261+
When a single level of theory is insufficient for the accuracy you need on a
262+
transition state. A typical motivation: ``CCSD(T)-F12/cc-pVTZ-F12`` wells agree
263+
with ATcT, but TS barriers miss experiment by several kJ/mol. Adding small
264+
post-(T) corrections (``δ[CCSDT]``, ``δ[CCSDT(Q)]``), plus core-valence and
265+
scalar-relativistic terms, closes the gap without any empirical fitting.
266+
267+
**Four YAML forms.**
268+
269+
**Form 1 — preset by name.** The quickest path::
270+
271+
project: h2o_heat345q
272+
sp_composite: HEAT-345Q
273+
species:
274+
- label: H2O
275+
smiles: O
276+
277+
ARC ships a few presets in ``arc/level/presets.yml``:
278+
279+
* ``HEAT-345`` — HEAT-style recipe inspired by Tajti et al. (see references)
280+
* ``HEAT-345Q`` — HEAT-345 plus a ``δ[CCSDT(Q)]`` correction
281+
* ``FPA-min`` — minimal focal-point recipe with a CBS extrapolation term
282+
283+
**Form 2 — preset with partial override.** Replace specific fields of named
284+
terms in the preset::
285+
286+
sp_composite:
287+
preset: HEAT-345Q
288+
overrides:
289+
delta_T:
290+
high: {method: ccsdt, basis: cc-pVTZ}
291+
292+
The override dict keys are term labels (``base``, ``delta_T``, ``delta_Q``,
293+
``delta_CV``, ``delta_rel``, ...). Unknown target labels raise ``InputError``.
294+
295+
**Form 3 — fully explicit recipe, including a CBS extrapolation term.** No
296+
preset, complete control::
297+
298+
sp_composite:
299+
reference: "My recipe; DOI: 10.1234/example"
300+
base:
301+
method: ccsd(t)-f12
302+
basis: cc-pVTZ-f12
303+
corrections:
304+
- label: delta_T
305+
type: delta
306+
high: {method: ccsdt, basis: cc-pVDZ}
307+
low: {method: ccsd(t), basis: cc-pVDZ}
308+
- label: cbs_corr
309+
type: cbs_extrapolation
310+
formula: helgaker_corr_2pt
311+
components: total # only "total" is currently supported
312+
levels:
313+
- {method: ccsd(t), basis: cc-pVTZ}
314+
- {method: ccsd(t), basis: cc-pVQZ}
315+
316+
Term types:
317+
318+
* ``single_point`` — one absolute SP (only the ``base`` is usually one).
319+
* ``delta`` — ``E[high] − E[low]`` between two levels (same basis typically).
320+
* ``cbs_extrapolation`` — CBS extrapolation from ≥2 levels with the same
321+
method but different basis cardinalities. Built-in formulas:
322+
``helgaker_hf_2pt`` (Halkier et al. 1998), ``helgaker_corr_2pt``
323+
(Helgaker et al. 1997), ``martin_3pt`` (Martin 1996). Alternatively,
324+
supply a user formula string referencing ``X``, ``Y``, ``Z`` (cardinals)
325+
and ``E_X``, ``E_Y``, ``E_Z`` (energies); it is parsed through a
326+
whitelisted AST evaluator — no ``eval()``.
327+
328+
**Form 4 — per-species override.** Three states are distinguishable::
329+
330+
project: mixed
331+
sp_composite: HEAT-345Q # applies by default to every species
332+
species:
333+
- label: H2O # inherits the project-wide protocol
334+
smiles: O
335+
- label: H2O_uncorrected
336+
smiles: O
337+
sp_composite: null # opt out — use plain sp_level
338+
- label: TS1
339+
xyz: ...
340+
sp_composite: # species-specific override
341+
base: {method: mp2, basis: cc-pVTZ}
342+
corrections: []
343+
344+
Internally each species is in one of three states: ``"inherit"`` (key absent),
345+
``"opt_out"`` (explicit ``null``), ``"explicit"`` (preset name or recipe).
346+
These three survive ``as_dict`` / ``from_dict`` and restart-dict round-trip.
347+
348+
**Interactions with other parameters.**
349+
350+
* ``sp_level`` — coexists. If you omit ``sp_level`` while setting
351+
``sp_composite``, ARC derives ``sp_level`` from ``sp_composite.base.level``
352+
so downstream code that reads ``sp_level`` (opt-out species, legacy paths)
353+
keeps working. If you supply ``sp_level`` explicitly, it is preserved.
354+
* ``composite_method`` (legacy) — mutually exclusive with ``sp_composite``.
355+
Project fails to start with ``InputError`` if both are set.
356+
* ``adaptive_levels`` — mutually exclusive with ``sp_composite`` in the current release. Raises
357+
``InputError``. A future release may allow compatible combinations.
358+
* ``conformer_sp_level`` — unaffected. Conformer ranking stays at its own
359+
level; ``sp_composite`` kicks in only at the final SP stage on the
360+
optimized geometry.
361+
362+
**AEC / BAC behavior.**
363+
When ``sp_composite`` is active, ARC automatically routes Arkane's AEC lookup
364+
through ``sp_composite.base.level``. The BAC lookup is **skipped entirely**
365+
with a single warning — BAC was derived for a single LoT and is not meaningful
366+
on top of a δ-corrected composite. If you need BAC, compute it externally
367+
against the base level and add it as a literal term in the recipe.
368+
369+
Known limitation: per-species AEC is *not* implemented. When species carry
370+
mixed per-species protocols, the global AEC lookup uses the *project-level*
371+
``sp_composite.base.level``. Users who need per-species AEC should set
372+
``arkane_level_of_theory`` explicitly per project.
373+
374+
**Restart behavior.**
375+
Composite sub-jobs are tracked in the persistent output dict
376+
(``output[label]['paths']['sp_composite']: {sub_label → path}``). Restart
377+
re-runs only the sub-jobs missing from that dict. On init the scheduler
378+
*validates* every recorded path (file exists, ``parse_e_elect`` returns a
379+
number); invalidated entries are pushed back to pending with a warning. After
380+
seeding, the scheduler kick-starts any pending sub-jobs for species with prior
381+
composite progress, so a restart with no other events still makes forward
382+
progress.
383+
384+
**Provenance notebook.**
385+
Every time a composite finalizes, ARC regenerates a single project-level
386+
Jupyter notebook at ``<project>/output/sp_composite.ipynb``. It is
387+
**unexecuted on write**: it contains cell sources but no outputs. The user
388+
opens the notebook and runs "Run All" to independently verify the result —
389+
each section reconstructs its ``CompositeProtocol`` from a literal recipe
390+
dict, re-parses every sub-job QM output via ``arc.parser.parse_e_elect``, and
391+
re-evaluates the total. Citations (with DOI when supplied) carry through
392+
from ``presets.yml`` (or from the user's explicit ``reference:`` key) into the
393+
notebook's markdown.
394+
395+
**Units.**
396+
``arc.parser.parse_e_elect`` returns kJ/mol. ``CompositeProtocol.evaluate``
397+
is a pass-through sum and preserves whatever units its inputs use. ARC always
398+
stores ``species.e_elect`` in kJ/mol. Hartree is used only at display /
399+
logging boundaries (division by ``arc.constants.E_h_kJmol``) and in the
400+
Arkane species-file renderer, which converts once when writing the numeric
401+
``energy = <Hartree>`` assignment.
402+
403+
**Known limitations.**
404+
405+
* **MRCC adapter**: the composite framework is ESS-agnostic, but ARC does not
406+
yet ship a dedicated MRCC adapter. For ``CCSDT(Q)``, route through CFOUR
407+
(NCC module) or Molpro.
408+
* **Per-species AEC/BAC**: see the AEC/BAC section above.
409+
* ``adaptive_levels`` interaction: currently rejected; may be relaxed later.
410+
411+
**References.**
412+
413+
* Allen, East, Császár — focal-point analysis review (general FPA methodology).
414+
* Tajti, Szalay, Császár, Kállay, Gauss, Valeev, Flowers, Vázquez, Stanton,
415+
*J. Chem. Phys.* **121**, 11599 (2004). DOI: 10.1063/1.1804498 — HEAT protocol.
416+
* Helgaker, Klopper, Koch, Noga, *J. Chem. Phys.* **106**, 9639 (1997).
417+
DOI: 10.1063/1.473863 — two-point correlation CBS extrapolation.
418+
* Halkier, Helgaker, Jørgensen, Klopper, Koch, Olsen, Wilson,
419+
*Chem. Phys. Lett.* **286**, 243-252 (1998). DOI: 10.1016/S0009-2614(98)00111-0
420+
— two-point HF CBS extrapolation; fitted ``α = 1.63``.
421+
* Martin, *Chem. Phys. Lett.* **259**, 669-678 (1996). DOI: 10.1016/0009-2614(96)00898-6
422+
— three-point Schwartz-style extrapolation.
423+
* Dunning, *J. Chem. Phys.* **90**, 1007 (1989). DOI: 10.1063/1.456153 —
424+
correlation-consistent basis-set families; cardinal-number convention used
425+
by ``cardinal_from_basis``.
426+
427+
251428
Adaptive levels of theory
252429
^^^^^^^^^^^^^^^^^^^^^^^^^
253430
ARC allows users to adapt the level of theory to the size of the molecule.

examples/Composite/README.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# `sp_composite` examples
2+
3+
These inputs demonstrate the four YAML forms accepted by ARC's `sp_composite`
4+
feature — composite single-point protocols for refined electronic energies
5+
(HEAT-style focal-point analysis and CBS extrapolation).
6+
7+
| File | Demonstrates |
8+
|---|---|
9+
| `heat345q_preset/input.yml` | **Form 1** — preset by name (`HEAT-345Q`). Smallest possible composite input. |
10+
| `heat345q_partial_override/input.yml` | **Form 2** — preset with partial override: swap one basis set on a single term. |
11+
| `explicit_fpa/input.yml` | **Form 3** — fully explicit recipe, including a `cbs_extrapolation` term (Helgaker 2-pt correlation). |
12+
| `per_species_override/input.yml` | **Form 4** — per-species override: one species keeps the project default, one opts out via `null`, one uses a species-specific protocol. |
13+
14+
## Running
15+
16+
Activate the ARC conda environment (`environment.yml`), then from the repo root:
17+
18+
python ARC.py examples/Composite/heat345q_preset/input.yml
19+
20+
After the run finishes, a provenance notebook is generated at
21+
`<project_directory>/output/sp_composite.ipynb`. Open it in Jupyter or VS Code
22+
and select **Run All** — each section re-parses the actual QM output files via
23+
`arc.parser.parse_e_elect` and re-evaluates the `CompositeProtocol` to verify
24+
the final `e_elect` matches what ARC recorded in `output.yml`.
25+
26+
## A note on cost
27+
28+
The HEAT-style examples include `CCSDT` and `CCSDT(Q)` post-(T) corrections
29+
that require the CFOUR (NCC module) or Molpro adapters to actually execute.
30+
These are *illustrative*: the recipes are scientifically meaningful for small
31+
molecules (4–6 atoms, tight TSs) but become prohibitive quickly. The minimal
32+
`heat345q_preset` example uses `H2` and `O` as smoke-test species; adapt the
33+
level of theory (or drop expensive terms via overrides) for larger systems.
34+
35+
For small methodological demos that do not require an expensive post-(T)
36+
reference calculation, see `explicit_fpa/input.yml`, which shows the CBS
37+
extrapolation form using only CCSD(T)/cc-pV{T,Q}Z.
38+
39+
## Units
40+
41+
`species.e_elect` is stored in kJ/mol throughout. The notebook and ARC log
42+
display Hartree only at boundaries via division by `E_h_kJmol`
43+
(≈ 2625.4996 kJ/mol/Hartree). The Arkane species file (under
44+
`<project>/output/Species/<label>/arkane/species.py`) is rendered with a bare
45+
`energy = <Hartree>` assignment when `sp_composite` is active — matching
46+
Arkane's numeric-energy convention.
47+
48+
## More
49+
50+
Full documentation: `docs/source/advanced.rst`, section
51+
*Composite single-point protocols (sp_composite)*.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Form 3 — fully explicit recipe, with a CBS extrapolation term.
2+
#
3+
# A minimal Allen-style focal-point analysis:
4+
# E_final = E[CCSD(T)-F12/cc-pVTZ-F12] (base)
5+
# + E_CBS(CCSD(T) corr) − E[CCSD(T)/cc-pVTZ] (CBS extrapolation)
6+
# + δ[CCSDT] (post-(T) correction)
7+
#
8+
# Does NOT require MRCC. CCSDT at cc-pVDZ runs in CFOUR (NCC) or Molpro.
9+
# CBS term uses Helgaker 2-point correlation-energy formula:
10+
# (X^3·E_X − Y^3·E_Y) / (X^3 − Y^3)
11+
# See Helgaker, Klopper, Koch, Noga, J. Chem. Phys. 106, 9639 (1997).
12+
#
13+
# The top-level ``reference`` key carries a user-supplied citation; it flows
14+
# into the generated provenance notebook's References block.
15+
project: composite_explicit_fpa
16+
17+
sp_composite:
18+
reference: >-
19+
Minimal focal-point analysis; cites Helgaker et al.
20+
J. Chem. Phys. 106, 9639 (1997); DOI: 10.1063/1.473863.
21+
base:
22+
method: ccsd(t)-f12
23+
basis: cc-pVTZ-f12
24+
corrections:
25+
- label: cbs_corr
26+
type: cbs_extrapolation
27+
formula: helgaker_corr_2pt
28+
# Only ``components: total`` is currently supported — adapter-level
29+
# correlation-only parsing is a future addition. Formula name documents intent.
30+
components: total
31+
levels:
32+
- {method: ccsd(t), basis: cc-pVTZ}
33+
- {method: ccsd(t), basis: cc-pVQZ}
34+
- label: delta_T
35+
type: delta
36+
high: {method: ccsdt, basis: cc-pVDZ}
37+
low: {method: ccsd(t), basis: cc-pVDZ}
38+
39+
species:
40+
- label: OH
41+
smiles: '[OH]'

0 commit comments

Comments
 (0)