Skip to content

Commit 22fa392

Browse files
committed
Reporting
1 parent 2bbc72a commit 22fa392

4 files changed

Lines changed: 471 additions & 1 deletion

File tree

arc/level/reporting.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,13 @@
4242
from typing import Any, Dict, List, Optional
4343

4444
import nbformat
45+
import yaml
4546
from nbformat.v4 import new_code_cell, new_markdown_cell, new_notebook
4647

48+
from arc.constants import E_h_kJmol
4749
from arc.exceptions import InputError
4850
from arc.level.protocol import CompositeProtocol
51+
from arc.parser.parser import parse_e_elect
4952

5053

5154
# =========================================================================== #
@@ -533,3 +536,151 @@ def _references_cell(sections: List[SpeciesSection]):
533536
for ref in ordered_refs:
534537
lines.append(f"- {ref}")
535538
return _md("\n".join(lines) + "\n", _cell_id("shared", "references"))
539+
540+
541+
# =========================================================================== #
542+
# build_species_report_dict + write_species_report_yaml #
543+
# =========================================================================== #
544+
#
545+
# Per-species YAML report. Companion to the project-level provenance notebook
546+
# (which serves *independent verification* via Run-All); this writer produces
547+
# a consumable per-species summary that's readable in plain text and easy to
548+
# parse from downstream tooling. One file per stationary point, written by
549+
# the scheduler at composite finalization.
550+
551+
552+
def _composite_formula_text(protocol: CompositeProtocol) -> str:
    """Render the composite formula as a plain-text sum (no LaTeX markup).

    The notebook renders its formula with LaTeX; the YAML report is meant to
    be read as plain text, so the formula here is assembled purely from the
    term labels: ``E_<base label>`` followed by each correction's label, in
    protocol order, joined with ``" + "``.
    """
    summands = [f"E_{protocol.base.label}"]
    summands.extend(correction.label for correction in protocol.corrections)
    return f"E_final = {' + '.join(summands)}"
562+
563+
564+
def build_species_report_dict(
    section: SpeciesSection,
    e_elect_kj_per_mol: float,
    timestamp: str,
    arc_version: str,
    arc_commit: str,
) -> Dict[str, Any]:
    """Build the plain-dict form of the per-species sp_composite report.

    Every output file listed in ``section.sub_job_paths`` is parsed once with
    :func:`arc.parser.parser.parse_e_elect` (the result is treated as kJ/mol),
    and each correction term of ``section.protocol`` then evaluates its
    contribution from that energy table. Hartree values are derived by
    dividing the kJ/mol values by ``E_h_kJmol``.

    The total energy is *not* recomputed here. ``e_elect_kj_per_mol`` is
    copied as-is into the ``final`` block, so the report shows the value the
    caller recorded for the species rather than a value derived in this
    function.

    Parameters
    ----------
    section : SpeciesSection
        Reporting handoff struct. This function reads its ``label``, ``kind``,
        ``preset_name``, ``reference``, ``protocol``, ``sub_job_paths`` and
        ``flags`` attributes.
    e_elect_kj_per_mol : float
        Final electronic energy recorded for this species, in kJ/mol.
    timestamp : str
        Stored verbatim under ``generated_at``; supplied by the caller so
        the output is reproducible.
    arc_version, arc_commit : str
        Provenance identifiers, stored verbatim.

    Returns
    -------
    dict
        Keys, in insertion order: ``species``, ``kind``, ``generated_at``,
        ``arc_version``, ``arc_commit``, ``protocol``, ``units``, ``base``,
        ``terms``, ``final``, ``flags``.
    """
    sub_job_paths = section.sub_job_paths
    energies_kj = {
        sub_label: parse_e_elect(output_path)
        for sub_label, output_path in sub_job_paths.items()
    }

    def _describe_sub_job(sub_label, level) -> Dict[str, Any]:
        # One record per QM sub-job: identity, level, energy in both units,
        # and the output file the energy was parsed from.
        entry: Dict[str, Any] = {"sub_label": sub_label, "level": level.simple()}
        energy_kj = energies_kj[sub_label]
        entry["energy_kj_per_mol"] = energy_kj
        entry["energy_hartree"] = energy_kj / E_h_kJmol
        entry["path"] = sub_job_paths[sub_label]
        return entry

    def _describe_term(term) -> Dict[str, Any]:
        # One record per correction term: its contribution in both units plus
        # the sub-jobs it depends on.
        contribution_kj = term.evaluate(energies_kj)
        sub_jobs: List[Dict[str, Any]] = [
            _describe_sub_job(sub_label, level)
            for sub_label, level in term.required_levels()
        ]
        return {
            "label": term.label,
            "type": type(term).__name__,
            "contribution_kj_per_mol": contribution_kj,
            "contribution_hartree": contribution_kj / E_h_kJmol,
            "sub_jobs": sub_jobs,
        }

    # The base term is described by its first required (sub_label, level) pair.
    base_sub_label, base_level = section.protocol.base.required_levels()[0]
    base_block = _describe_sub_job(base_sub_label, base_level)

    terms_block: List[Dict[str, Any]] = [
        _describe_term(term) for term in section.protocol.corrections
    ]

    # Assembled key by key so the insertion order is the schema's reading order.
    report: Dict[str, Any] = {
        "species": section.label,
        "kind": section.kind,
        "generated_at": timestamp,
        "arc_version": arc_version,
        "arc_commit": arc_commit,
    }
    report["protocol"] = {
        "preset": section.preset_name,
        "reference": section.reference,
        "formula": _composite_formula_text(section.protocol),
    }
    report["units"] = {
        "energy": "kJ/mol",
        "energy_alt": "Hartree",
    }
    report["base"] = base_block
    report["terms"] = terms_block
    report["final"] = {
        "e_elect_kj_per_mol": e_elect_kj_per_mol,
        "e_elect_hartree": e_elect_kj_per_mol / E_h_kJmol,
        "e_elect_source": "sp_composite",
    }
    report["flags"] = list(section.flags)
    return report
659+
660+
661+
def write_species_report_yaml(
    path: str,
    section: SpeciesSection,
    e_elect_kj_per_mol: float,
    timestamp: str,
    arc_version: str,
    arc_commit: str,
) -> None:
    """Build the per-species sp_composite report and write it to ``path`` as YAML.

    The report dict comes from :func:`build_species_report_dict`, called with
    the same arguments. The parent directory of ``path`` is created if it is
    missing; a bare filename (no directory component) is written to the
    current working directory.

    Output is deterministic: ``sort_keys=False`` keeps the dict's insertion
    order, block style is forced with ``default_flow_style=False``, and the
    file is always encoded as UTF-8. Two writes with the same inputs produce
    byte-identical files.

    Parameters
    ----------
    path : str
        Destination file path for the YAML report.
    section : SpeciesSection
        Reporting handoff struct forwarded to the report builder.
    e_elect_kj_per_mol : float
        Final electronic energy recorded for this species, in kJ/mol.
    timestamp : str
        Timestamp string forwarded to the report builder.
    arc_version, arc_commit : str
        Provenance identifiers forwarded to the report builder.
    """
    report = build_species_report_dict(
        section=section,
        e_elect_kj_per_mol=e_elect_kj_per_mol,
        timestamp=timestamp,
        arc_version=arc_version,
        arc_commit=arc_commit,
    )
    # os.path.dirname() returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create the parent when there is one.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Explicit encoding keeps the written bytes independent of the locale.
    with open(path, "w", encoding="utf-8") as fh:
        yaml.safe_dump(report, fh, sort_keys=False, default_flow_style=False)

arc/level/reporting_test.py

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@
3131
from arc.level.protocol import CompositeProtocol
3232
from arc.level.reporting import (
3333
SpeciesSection,
34+
build_species_report_dict,
3435
format_log_event,
3536
write_composite_notebook,
37+
write_species_report_yaml,
3638
)
3739

3840

@@ -403,6 +405,199 @@ def test_notebook_executes_and_recomputes_expected_final_value(self):
403405
self.assertIn(f"{expected_kjmol:,.3f}", all_text)
404406

405407

408+
# --------------------------------------------------------------------------- #
409+
# build_species_report_dict + write_species_report_yaml #
410+
# --------------------------------------------------------------------------- #
411+
412+
413+
import yaml # noqa: E402 (import after the other module-level imports for grouping)
414+
415+
416+
class TestSpeciesReportDict(unittest.TestCase):
    """Tests for ``build_species_report_dict`` and ``write_species_report_yaml``.

    The notebook (``sp_composite.ipynb``) is for *independent verification*
    via Run-All; the YAML report covered here is for *consumption*: plain
    text, parseable by tooling, one file per species, with every term's
    contribution listed next to the QM-output paths backing it.
    """

    # Provenance values used by the tests that pin realistic metadata.
    _TIMESTAMP = "2026-04-30T13:10:32Z"
    _VERSION = "1.1.0"
    _COMMIT = "74fc4fa5"

    def setUp(self):
        self.tmp = tempfile.mkdtemp()
        self.base_path = os.path.join(self.tmp, "base.out")
        self.hi_path = os.path.join(self.tmp, "delta_T__high.out")
        self.lo_path = os.path.join(self.tmp, "delta_T__low.out")
        # The "high" energy is lower (more negative) than "low", so the
        # delta contribution (high - low) is negative.
        fixtures = (
            (self.base_path, -76.345678),
            (self.hi_path, -76.346500),
            (self.lo_path, -76.345600),
        )
        for fixture_path, energy_hartree in fixtures:
            _write_gaussian_fixture(fixture_path, energy_hartree)
        self.section = _make_two_term_section(paths=self._fixture_paths())

    def tearDown(self):
        # Some tests create nested directories (e.g. Species/H2O/...), so the
        # whole temp tree is removed at once instead of file by file.
        import shutil
        shutil.rmtree(self.tmp, ignore_errors=True)

    # ------------------------------------------------------------------ #
    # helpers                                                            #
    # ------------------------------------------------------------------ #

    def _fixture_paths(self):
        """Return the sub-label -> output-path mapping for the fixture files."""
        return {
            "base": self.base_path,
            "delta_T__high": self.hi_path,
            "delta_T__low": self.lo_path,
        }

    def _build_report(self, section=None, e_elect_kj_per_mol=-200000.0,
                      timestamp="t", arc_version="v", arc_commit="c"):
        """Call ``build_species_report_dict`` with placeholder defaults."""
        return build_species_report_dict(
            section=self.section if section is None else section,
            e_elect_kj_per_mol=e_elect_kj_per_mol,
            timestamp=timestamp,
            arc_version=arc_version,
            arc_commit=arc_commit,
        )

    def _write_report(self, path, timestamp=_TIMESTAMP, arc_version=_VERSION,
                      arc_commit=_COMMIT):
        """Call ``write_species_report_yaml`` for the default fixture section."""
        write_species_report_yaml(
            path=path,
            section=self.section,
            e_elect_kj_per_mol=-200000.0,
            timestamp=timestamp,
            arc_version=arc_version,
            arc_commit=arc_commit,
        )

    # ------------------------------------------------------------------ #
    # build_species_report_dict                                          #
    # ------------------------------------------------------------------ #

    def test_top_level_fields(self):
        report = self._build_report(
            timestamp=self._TIMESTAMP,
            arc_version=self._VERSION,
            arc_commit=self._COMMIT,
        )
        expected_fields = (
            ("species", "H2O"),
            ("kind", "species"),
            ("generated_at", "2026-04-30T13:10:32Z"),
            ("arc_version", "1.1.0"),
            ("arc_commit", "74fc4fa5"),
        )
        for key, expected in expected_fields:
            self.assertEqual(report[key], expected)

    def test_protocol_block(self):
        report = self._build_report(
            timestamp=self._TIMESTAMP,
            arc_version=self._VERSION,
            arc_commit="abc",
        )
        protocol_block = report["protocol"]
        # The fixture uses an explicit recipe, so no preset name is recorded.
        self.assertIsNone(protocol_block["preset"])
        self.assertIn("DOI", protocol_block["reference"])
        # The formula names both the base energy and the correction term.
        formula = protocol_block["formula"]
        self.assertIn("E_base", formula)
        self.assertIn("delta_T", formula)

    def test_units_block(self):
        units = self._build_report()["units"]
        self.assertEqual(units["energy"], "kJ/mol")
        self.assertEqual(units["energy_alt"], "Hartree")

    def test_base_block(self):
        base = self._build_report()["base"]
        self.assertEqual(base["sub_label"], "base")
        self.assertEqual(base["path"], self.base_path)
        # The two reported energies must agree under the Hartree <-> kJ/mol
        # conversion.
        self.assertAlmostEqual(
            base["energy_kj_per_mol"] / E_h_kJmol,
            base["energy_hartree"],
            places=6,
        )

    def test_terms_block_has_one_entry_per_correction(self):
        terms = self._build_report()["terms"]
        # The fixture protocol has exactly one correction.
        self.assertEqual(len(terms), 1)
        term = terms[0]
        self.assertEqual(term["label"], "delta_T")
        self.assertEqual(term["type"], "DeltaTerm")
        self.assertEqual(len(term["sub_jobs"]), 2)
        reported_sub_labels = {sub_job["sub_label"] for sub_job in term["sub_jobs"]}
        self.assertEqual(reported_sub_labels, {"delta_T__high", "delta_T__low"})
        # Contribution = E[high] - E[low]
        #              = -76.346500 - (-76.345600) = -0.000900 Ha,
        # i.e. -0.000900 * E_h_kJmol, roughly -2.363 kJ/mol.
        self.assertAlmostEqual(term["contribution_hartree"], -0.000900, places=6)
        self.assertAlmostEqual(term["contribution_kj_per_mol"],
                               -0.000900 * E_h_kJmol, places=3)

    def test_final_block_uses_caller_supplied_e_elect(self):
        final = self._build_report(e_elect_kj_per_mol=-200000.123)["final"]
        self.assertEqual(final["e_elect_kj_per_mol"], -200000.123)
        self.assertAlmostEqual(final["e_elect_hartree"],
                               -200000.123 / E_h_kJmol, places=6)
        self.assertEqual(final["e_elect_source"], "sp_composite")

    def test_flags_propagated(self):
        flagged_section = _make_two_term_section(
            paths=self._fixture_paths(),
            flags=["MRCC degenerate-system fallback for delta_Q__high"],
        )
        flags = self._build_report(section=flagged_section)["flags"]
        self.assertEqual(len(flags), 1)
        self.assertIn("MRCC", flags[0])

    # ------------------------------------------------------------------ #
    # write_species_report_yaml                                          #
    # ------------------------------------------------------------------ #

    def test_yaml_round_trips(self):
        out = os.path.join(self.tmp, "sp_composite_report.yml")
        self._write_report(out)
        self.assertTrue(os.path.exists(out))
        with open(out) as fh:
            loaded = yaml.safe_load(fh)
        self.assertEqual(loaded["species"], "H2O")
        self.assertEqual(loaded["kind"], "species")
        self.assertIn("base", loaded)
        self.assertEqual(len(loaded["terms"]), 1)
        self.assertEqual(loaded["terms"][0]["label"], "delta_T")

    def test_yaml_writer_creates_parent_directory(self):
        nested = os.path.join(self.tmp, "Species", "H2O", "sp_composite_report.yml")
        self._write_report(nested, timestamp="t", arc_version="v", arc_commit="c")
        self.assertTrue(os.path.exists(nested))

    def test_writer_is_deterministic(self):
        """Two writes with the same inputs produce byte-identical files."""
        out_a = os.path.join(self.tmp, "a.yml")
        out_b = os.path.join(self.tmp, "b.yml")
        for out in (out_a, out_b):
            self._write_report(out)
        with open(out_a, "rb") as first, open(out_b, "rb") as second:
            self.assertEqual(first.read(), second.read())
599+
600+
406601
# --------------------------------------------------------------------------- #
407602
# Import placement (module-level, per project guidelines) #
408603
# --------------------------------------------------------------------------- #

0 commit comments

Comments
 (0)