Skip to content

Commit 6c433bd

Browse files
alongd and claude committed
scheduler: skip composite sub-job spawn when geometry is missing
get_xyz(generate=False) can return None on a corrupted mid-opt restart; warn and skip instead of submitting sp sub-jobs with no geometry.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent 72f39e9 commit 6c433bd

2 files changed

Lines changed: 79 additions & 1 deletion

File tree

arc/scheduler.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1638,10 +1638,21 @@ def _spawn_composite_pending(self, label: str) -> None:
16381638
Pending sub_labels that share a Level with an existing pending sub_label
16391639
are de-duplicated here: only one sp job runs, and its eventual output
16401640
path maps to every matching sub_label at completion time.
1641+
1642+
Species whose geometry cannot be retrieved (``get_xyz(generate=False)``
1643+
returns ``None``, e.g. on a corrupted mid-opt restart) are skipped with
1644+
a warning rather than submitting sp jobs with no geometry.
16411645
"""
16421646
pending = self._sp_composite_pending.get(label, {})
16431647
if not pending:
16441648
return
1649+
xyz = self.species_dict[label].get_xyz(generate=False)
1650+
if xyz is None:
1651+
logger.warning(format_log_event(
1652+
label, "no geometry available — skipping sub-job spawn",
1653+
{"pending_sub_labels": sorted(pending.keys())},
1654+
))
1655+
return
16451656
unique_levels: list[tuple[Level, list[str]]] = []
16461657
for sub_label, lvl in pending.items():
16471658
for existing_lvl, sub_labels in unique_levels:
@@ -1660,7 +1671,7 @@ def _spawn_composite_pending(self, label: str) -> None:
16601671
"sub-job queued",
16611672
{"sub_labels": sub_labels, "level": lvl.simple()}))
16621673
self.run_job(label=label,
1663-
xyz=self.species_dict[label].get_xyz(generate=False),
1674+
xyz=xyz,
16641675
level_of_theory=lvl,
16651676
job_type='sp')
16661677

arc/scheduler_test.py

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1683,6 +1683,73 @@ def test_kick_start_skips_species_with_no_prior_progress(self):
16831683
# No prior progress → kick-start must not spawn anything.
16841684
self.assertEqual(spawned, [])
16851685

1686+
def test_spawn_composite_pending_skips_species_without_geometry(self):
1687+
"""A species whose get_xyz(generate=False) returns None must not spawn sub-jobs."""
1688+
recipe = {"base": {"method": "hf", "basis": "cc-pVTZ"}, "corrections": []}
1689+
protocol = CompositeProtocol.from_user_input(recipe)
1690+
spc = ARCSpecies(label='H2', smiles='[H][H]')
1691+
sched = self._make_scheduler([spc], sp_composite=protocol)
1692+
self.assertIsNone(spc.get_xyz(generate=False))
1693+
self.assertTrue(sched._sp_composite_pending['H2'])
1694+
calls = []
1695+
sched.run_job = lambda *args, **kwargs: calls.append(kwargs)
1696+
with self.assertLogs(logger='arc', level=logging.WARNING) as cm:
1697+
sched._spawn_composite_pending('H2')
1698+
self.assertEqual(calls, [])
1699+
joined = '\n'.join(cm.output)
1700+
self.assertIn('H2', joined)
1701+
self.assertIn('no geometry', joined)
1702+
1703+
def test_restart_kick_start_skips_species_without_geometry(self):
1704+
"""Restart kick-start must not submit sp sub-jobs for a species whose
1705+
geometry cannot be retrieved (e.g., a corrupted mid-opt restart)."""
1706+
tmp = os.path.join(self.project_directory, "fx_kickstart_noxyz")
1707+
os.makedirs(tmp, exist_ok=True)
1708+
recipe = {
1709+
"base": {"method": "hf", "basis": "cc-pVTZ"},
1710+
"corrections": [
1711+
{"label": "delta_T", "type": "delta",
1712+
"high": {"method": "ccsdt", "basis": "cc-pVDZ"},
1713+
"low": {"method": "ccsd(t)", "basis": "cc-pVDZ"}},
1714+
],
1715+
}
1716+
protocol = CompositeProtocol.from_user_input(recipe)
1717+
spc = ARCSpecies(label='H2', smiles='[H][H]')
1718+
spc.final_xyz = {'symbols': ('H', 'H'), 'coords': ((0, 0, 0), (0, 0, 0.74)),
1719+
'isotopes': (1, 1)}
1720+
sched1 = self._make_scheduler([spc], sp_composite=protocol)
1721+
paths = self._seed_protocol_fixtures(tmp, protocol,
1722+
{"base": -1.10, "delta_T__high": -1.15,
1723+
"delta_T__low": -1.12})
1724+
sched1.post_sp_actions('H2', paths["base"], protocol.base.level)
1725+
output_snapshot = sched1.output
1726+
# Fresh scheduler from the snapshot, but the species carries no 3D info.
1727+
spc2 = ARCSpecies(label='H2', smiles='[H][H]')
1728+
self.assertIsNone(spc2.get_xyz(generate=False))
1729+
spawned = []
1730+
with patch.object(Scheduler, "run_job", lambda self, *a, **kw: spawned.append(kw)), \
1731+
self.assertLogs(logger='arc', level=logging.WARNING) as cm:
1732+
sched2 = Scheduler(
1733+
project='sp_composite_orch',
1734+
ess_settings=self.ess_settings,
1735+
species_list=[spc2],
1736+
project_directory=self.project_directory,
1737+
opt_level=Level(repr=default_levels_of_theory['opt']),
1738+
freq_level=Level(repr=default_levels_of_theory['freq']),
1739+
sp_level=Level(repr=default_levels_of_theory['sp']),
1740+
conformer_opt_level=Level(repr=default_levels_of_theory['conformer']),
1741+
scan_level=Level(repr=default_levels_of_theory['scan']),
1742+
ts_guess_level=Level(repr=default_levels_of_theory['ts_guesses']),
1743+
orbitals_level=default_levels_of_theory['orbitals'],
1744+
sp_composite=protocol,
1745+
output=output_snapshot,
1746+
testing=True,
1747+
)
1748+
self.assertEqual(spawned, [])
1749+
self.assertEqual(set(sched2._sp_composite_pending['H2'].keys()),
1750+
{"delta_T__high", "delta_T__low"})
1751+
self.assertIn('no geometry', '\n'.join(cm.output))
1752+
16861753
# --- Phase 3.5: corruption recovery ------------------------------------ #
16871754

16881755
def test_corrupted_recorded_output_is_invalidated_and_requeued(self):

0 commit comments

Comments
 (0)