Skip to content

Commit 4ac82b4

Browse files
authored
Skip 2D-graph isomorphism enforcement for TS species (#876)
## Summary - ARC was enforcing 2D-graph isomorphism on TSs, but TS connectivity/bond-orders are not uniquely defined (partial forming/breaking bonds), so valid TS geometries were being rejected. - `ARCSpecies.mol_from_xyz` now short-circuits the isomorphism check for TSs and simply accepts the perceived molecule. - Scheduler: `is_ts` guards added at the three remaining `check_xyz_isomorphism` call sites (`check_directed_scan`, `check_directed_scan_job`, `troubleshoot_scan_job`). Matches the existing guards in `parse_composite_geo` and `parse_opt_geo`. - Secondary fix: the non-TS warning message in `mol_from_xyz` now logs `perceived_mol` in the "and:" branch (it was logging `self.mol` twice). - Docs updated in `TS_search.rst` and `advanced.rst` to describe the policy. ## Design note for reviewer I kept the policy at the **call sites** rather than pushing an early-return into `check_xyz_isomorphism` itself. Rationale: returning `True` from a method named "check..." when no check ran conflates "passed" with "skipped", and several callers propagate the value into logs/restart dicts (`self.output[label]['isomorphism'] += '... passed ...'`). The scheduler already used this call-site-guard pattern at `parse_composite_geo` (line 2434) and `parse_opt_geo` (line 2537), so this keeps the codebase consistent. One thing worth a second look: at `check_directed_scan_job`, `is_isomorphic=True` is now stored into `rotors_dict[i]['directed_scan'][...]['is_isomorphic']` for TSs. If any downstream consumer trusts that field as evidence a real check ran, a sentinel (`None` for skipped) or a rename would be better. I didn't change it because the same semantic already exists in the docstring for `mol_from_xyz`, but @alongd please weigh in. ## Test plan - [x] `arc.species.species_test.TestARCSpecies.test_ts_mol_from_xyz_skips_isomorphism_enforcement` — TS accepts perceived xyz-derived molecule even when stored 2D graph disagrees - [x] `arc.scheduler_test.TestScheduler.test_check_directed_scan_job_skips_isomorphism_for_ts` — mock-based, asserts `check_xyz_isomorphism` is NOT called and `is_isomorphic=True` is recorded - [x] `arc.scheduler_test.TestScheduler.test_troubleshoot_scan_job_skips_isomorphism_for_ts` — mock-based, asserts `check_xyz_isomorphism` is NOT called and `final_xyz` / `run_opt_job` path runs - [x] Existing `test_mol_from_xyz` and `test_check_xyz_isomorphism` still pass (non-TS behavior unchanged) - [x] Full suite run in CI cc @alongd — would value your eyes on the design note above, especially whether the call-site-guard pattern is the right call or whether you'd prefer centralizing in `check_xyz_isomorphism` itself.
2 parents f04b5c2 + b521d1e commit 4ac82b4

6 files changed

Lines changed: 244 additions & 32 deletions

File tree

arc/scheduler.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3099,9 +3099,12 @@ def check_directed_scan(self, label, pivots, scan, energies):
30993099
# a lower conformation was found
31003100
deg_increment = actions[1]
31013101
self.species_dict[label].set_dihedral(scan=scan, index=1, deg_increment=deg_increment)
3102-
is_isomorphic = self.species_dict[label].check_xyz_isomorphism(
3103-
allow_nonisomorphic_2d=self.allow_nonisomorphic_2d,
3104-
xyz=self.species_dict[label].initial_xyz)
3102+
if self.species_dict[label].is_ts:
3103+
is_isomorphic = True
3104+
else:
3105+
is_isomorphic = self.species_dict[label].check_xyz_isomorphism(
3106+
allow_nonisomorphic_2d=self.allow_nonisomorphic_2d,
3107+
xyz=self.species_dict[label].initial_xyz)
31053108
if is_isomorphic:
31063109
self.delete_all_species_jobs(label)
31073110
# Remove all completed rotor calculation information
@@ -3161,7 +3164,10 @@ def check_directed_scan_job(self, label: str, job: JobAdapter):
31613164
"""
31623165
if job.job_status[1]['status'] == 'done':
31633166
xyz = parser.parse_geometry(log_file_path=job.local_path_to_output_file)
3164-
is_isomorphic = self.species_dict[label].check_xyz_isomorphism(xyz=xyz, verbose=False)
3167+
if self.species_dict[label].is_ts:
3168+
is_isomorphic = True
3169+
else:
3170+
is_isomorphic = self.species_dict[label].check_xyz_isomorphism(xyz=xyz, verbose=False)
31653171
for rotor_dict in self.species_dict[label].rotors_dict.values():
31663172
if rotor_dict['pivots'] == job.pivots:
31673173
key = tuple(f'{dihedral:.2f}' for dihedral in job.dihedrals)
@@ -3394,9 +3400,12 @@ def troubleshoot_scan_job(self,
33943400
break
33953401
else:
33963402
# If 'change conformer' is not used, check for isomorphism.
3397-
is_isomorphic = self.species_dict[label].check_xyz_isomorphism(
3398-
allow_nonisomorphic_2d=self.allow_nonisomorphic_2d,
3399-
xyz=new_xyz)
3403+
if self.species_dict[label].is_ts:
3404+
is_isomorphic = True
3405+
else:
3406+
is_isomorphic = self.species_dict[label].check_xyz_isomorphism(
3407+
allow_nonisomorphic_2d=self.allow_nonisomorphic_2d,
3408+
xyz=new_xyz)
34003409
if is_isomorphic:
34013410
self.species_dict[label].final_xyz = new_xyz
34023411
# Remove all completed rotor calculation information.

arc/scheduler_test.py

Lines changed: 151 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"""
77

88
import unittest
9-
from unittest.mock import patch
9+
from unittest.mock import MagicMock, patch
1010
import os
1111
import shutil
1212

@@ -1033,6 +1033,156 @@ def test_run_sp_monoatomic_dlpno(self, mock_run_job):
10331033
self.assertEqual(o_level.method, 'dlpno-ccsd(t)-f12')
10341034
self.assertEqual(o_level.cabs, 'cc-pvtz-f12-cabs')
10351035

1036+
def test_check_directed_scan_job_skips_isomorphism_for_ts(self):
1037+
"""check_directed_scan_job must not call check_xyz_isomorphism for a TS; is_isomorphic is recorded as True."""
1038+
ts_xyz = str_to_xyz("""N 0.91779059 0.51946178 0.00000000
1039+
H 1.81402049 1.03819414 0.00000000
1040+
H 0.00000000 0.00000000 0.00000000
1041+
H 0.91779059 1.22790192 0.72426890""")
1042+
ts_spc = ARCSpecies(label='TS_dirscan', is_ts=True, xyz=ts_xyz, multiplicity=1, charge=0,
1043+
compute_thermo=False)
1044+
ts_spc.rotors_dict = {0: {'pivots': [1, 2], 'directed_scan': {}}}
1045+
1046+
project_directory = os.path.join(ARC_PATH, 'Projects', 'arc_project_ts_iso_dirscan')
1047+
self.addCleanup(shutil.rmtree, project_directory, ignore_errors=True)
1048+
sched = Scheduler(project='test_ts_iso_dirscan', ess_settings=self.ess_settings,
1049+
species_list=[ts_spc],
1050+
opt_level=Level(repr=default_levels_of_theory['opt']),
1051+
freq_level=Level(repr=default_levels_of_theory['freq']),
1052+
sp_level=Level(repr=default_levels_of_theory['sp']),
1053+
ts_guess_level=Level(repr=default_levels_of_theory['ts_guesses']),
1054+
project_directory=project_directory,
1055+
testing=True,
1056+
job_types=self.job_types1,
1057+
)
1058+
1059+
job_mock = MagicMock()
1060+
job_mock.job_status = [None, {'status': 'done'}]
1061+
job_mock.local_path_to_output_file = '/fake/path.log'
1062+
job_mock.pivots = [1, 2]
1063+
job_mock.dihedrals = [45.0]
1064+
job_mock.ess_trsh_methods = []
1065+
1066+
with patch('arc.species.species.ARCSpecies.check_xyz_isomorphism') as mock_iso, \
1067+
patch('arc.scheduler.parser.parse_geometry', return_value=ts_xyz), \
1068+
patch('arc.scheduler.parser.parse_e_elect', return_value=-123.45):
1069+
sched.check_directed_scan_job(label='TS_dirscan', job=job_mock)
1070+
1071+
mock_iso.assert_not_called()
1072+
recorded = sched.species_dict['TS_dirscan'].rotors_dict[0]['directed_scan'][('45.00',)]
1073+
self.assertTrue(recorded['is_isomorphic'])
1074+
1075+
@patch('arc.scheduler.Scheduler.run_opt_job')
1076+
def test_troubleshoot_scan_job_skips_isomorphism_for_ts(self, mock_run_opt):
1077+
"""troubleshoot_scan_job must not call check_xyz_isomorphism for a TS when applying 'change conformer'."""
1078+
ts_xyz = str_to_xyz("""N 0.91779059 0.51946178 0.00000000
1079+
H 1.81402049 1.03819414 0.00000000
1080+
H 0.00000000 0.00000000 0.00000000
1081+
H 0.91779059 1.22790192 0.72426890""")
1082+
new_xyz = str_to_xyz("""N 0.91000000 0.52000000 0.00000000
1083+
H 1.81000000 1.04000000 0.00000000
1084+
H 0.00000000 0.00000000 0.00000000
1085+
H 0.91000000 1.23000000 0.72000000""")
1086+
ts_spc = ARCSpecies(label='TS_trsh', is_ts=True, xyz=ts_xyz, multiplicity=1, charge=0,
1087+
compute_thermo=False)
1088+
ts_spc.rotors_dict = {0: {'pivots': [1, 2], 'scan': [3, 1, 2, 4], 'scan_path': '',
1089+
'invalidation_reason': '', 'success': None, 'symmetry': None,
1090+
'times_dihedral_set': 0, 'trsh_methods': [], 'trsh_counter': 0}}
1091+
1092+
project_directory = os.path.join(ARC_PATH, 'Projects', 'arc_project_ts_iso_trsh')
1093+
self.addCleanup(shutil.rmtree, project_directory, ignore_errors=True)
1094+
sched = Scheduler(project='test_ts_iso_trsh', ess_settings=self.ess_settings,
1095+
species_list=[ts_spc],
1096+
opt_level=Level(repr=default_levels_of_theory['opt']),
1097+
freq_level=Level(repr=default_levels_of_theory['freq']),
1098+
sp_level=Level(repr=default_levels_of_theory['sp']),
1099+
ts_guess_level=Level(repr=default_levels_of_theory['ts_guesses']),
1100+
project_directory=project_directory,
1101+
testing=True,
1102+
job_types=self.job_types1,
1103+
)
1104+
sched.trsh_ess_jobs = True
1105+
sched.trsh_rotors = True
1106+
1107+
job_mock = MagicMock()
1108+
job_mock.species_label = 'TS_trsh'
1109+
job_mock.rotor_index = 0
1110+
job_mock.torsions = [[3, 1, 2, 4]]
1111+
job_mock.job_name = 'scan_a200'
1112+
1113+
with patch('arc.species.species.ARCSpecies.check_xyz_isomorphism') as mock_iso, \
1114+
patch('arc.scheduler.Scheduler.delete_all_species_jobs'):
1115+
sched.troubleshoot_scan_job(job=job_mock, methods={'change conformer': new_xyz})
1116+
1117+
mock_iso.assert_not_called()
1118+
self.assertEqual(sched.species_dict['TS_trsh'].final_xyz, new_xyz)
1119+
mock_run_opt.assert_called_once()
1120+
1121+
@patch('arc.scheduler.Scheduler.run_job')
1122+
def test_troubleshoot_scan_job_skips_isomorphism_for_ts_non_conformer(self, mock_run_job):
1123+
"""troubleshoot_scan_job must not call check_xyz_isomorphism for a TS in the non-'change conformer' branch."""
1124+
ts_xyz = str_to_xyz("""N 0.91779059 0.51946178 0.00000000
1125+
H 1.81402049 1.03819414 0.00000000
1126+
H 0.00000000 0.00000000 0.00000000
1127+
H 0.91779059 1.22790192 0.72426890""")
1128+
ts_spc = ARCSpecies(label='TS_trsh_nc', is_ts=True, xyz=ts_xyz, multiplicity=1, charge=0,
1129+
compute_thermo=False)
1130+
ts_spc.rotors_dict = {0: {'pivots': [1, 2], 'scan': [3, 1, 2, 4], 'scan_path': '',
1131+
'invalidation_reason': '', 'success': None, 'symmetry': None,
1132+
'times_dihedral_set': 0, 'trsh_methods': [], 'trsh_counter': 0}}
1133+
1134+
project_directory = os.path.join(ARC_PATH, 'Projects', 'arc_project_ts_iso_trsh_nc')
1135+
self.addCleanup(shutil.rmtree, project_directory, ignore_errors=True)
1136+
sched = Scheduler(project='test_ts_iso_trsh_nc', ess_settings=self.ess_settings,
1137+
species_list=[ts_spc],
1138+
opt_level=Level(repr=default_levels_of_theory['opt']),
1139+
freq_level=Level(repr=default_levels_of_theory['freq']),
1140+
sp_level=Level(repr=default_levels_of_theory['sp']),
1141+
ts_guess_level=Level(repr=default_levels_of_theory['ts_guesses']),
1142+
project_directory=project_directory,
1143+
testing=True,
1144+
job_types=self.job_types1,
1145+
)
1146+
sched.trsh_ess_jobs = True
1147+
sched.trsh_rotors = True
1148+
1149+
job_mock = MagicMock()
1150+
job_mock.species_label = 'TS_trsh_nc'
1151+
job_mock.rotor_index = 0
1152+
job_mock.torsions = [[2, 0, 1, 3]] # 0-indexed; torsions_to_scans yields the 1-indexed scan [3, 1, 2, 4]
1153+
job_mock.job_name = 'scan_a201'
1154+
job_mock.scan_res = 8.0
1155+
job_mock.xyz = ts_xyz
1156+
job_mock.level = Level(repr=default_levels_of_theory['scan'])
1157+
job_mock.local_path_to_output_file = '/fake/path.log'
1158+
1159+
with patch('arc.species.species.ARCSpecies.check_xyz_isomorphism') as mock_iso, \
1160+
patch('arc.scheduler.trsh_scan_job') as mock_trsh_scan:
1161+
mock_trsh_scan.return_value = [], 4.0
1162+
1163+
sched.troubleshoot_scan_job(job=job_mock, methods={'inc_res': None})
1164+
1165+
mock_iso.assert_not_called()
1166+
mock_trsh_scan.assert_called_once_with(
1167+
label='TS_trsh_nc',
1168+
scan_res=8.0,
1169+
scan=[3, 1, 2, 4],
1170+
scan_list=[[3, 1, 2, 4]],
1171+
methods={'inc_res': None},
1172+
log_file='/fake/path.log',
1173+
)
1174+
1175+
mock_run_job.assert_called_once()
1176+
_, kwargs = mock_run_job.call_args
1177+
self.assertEqual(kwargs['label'], 'TS_trsh_nc')
1178+
self.assertEqual(kwargs['xyz'], ts_xyz)
1179+
self.assertEqual(kwargs['level_of_theory'], job_mock.level)
1180+
self.assertEqual(kwargs['job_type'], 'scan')
1181+
self.assertEqual(kwargs['torsions'], [[2, 0, 1, 3]])
1182+
self.assertEqual(kwargs['scan_trsh'], [])
1183+
self.assertEqual(kwargs['trsh'], {'scan_res': 4.0})
1184+
self.assertEqual(kwargs['rotor_index'], 0)
1185+
10361186
@classmethod
10371187
def tearDownClass(cls):
10381188
"""

arc/species/species.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,8 @@ def mol_from_xyz(self,
16211621
Important for TS searches and for identifying rotor indices.
16221622
This works by generating a molecule from xyz and using the
16231623
2D structure to confirm that the perceived molecule is correct.
1624+
For TSs, the perceived molecule is accepted without enforcing
1625+
2D-graph isomorphism, since TS connectivity is not strictly defined.
16241626
If ``xyz`` is not given, the species xyz attribute will be used.
16251627
16261628
Args:
@@ -1643,28 +1645,32 @@ def mol_from_xyz(self,
16431645
n_fragments=self.get_n_fragments(),
16441646
)
16451647
if perceived_mol is not None:
1646-
allow_nonisomorphic_2d = (self.charge is not None and self.charge) \
1647-
or self.mol.has_charge() or perceived_mol.has_charge() \
1648-
or (self.multiplicity is not None and self.multiplicity >= 3) \
1649-
or self.mol.multiplicity >= 3 or perceived_mol.multiplicity >= 3
1650-
isomorphic = self.check_xyz_isomorphism(mol=perceived_mol,
1651-
xyz=xyz,
1652-
allow_nonisomorphic_2d=allow_nonisomorphic_2d)
1653-
if not isomorphic:
1654-
logger.warning(f'XYZ and the 2D graph representation for {self.label} are not isomorphic.\nGot '
1655-
f'xyz:\n{xyz}\n\nwhich corresponds to {self.mol.copy(deep=True).to_smiles()}\n'
1656-
f'{self.mol.copy(deep=True).to_adjacency_list()}\n\nand: '
1657-
f'{self.mol.copy(deep=True).to_smiles()}\n'
1658-
f'{self.mol.copy(deep=True).to_adjacency_list()}')
1659-
raise SpeciesError(f'XYZ and the 2D graph representation for {self.label} are not compliant.')
1660-
if not self.keep_mol:
1661-
if is_mol_valid(perceived_mol, charge=self.charge, multiplicity=self.multiplicity, n_radicals=self.number_of_radicals):
1648+
if self.is_ts:
1649+
if not self.keep_mol:
16621650
self.mol = perceived_mol
1663-
else:
1664-
try:
1665-
order_atoms(ref_mol=perceived_mol, mol=self.mol)
1666-
except SanitizationError:
1651+
else:
1652+
allow_nonisomorphic_2d = (self.charge is not None and self.charge) \
1653+
or self.mol.has_charge() or perceived_mol.has_charge() \
1654+
or (self.multiplicity is not None and self.multiplicity >= 3) \
1655+
or self.mol.multiplicity >= 3 or perceived_mol.multiplicity >= 3
1656+
isomorphic = self.check_xyz_isomorphism(mol=perceived_mol,
1657+
xyz=xyz,
1658+
allow_nonisomorphic_2d=allow_nonisomorphic_2d)
1659+
if not isomorphic:
1660+
logger.warning(f'XYZ and the 2D graph representation for {self.label} are not isomorphic.\nGot '
1661+
f'xyz:\n{xyz}\n\nwhich corresponds to {self.mol.copy(deep=True).to_smiles()}\n'
1662+
f'{self.mol.copy(deep=True).to_adjacency_list()}\n\nand: '
1663+
f'{perceived_mol.copy(deep=True).to_smiles()}\n'
1664+
f'{perceived_mol.copy(deep=True).to_adjacency_list()}')
1665+
raise SpeciesError(f'XYZ and the 2D graph representation for {self.label} are not compliant.')
1666+
if not self.keep_mol:
1667+
if is_mol_valid(perceived_mol, charge=self.charge, multiplicity=self.multiplicity, n_radicals=self.number_of_radicals):
16671668
self.mol = perceived_mol
1669+
else:
1670+
try:
1671+
order_atoms(ref_mol=perceived_mol, mol=self.mol)
1672+
except SanitizationError:
1673+
self.mol = perceived_mol
16681674
else:
16691675
perceived_mol = perceive_molecule_from_xyz(xyz,
16701676
charge=self.charge,

arc/species/species_test.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,6 +1684,46 @@ def test_mol_from_xyz(self):
16841684
radical_count += atom.radical_electrons
16851685
self.assertEqual(radical_count, 2)
16861686

1687+
def test_ts_mol_from_xyz_skips_isomorphism_enforcement(self):
1688+
"""Test that a TS accepts the perceived xyz-derived molecule even if a stored 2D graph disagrees."""
1689+
xyz = {'symbols': ('O', 'O', 'H', 'C', 'C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'),
1690+
'isotopes': (16, 16, 1, 12, 12, 12, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
1691+
'coords': ((1.570004, 0.919237, -0.063628), (2.346384, -0.215837, 0.116826),
1692+
(2.578713, -0.467096, -0.784756), (-0.673058, -1.117816, -1.045216),
1693+
(-0.76037, -0.036132, 0.001231), (-0.850886, -0.54288, 1.418092),
1694+
(-1.694638, 1.099253, -0.333714), (-1.612555, -1.681809, -1.088997),
1695+
(-0.491563, -0.700452, -2.03735), (0.122945, -1.827698, -0.811666),
1696+
(0.427835, 0.508898, -0.034066), (-0.034676, -1.231859, 1.641118),
1697+
(-1.795224, -1.080235, 1.568607), (-0.814118, 0.27711, 2.136337),
1698+
(-2.733958, 0.749029, -0.320335), (-1.610541, 1.910407, 0.391085),
1699+
(-1.494252, 1.501954, -1.327915))}
1700+
adj = """multiplicity 2
1701+
1 O u0 p2 c0 {2,S} {17,S}
1702+
2 O u1 p2 c0 {1,S}
1703+
3 C u0 p0 c0 {4,S} {5,S} {6,S} {7,S}
1704+
4 C u0 p0 c0 {3,S} {8,S} {9,S} {10,S}
1705+
5 C u0 p0 c0 {3,S} {11,S} {12,S} {13,S}
1706+
6 C u0 p0 c0 {3,S} {14,S} {15,S} {16,S}
1707+
7 H u0 p0 c0 {3,S}
1708+
8 H u0 p0 c0 {4,S}
1709+
9 H u0 p0 c0 {4,S}
1710+
10 H u0 p0 c0 {4,S}
1711+
11 H u0 p0 c0 {5,S}
1712+
12 H u0 p0 c0 {5,S} {17,vdW}
1713+
13 H u0 p0 c0 {5,S}
1714+
14 H u0 p0 c0 {6,S}
1715+
15 H u0 p0 c0 {6,S}
1716+
16 H u0 p0 c0 {6,S}
1717+
17 H u0 p0 c0 {1,S} {12,vdW}"""
1718+
1719+
mol_from_adj = Molecule().from_adjacency_list(adj)
1720+
self.assertTrue(any(bond.is_van_der_waals() for bond in mol_from_adj.get_all_edges()),
1721+
'Expected the test adjlist to contain at least one vdW bond.')
1722+
1723+
spc = ARCSpecies(label='TS0', adjlist=adj, xyz=xyz, is_ts=True, multiplicity=2, charge=0)
1724+
self.assertFalse(any(bond.is_van_der_waals() for bond in spc.mol.get_all_edges()),
1725+
'TS ARCSpecies.mol should not retain vdW bonds from the input adjlist.')
1726+
16871727
def test_consistent_atom_order(self):
16881728
"""Test that the atom order is preserved whether starting from SMILES or from xyz"""
16891729
xyz9 = """O -1.17310019 -0.30822930 0.16269772

docs/source/TS_search.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ Outputs and validation
4747
""""""""""""""""""""""
4848
Validated TS results are reported in the project output (log files and generated artifacts),
4949
together with the supporting calculations (optimization, frequency, and IRC).
50+
ARC does not require TS geometries to be isomorphic with a stored 2D adjacency list, since a TS does not have a
51+
single strict graph representation. Instead, TS validation relies on TS-specific checks such as the imaginary
52+
frequency, normal mode displacement analysis, IRC results, and energetic consistency.
5053

5154
Reference
5255
"""""""""

0 commit comments

Comments
 (0)