Skip to content

Commit 8ac5e82

Browse files
committed
Improved the TS IRC check
IRC Improvements (arc/checks/ts.py): - Upgraded 'check_irc_species_and_rxn' to use molecular graph isomorphism as the primary validation method. - Added 'perceive_irc_fragments' using DFS-based connected component detection to handle multi-species reactions (e.g., A + B) reliably. - Implemented permutation-based matching to verify that the set of perceived IRC fragments matches the expected reaction species. - Retained distance-matrix bond-list comparison as a robust fallback.
1 parent d51218d commit 8ac5e82

1 file changed

Lines changed: 134 additions & 10 deletions

File tree

arc/checks/ts.py

Lines changed: 134 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919
)
2020
from arc.family.family import get_reaction_family_products
2121
from arc.imports import settings
22-
from arc.species.converter import check_xyz_dict, displace_xyz, xyz_to_dmat
22+
from arc.species.converter import check_isomorphism, check_xyz_dict, displace_xyz, xyz_from_data, xyz_to_dmat
23+
from arc.species.perceive import perceive_molecule_from_xyz
2324
from arc.statmech.factory import statmech_factory
2425

2526
if TYPE_CHECKING:
2627
from arc.job.adapter import JobAdapter
2728
from arc.level import Level
29+
from arc.molecule.molecule import Molecule
2830
from arc.species.species import ARCSpecies, TSGuess
2931
from arc.reaction import ARCReaction
3032

@@ -498,28 +500,150 @@ def check_irc_species_and_rxn(xyz_1: dict,
498500
Check that the two species that result from optimizing the outputs of two IRC runs
499501
correspond to the desired reactants and products of the corresponding reaction.
500502
503+
Uses molecular graph isomorphism (including bond orders and resonance structures)
504+
when molecule perception succeeds for both endpoints. Falls back to distance-matrix-based
505+
bond-list comparison if perception fails for either endpoint or if the expected
506+
reactant/product ``Molecule`` objects are unavailable.
507+
501508
Args:
502-
xyz_1 (dict): The coordinates of IRS species 1.
503-
xyz_2 (dict): The coordinates of IRS species 2.
509+
xyz_1 (dict): The coordinates of IRC species 1.
510+
xyz_2 (dict): The coordinates of IRC species 2.
504511
rxn (ARCReaction): The corresponding reaction object instance.
505512
"""
506513
if rxn is None:
507514
return None
508515
rxn.ts_species.ts_checks['IRC'] = False
509516
xyz_1, xyz_2 = check_xyz_dict(xyz_1), check_xyz_dict(xyz_2)
517+
518+
# Primary check: molecular graph isomorphism
519+
reactants, products = rxn.get_reactants_and_products(return_copies=True)
520+
r_mols = [r.mol for r in reactants]
521+
p_mols = [p.mol for p in products]
522+
523+
if not any(m is None for m in r_mols + p_mols):
524+
charge = rxn.charge or 0
525+
frags_1 = _perceive_irc_fragments(xyz_1, charge=charge)
526+
frags_2 = _perceive_irc_fragments(xyz_2, charge=charge)
527+
if frags_1 is not None and frags_2 is not None:
528+
if (_match_fragments_to_species(frags_1, r_mols)
529+
and _match_fragments_to_species(frags_2, p_mols)) \
530+
or (_match_fragments_to_species(frags_1, p_mols)
531+
and _match_fragments_to_species(frags_2, r_mols)):
532+
rxn.ts_species.ts_checks['IRC'] = True
533+
return
534+
logger.debug('IRC isomorphism check failed, falling back to bond-list comparison.')
535+
else:
536+
logger.debug('IRC molecule perception failed for one or both endpoints, '
537+
'falling back to bond-list comparison.')
538+
539+
# Fallback: bond-list connectivity comparison
540+
try:
541+
r_bonds, p_bonds = rxn.get_bonds()
542+
except Exception:
543+
logger.debug('Could not get reaction bonds for IRC fallback check.')
544+
return
510545
dmat_1, dmat_2 = xyz_to_dmat(xyz_1), xyz_to_dmat(xyz_2)
511-
dmat_bonds_1 = get_bonds_from_dmat(dmat=dmat_1,
512-
elements=xyz_1['symbols'],
513-
)
514-
dmat_bonds_2 = get_bonds_from_dmat(dmat=dmat_2,
515-
elements=xyz_2['symbols'],
516-
)
517-
r_bonds, p_bonds = rxn.get_bonds()
546+
dmat_bonds_1 = get_bonds_from_dmat(dmat=dmat_1, elements=xyz_1['symbols'])
547+
dmat_bonds_2 = get_bonds_from_dmat(dmat=dmat_2, elements=xyz_2['symbols'])
518548
if _check_equal_bonds_list(dmat_bonds_1, r_bonds) and _check_equal_bonds_list(dmat_bonds_2, p_bonds) \
519549
or _check_equal_bonds_list(dmat_bonds_2, r_bonds) and _check_equal_bonds_list(dmat_bonds_1, p_bonds):
520550
rxn.ts_species.ts_checks['IRC'] = True
521551

522552

553+
def _perceive_irc_fragments(xyz: dict,
554+
charge: int = 0,
555+
) -> Optional[List['Molecule']]:
556+
"""
557+
Perceive individual molecular fragments from an IRC endpoint geometry.
558+
559+
Detects connected components from the distance-matrix-based bond list,
560+
then perceives each fragment as a standalone ``Molecule`` object.
561+
562+
Args:
563+
xyz (dict): The Cartesian coordinates of the IRC endpoint.
564+
charge (int): The net charge of the full system.
565+
Applied directly for single-fragment systems; each fragment is
566+
assumed neutral for multi-fragment systems.
567+
568+
Returns:
569+
Optional[List[Molecule]]: A list of perceived ``Molecule`` objects (one per fragment),
570+
or ``None`` if perception fails for any fragment.
571+
"""
572+
symbols = xyz['symbols']
573+
coords = xyz['coords']
574+
n_atoms = len(symbols)
575+
576+
dmat = xyz_to_dmat(xyz)
577+
bonds = get_bonds_from_dmat(dmat=dmat, elements=symbols)
578+
579+
# Find connected components via DFS
580+
adj = {i: set() for i in range(n_atoms)}
581+
for a, b in bonds:
582+
adj[a].add(b)
583+
adj[b].add(a)
584+
585+
visited = set()
586+
fragment_indices = []
587+
for start in range(n_atoms):
588+
if start in visited:
589+
continue
590+
component = []
591+
stack = [start]
592+
while stack:
593+
node = stack.pop()
594+
if node in visited:
595+
continue
596+
visited.add(node)
597+
component.append(node)
598+
for neighbor in adj[node]:
599+
if neighbor not in visited:
600+
stack.append(neighbor)
601+
fragment_indices.append(sorted(component))
602+
603+
# Perceive each fragment separately
604+
perceived_mols = []
605+
for frag_idx in fragment_indices:
606+
frag_symbols = tuple(symbols[i] for i in frag_idx)
607+
frag_coords = tuple(coords[i] for i in frag_idx)
608+
frag_xyz = xyz_from_data(coords=frag_coords, symbols=frag_symbols)
609+
frag_charge = charge if len(fragment_indices) == 1 else 0
610+
mol = perceive_molecule_from_xyz(frag_xyz, charge=frag_charge, n_fragments=1)
611+
if mol is None:
612+
return None
613+
perceived_mols.append(mol)
614+
615+
return perceived_mols
616+
617+
618+
def _match_fragments_to_species(fragments: List['Molecule'],
619+
expected_mols: List['Molecule'],
620+
) -> bool:
621+
"""
622+
Check whether a list of perceived molecular fragments matches a list of expected species
623+
via graph isomorphism. Handles multi-species reactions (e.g., A + B) by finding a
624+
one-to-one matching between fragments and expected species across all permutations.
625+
626+
Args:
627+
fragments (List[Molecule]): Perceived fragment molecules from an IRC endpoint.
628+
expected_mols (List[Molecule]): Expected species molecules from the reaction.
629+
630+
Returns:
631+
bool: Whether a valid one-to-one isomorphic matching exists.
632+
"""
633+
if len(fragments) != len(expected_mols):
634+
return False
635+
if len(fragments) == 0:
636+
return True
637+
n = len(fragments)
638+
if n == 1:
639+
return check_isomorphism(fragments[0], expected_mols[0])
640+
from itertools import permutations
641+
for perm in permutations(range(n)):
642+
if all(check_isomorphism(fragments[i], expected_mols[perm[i]]) for i in range(n)):
643+
return True
644+
return False
645+
646+
523647
def _check_equal_bonds_list(bonds_1: List[Tuple[int, int]],
524648
bonds_2: List[Tuple[int, int]],
525649
) -> bool:

0 commit comments

Comments
 (0)