Skip to content

Commit fa8eba2

Browse files
committed
Tests: Added additional family recognition tests
for InChI multiplicity check, duplicate labels, and family disambiguation
1 parent eb78576 commit fa8eba2

1 file changed

Lines changed: 143 additions & 0 deletions

File tree

arc/family/family_test.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
check_product_isomorphism,
1818
descent_complex_group,
1919
determine_possible_reaction_products_from_family,
20+
filter_products_by_reaction,
2021
get_reaction_family_products,
2122
get_all_families,
2223
get_entries,
@@ -1338,6 +1339,148 @@ def test_label_map_no_duplicate_for_normal_families(self):
13381339
for label in pd['p_label_map']:
13391340
self.assertNotIn('_2', label, f'Unexpected suffix in p_label_map key: {label}')
13401341

1342+
def test_check_product_isomorphism_inchi_fallback_rejects_different_multiplicity(self):
    """
    Test that ``check_product_isomorphism`` rejects a same-InChI, different-multiplicity match.

    ``[CH2][CH2]`` (triplet biradical) and ``C=C`` (singlet ethylene) have
    identical InChIs but are structurally different species, so the biradical
    must not be accepted as ethylene.
    """
    triplet_biradical = Molecule(smiles='[CH2][CH2]')
    singlet_ethylene = Molecule(smiles='C=C')
    ethylene_species = ARCSpecies(label='C2H4', smiles='C=C')
    # Negative case: the biradical is not ethylene.
    biradical_matches = check_product_isomorphism([triplet_biradical], [ethylene_species])
    self.assertFalse(biradical_matches)
    # Positive control: ethylene is matched against its own species.
    ethylene_matches = check_product_isomorphism([singlet_ethylene], [ethylene_species])
    self.assertTrue(ethylene_matches)
1354+
1355+
def test_check_product_isomorphism_inchi_fallback_accepts_same_multiplicity(self):
    """
    Test that ``check_product_isomorphism`` matches different Lewis structures of one species.

    ``O=C=C(O)C=O`` and ``O=C[C-](O)C#[O+]`` are different Lewis structures of the
    same molecule with the same multiplicity, so both are expected to be accepted
    as a match for the species defined by the first SMILES.
    """
    neutral_smiles = 'O=C=C(O)C=O'
    charge_separated_smiles = 'O=C[C-](O)C#[O+]'
    spc = ARCSpecies(label='test', smiles=neutral_smiles)
    # Baseline: a molecule built from the very same SMILES matches the species.
    self.assertTrue(check_product_isomorphism([Molecule(smiles=neutral_smiles)], [spc]))
    # The alternative Lewis structure must be accepted as well. Comparing only
    # identical SMILES cannot tell a working fallback from a missing one.
    self.assertTrue(check_product_isomorphism([Molecule(smiles=charge_separated_smiles)], [spc]))
1363+
1364+
def test_check_product_isomorphism_length_mismatch(self):
    """Test that ``check_product_isomorphism`` returns ``False`` when the list lengths differ."""
    methane_mol = Molecule(smiles='C')
    methane_spc = ARCSpecies(label='CH4', smiles='C')
    # Two molecules against a single species.
    surplus_result = check_product_isomorphism([methane_mol, methane_mol], [methane_spc])
    self.assertFalse(surplus_result)
    # No molecules against a single species.
    empty_result = check_product_isomorphism([], [methane_spc])
    self.assertFalse(empty_result)
1370+
1371+
def test_disproportionation_not_matched_for_triplet_products(self):
    """
    Test that C2H5 + OH → C2H4 + H2O is identified as Disproportionation.

    The C2H4 product is given as singlet ethylene (``C=C``). The reaction is
    expected to be assigned the Disproportionation family, with
    ``family_own_reverse`` being ``False``.

    NOTE(review): the method name says "not matched", yet the assertions below
    require Disproportionation to be the assigned family. The name is kept so
    that existing test IDs stay valid; consider renaming it.
    """
    reactants = [ARCSpecies(label='C2H5', smiles='C[CH2]'),
                 ARCSpecies(label='OH', smiles='[OH]')]
    products = [ARCSpecies(label='C2H4', smiles='C=C'),
                ARCSpecies(label='H2O', smiles='O')]
    rxn = ARCReaction(r_species=reactants, p_species=products)
    self.assertEqual(rxn.family, 'Disproportionation')
    self.assertFalse(rxn.family_own_reverse)
1383+
1384+
def test_h_abstraction_not_confused_with_disproportionation(self):
    """
    Test that CH4 + OH → CH3 + H2O is assigned to H_Abstraction.

    This is a true hydrogen abstraction and must NOT be classified as
    Disproportionation; ``family_own_reverse`` is expected to be ``True``.
    """
    methane = ARCSpecies(label='CH4', smiles='C')
    hydroxyl = ARCSpecies(label='OH', smiles='[OH]')
    methyl = ARCSpecies(label='CH3', smiles='[CH3]')
    water = ARCSpecies(label='H2O', smiles='O')
    rxn = ARCReaction(r_species=[methane, hydroxyl], p_species=[methyl, water])
    self.assertEqual(rxn.family, 'H_Abstraction')
    self.assertTrue(rxn.family_own_reverse)
1393+
1394+
def test_apply_recipe_form_bond_duplicate_labels(self):
    """
    Test that product generation handles FORM_BOND with duplicate labels (*,*).

    R_Recombination uses ['FORM_BOND', '*', 1, '*'], where both atoms share the
    same label. Recombining two CH3 radicals should give exactly one product
    molecule, ethane.
    """
    fam = ReactionFamily(label='R_Recombination')
    r_species = [ARCSpecies(label='CH3', smiles='[CH3]'),
                 ARCSpecies(label='CH3_2', smiles='[CH3]')]
    products = fam.generate_products(reactants=r_species)
    self.assertGreater(len(products), 0)
    # The reference molecule does not change between iterations, so build it once.
    ethane = Molecule(smiles='CC')
    # Only the product lists are inspected; the group-label keys are not needed.
    for product_lists in products.values():
        for product_list in product_lists:
            template_mols = product_list[0]
            # Should produce ethane (CC), not crash
            self.assertEqual(len(template_mols), 1)
            self.assertTrue(template_mols[0].is_isomorphic(ethane))
1409+
1410+
def test_apply_recipe_lose_radical_all_labeled_atoms(self):
    """
    Test that LOSE_RADICAL is applied to ALL atoms sharing a label, not just the first.

    R_Recombination's recipe ['LOSE_RADICAL', '*', '1'] should remove the radical
    from both * atoms, so H + H must give a product with no radicals remaining.
    """
    fam = ReactionFamily(label='R_Recombination')
    h_atoms = [ARCSpecies(label='H1', smiles='[H]'),
               ARCSpecies(label='H2', smiles='[H]')]
    products = fam.generate_products(reactants=h_atoms)
    self.assertGreater(len(products), 0)
    # Only the product lists are inspected; the group-label keys are not needed.
    for product_lists in products.values():
        for product_list in product_lists:
            template_mols = product_list[0]
            # H + H → H2, no radicals remaining
            self.assertEqual(template_mols[0].get_radical_count(), 0)
1424+
1425+
def test_filter_products_by_reaction(self):
    """
    Test that ``filter_products_by_reaction`` keeps products matching the reaction.

    For CH4 + OH → CH3 + H2O, the H_Abstraction candidates are generated and then
    filtered; the filtered list must be non-empty and contain only H_Abstraction entries.
    """
    reactants = [ARCSpecies(label='CH4', smiles='C'),
                 ARCSpecies(label='OH', smiles='[OH]')]
    expected_products = [ARCSpecies(label='CH3', smiles='[CH3]'),
                         ARCSpecies(label='H2O', smiles='O')]
    rxn = ARCReaction(r_species=reactants, p_species=expected_products)
    # Unfiltered candidates generated from the family templates.
    candidates = determine_possible_reaction_products_from_family(rxn, 'H_Abstraction')
    self.assertGreater(len(candidates), 0)
    # Filtering keeps only the candidates that agree with the reaction's products.
    kept = filter_products_by_reaction(rxn=rxn, product_dicts=candidates)
    self.assertGreater(len(kept), 0)
    for product_dict in kept:
        self.assertEqual(product_dict['family'], 'H_Abstraction')
1439+
1440+
def test_filter_products_rejects_wrong_product_count(self):
    """Test that ``filter_products_by_reaction`` rejects a template with the wrong product count."""
    reactants = [ARCSpecies(label='CH4', smiles='C'),
                 ARCSpecies(label='OH', smiles='[OH]')]
    expected_products = [ARCSpecies(label='CH3', smiles='[CH3]'),
                         ARCSpecies(label='H2O', smiles='O')]
    rxn = ARCReaction(r_species=reactants, p_species=expected_products)
    # A fabricated entry that carries one product, while the reaction has two.
    single_product_entry = {
        'family': 'H_Abstraction',
        'group_labels': ('X_H', 'Y_rad'),
        'products': [Molecule(smiles='C')],
        'r_label_map': {'*1': 0},
        'p_label_map': {'*1': 0},
        'own_reverse': True,
        'discovered_in_reverse': False,
    }
    filtered = filter_products_by_reaction(rxn=rxn, product_dicts=[single_product_entry])
    self.assertEqual(len(filtered), 0)
1453+
1454+
def test_get_reaction_family_products_skips_unsupported_groups(self):
    """
    Test that ``get_reaction_family_products`` skips families it cannot process.

    Families with unsupported atom types should be skipped gracefully rather than
    crash the call, and H_Abstraction must still be found for CH4 + OH → CH3 + H2O.
    """
    reactants = [ARCSpecies(label='CH4', smiles='C'),
                 ARCSpecies(label='OH', smiles='[OH]')]
    expected_products = [ARCSpecies(label='CH3', smiles='[CH3]'),
                         ARCSpecies(label='H2O', smiles='O')]
    rxn = ARCReaction(r_species=reactants, p_species=expected_products)
    # Using 'all' families includes some with unsupported atom types (e.g., Na)
    products = get_reaction_family_products(rxn, rmg_family_set='all')
    self.assertIsInstance(products, list)
    # Should still find H_Abstraction despite some families failing
    families = {product_dict['family'] for product_dict in products}
    self.assertIn('H_Abstraction', families)
1467+
1468+
def test_determine_family_for_various_reactions(self):
    """
    Test family identification for a range of reaction types.

    Each case runs in its own ``subTest`` so that a failing case is reported
    without hiding the outcome of the remaining cases.
    """
    test_cases = [
        # (r_smiles_list, p_smiles_list, expected_family)
        (['C', '[OH]'], ['[CH3]', 'O'], 'H_Abstraction'),
        (['C[CH2]', '[OH]'], ['C=C', 'O'], 'Disproportionation'),
        (['[CH2]CC[CH2]'], ['C1CCC1'], 'Birad_recombination'),
    ]
    for r_smiles, p_smiles, expected in test_cases:
        equation = f'{" + ".join(r_smiles)} => {" + ".join(p_smiles)}'
        with self.subTest(reaction=equation, expected=expected):
            r_species = [ARCSpecies(label=f'R{i}', smiles=s) for i, s in enumerate(r_smiles)]
            p_species = [ARCSpecies(label=f'P{i}', smiles=s) for i, s in enumerate(p_smiles)]
            rxn = ARCReaction(r_species=r_species, p_species=p_species)
            self.assertEqual(rxn.family, expected,
                             f'Expected {expected} for {equation}, got {rxn.family}')
1483+
13411484
def test_check_family_name(self):
13421485
"""Test check family name function"""
13431486
self.assertTrue(check_family_name('H_Abstraction'))

0 commit comments

Comments
 (0)