@@ -106,6 +106,50 @@ def _cmp(mol, ref_mol):
106106 )
107107
108108
def test_templateResidueFromMolecule_sdf_reference(tmp_path):
    """Template BEN from a reference Molecule whose atom names cannot be matched.

    The named CIF reference is written to SDF and read back. Per the
    expectation encoded in this test, the reloaded reference carries
    element-symbol atom names only, so name matching is unavailable and the
    reference is matched through SMILES / MCS instead. The templated residue
    must still describe the same molecule as the name-carrying reference.
    """
    import numpy as np

    data_dir = os.path.join(curr_dir, "test_molecule", "test_templating")
    named_reference = Molecule(os.path.join(data_dir, "BEN_pH7.4.cif"))

    # Push the reference through an SDF file: bonds, bond orders and formal
    # charges are expected to survive, while the distinctive atom names are
    # replaced by bare element symbols.
    sdf_file = os.path.join(tmp_path, "ref.sdf")
    named_reference.write(sdf_file)
    sdf_reference = Molecule(sdf_file)

    # Precondition: the reloaded names repeat (fewer distinct names than
    # atoms), which guarantees the name-based path cannot be the one used.
    distinct_name_count = len(np.unique(sdf_reference.name))
    assert distinct_name_count < sdf_reference.numAtoms

    target = Molecule(os.path.join(data_dir, "BEN.pdb"))
    target.templateResidueFromMolecule(
        "resname BEN",
        sdf_reference,
        addHs=True,
        guessBonds=True,
    )

    # Benzamidine's two amidine nitrogens are equivalent, so the SMILES / MCS
    # match may place the +1 charge and the double bond on the other nitrogen
    # relative to the name-matched reference. That is the same molecule, so
    # the check uses canonical SMILES (unchanged by that swap) together with
    # atom count, element composition and net formal charge.
    from rdkit import Chem

    def _heavy_atom_smiles(molecule):
        rdkit_mol = molecule.toRDKitMol(
            sanitize=True,
            kekulize=False,
            assignStereo=False,
            _logger=False,
        )
        without_hydrogens = Chem.RemoveHs(rdkit_mol)
        return Chem.MolToSmiles(without_hydrogens)

    templated = target.copy(sel="resname BEN")
    assert templated.numAtoms == named_reference.numAtoms
    assert sorted(templated.element) == sorted(named_reference.element)
    templated_charge = int(templated.formalcharge.sum())
    reference_charge = int(named_reference.formalcharge.sum())
    assert templated_charge == reference_charge
    assert _heavy_atom_smiles(templated) == _heavy_atom_smiles(named_reference)
151+
152+
109153def test_templateResidueFromSmiles_multiresidue ():
110154 """A selection that spans multiple residues with the same resname
111155 (e.g. ``resname BEN`` when there are several BEN copies in the
@@ -199,6 +243,16 @@ def test_templateResidueFromSmiles_incorrect_smiles():
199243 assert "2" in ben .bondtype
200244
201245
def test_templateResidueFromSmiles_incomplete_template_errors():
    """Templating must fail loudly when the SMILES misses residue heavy atoms.

    Benzene only accounts for the aromatic ring of BEN; the amidine carbon and
    its two nitrogens have no counterpart in the template. The call is expected
    to raise ``RuntimeError`` instead of leaving those atoms untemplated.
    """
    pdb_file = os.path.join(
        curr_dir, "test_molecule", "test_templating", "BEN.pdb"
    )
    benzamidine = Molecule(pdb_file)
    ring_only_smiles = "c1ccccc1"  # benzene: covers the ring, not the amidine

    with pytest.raises(RuntimeError):
        benzamidine.templateResidueFromSmiles(
            "resname BEN",
            ring_only_smiles,
            guessBonds=True,
        )
254+
255+
202256@pytest .mark .parametrize (
203257 "smiles" ,
204258 (
0 commit comments