Skip to content

Commit b30535b

Browse files
committed
Merge PR #2698 branch 'drawbidentate'.
Adsorbates with more than one surface site were drawn weirdly, with the second or third surface site often floating up in the air. With this change, it temporarily forms a bond between the surface sites, so the layout algorithm sees them as part of a ring, then it rotates the molecule so they are at the bottom, and then it removes the bond once the coordinates are determined. There are some special cases and exceptions for things with many surface sites. Overall, though, this should make the adsorbates look much better.
2 parents 43d5bf8 + fd5bdfa commit b30535b

2 files changed

Lines changed: 261 additions & 23 deletions

File tree

rmgpy/molecule/draw.py

Lines changed: 105 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import math
5050
import os.path
5151
import re
52+
import itertools
5253

5354
try:
5455
import cairocffi as cairo
@@ -60,7 +61,8 @@
6061
import numpy as np
6162
from rdkit.Chem import AllChem
6263

63-
from rmgpy.molecule.molecule import Atom, Molecule
64+
from rmgpy.molecule.molecule import Atom, Molecule, Bond
65+
from rmgpy.molecule.pathfinder import find_shortest_path
6466
from rmgpy.qm.molecule import Geometry
6567

6668

@@ -96,6 +98,12 @@ def create_new_surface(file_format, target=None, width=1024, height=768):
9698

9799
################################################################################
98100

101+
class AdsorbateDrawingError(Exception):
102+
"""
103+
When something goes wrong trying to draw an adsorbate.
104+
"""
105+
pass
106+
99107
class MoleculeDrawer(object):
100108
"""
101109
This class provides functionality for drawing the skeletal formula of
@@ -207,7 +215,16 @@ def draw(self, molecule, file_format, target=None):
207215
# replace the bonds after generating coordinates. This avoids
208216
# bugs with RDKit
209217
old_bond_dictionary = self._make_single_bonds()
210-
self._generate_coordinates()
218+
if molecule.contains_surface_site():
219+
try:
220+
self._connect_surface_sites()
221+
self._generate_coordinates()
222+
self._disconnect_surface_sites()
223+
except AdsorbateDrawingError as e:
224+
self._disconnect_surface_sites()
225+
self._generate_coordinates(fix_surface_sites=False)
226+
else:
227+
self._generate_coordinates()
211228
self._replace_bonds(old_bond_dictionary)
212229

213230
# Generate labels to use
@@ -323,11 +340,13 @@ def _find_ring_groups(self):
323340
if not found:
324341
self.ringSystems.append([cycle])
325342

326-
def _generate_coordinates(self):
343+
def _generate_coordinates(self, fix_surface_sites=True):
327344
"""
328345
Generate the 2D coordinates to be used when drawing the current
329346
molecule. The function uses rdKits 2D coordinate generation.
330347
Updates the self.coordinates Array in place.
348+
If `fix_surface_sites` is True, then the surface sites are placed
349+
at the bottom of the molecule.
331350
"""
332351
atoms = self.molecule.atoms
333352
natoms = len(atoms)
@@ -390,7 +409,6 @@ def _generate_coordinates(self):
390409
# If two atoms lie on top of each other, push them apart a bit
391410
# This is ugly, but at least the mess you end up with isn't as misleading
392411
# as leaving everything piled on top of each other at the origin
393-
import itertools
394412
for atom1, atom2 in itertools.combinations(backbone, 2):
395413
i1, i2 = atoms.index(atom1), atoms.index(atom2)
396414
if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5:
@@ -402,7 +420,6 @@ def _generate_coordinates(self):
402420
# If two atoms lie on top of each other, push them apart a bit
403421
# This is ugly, but at least the mess you end up with isn't as misleading
404422
# as leaving everything piled on top of each other at the origin
405-
import itertools
406423
for atom1, atom2 in itertools.combinations(backbone, 2):
407424
i1, i2 = atoms.index(atom1), atoms.index(atom2)
408425
if np.linalg.norm(coordinates[i1, :] - coordinates[i2, :]) < 0.5:
@@ -457,26 +474,59 @@ def _generate_coordinates(self):
457474
coordinates[:, 0] = temp[:, 1]
458475
coordinates[:, 1] = temp[:, 0]
459476

460-
# For surface species, rotate them so the site is at the bottom.
461-
if self.molecule.contains_surface_site():
477+
# For surface species
478+
if fix_surface_sites and self.molecule.contains_surface_site():
462479
if len(self.molecule.atoms) == 1:
463480
return coordinates
464-
for site in self.molecule.atoms:
465-
if site.is_surface_site():
466-
break
467-
else:
468-
raise Exception("Can't find surface site")
469-
if site.bonds:
470-
adsorbate = next(iter(site.bonds))
471-
vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adsorbate), :]
472-
angle = math.atan2(vector0[0], vector0[1]) - math.pi
481+
sites = [atom for atom in self.molecule.atoms if atom.is_surface_site()]
482+
if len(sites) == 1:
483+
# rotate them so the site is at the bottom.
484+
site = sites[0]
485+
if site.bonds:
486+
adatom = next(iter(site.bonds))
487+
vector0 = coordinates[atoms.index(site), :] - coordinates[atoms.index(adatom), :]
488+
angle = math.atan2(vector0[0], vector0[1]) - math.pi
489+
rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float)
490+
self.coordinates = coordinates = np.dot(coordinates, rot)
491+
else:
492+
# van der Waals
493+
index = atoms.index(site)
494+
coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit
495+
coordinates[index, 0] = coordinates[:, 0].mean() # and center it
496+
elif len(sites) <= 4:
497+
# Rotate so the line of best fit through the adatoms is horizontal.
498+
# find atoms bonded to sites
499+
adatoms = [next(iter(site.bonds)) for site in sites]
500+
adatom_indices = [atoms.index(a) for a in adatoms]
501+
# find the best fit line through the bonded atoms
502+
x = coordinates[adatom_indices, 0]
503+
y = coordinates[adatom_indices, 1]
504+
A = np.vstack([x, np.ones(len(x))]).T
505+
m, c = np.linalg.lstsq(A, y, rcond=None)[0]
506+
# rotate so the line is horizontal
507+
angle = -math.atan(m)
473508
rot = np.array([[math.cos(angle), math.sin(angle)], [-math.sin(angle), math.cos(angle)]], float)
474509
self.coordinates = coordinates = np.dot(coordinates, rot)
510+
# if the line is above the middle, flip it
511+
not_site_indices = [atoms.index(a) for a in atoms if not a.is_surface_site()]
512+
if coordinates[adatom_indices, 1].mean() > coordinates[not_site_indices, 1].mean():
513+
coordinates[:, 1] *= -1
514+
x = coordinates[adatom_indices, 0]
515+
y = coordinates[adatom_indices, 1]
516+
site_y_pos = min(min(y) - 0.8, min(coordinates[not_site_indices, 1]) - 0.5)
517+
if max(y) - site_y_pos > 1.5:
518+
raise AdsorbateDrawingError("Adsorbate bond too long")
519+
for x1, x2 in itertools.combinations(x, 2):
520+
if abs(x1 - x2) < 0.2:
521+
raise AdsorbateDrawingError("Sites overlapping")
522+
for site, x_pos in zip(sites, x):
523+
index = atoms.index(site)
524+
coordinates[index, 1] = site_y_pos
525+
coordinates[index, 0] = x_pos
526+
475527
else:
476-
# van der waals
477-
index = atoms.index(site)
478-
coordinates[index, 1] = min(coordinates[:, 1]) - 0.8 # just move the site down a bit
479-
coordinates[index, 0] = coordinates[:, 0].mean() # and center it
528+
# more than 4 surface sites? leave them alone
529+
pass
480530

481531
def _find_cyclic_backbone(self):
482532
"""
@@ -854,7 +904,7 @@ def _generate_functional_group_coordinates(self, atom0, atom1):
854904
# Check to see if atom1 is in any cycles in the molecule
855905
ring_system = None
856906
for ring_sys in self.ringSystems:
857-
if any([atom1 in ring for ring in ring_sys]):
907+
if any(atom1 in ring for ring in ring_sys):
858908
ring_system = ring_sys
859909

860910
if ring_system is not None:
@@ -1624,6 +1674,40 @@ def _replace_bonds(self, bond_order_dictionary):
16241674
for bond, order in bond_order_dictionary.items():
16251675
bond.set_order_num(order)
16261676

1677+
def _connect_surface_sites(self):
1678+
"""
1679+
Creates single bonds between atoms that are surface sites.
1680+
This is to help make multidentate adsorbates look better.
1681+
"""
1682+
sites = [a for a in self.molecule.atoms if a.is_surface_site()]
1683+
if len(sites) > 4:
1684+
return
1685+
for site1 in sites:
1686+
other_sites = [a for a in sites if a != site1]
1687+
if not other_sites: break
1688+
# connect to the nearest site
1689+
site2 = min(other_sites, key=lambda a: len(find_shortest_path(site1, a)))
1690+
if len(find_shortest_path(site1, site2)) > 2 and len(sites) > 3:
1691+
# if there are more than 3 sites, don't connect sites that aren't neighbors
1692+
continue
1693+
1694+
bond = site1.bonds.get(site2)
1695+
if bond is None:
1696+
bond = Bond(site1, site2, 1)
1697+
site1.bonds[site2] = bond
1698+
site2.bonds[site1] = bond
1699+
1700+
def _disconnect_surface_sites(self):
1701+
"""
1702+
Removes all bonds between atoms that are surface sites.
1703+
"""
1704+
for site1 in self.molecule.atoms:
1705+
if site1.is_surface_site():
1706+
for site2 in list(site1.bonds.keys()): # make a list copy so we can delete from the dict
1707+
if site2.is_surface_site():
1708+
del site1.bonds[site2]
1709+
del site2.bonds[site1]
1710+
16271711

16281712
################################################################################
16291713

test/rmgpy/molecule/drawTest.py

Lines changed: 156 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@
3333

3434
import os
3535
import os.path
36+
import itertools
3637

37-
38-
from rmgpy.molecule import Molecule
38+
from rmgpy.molecule import Molecule, Atom, Bond
3939
from rmgpy.molecule.draw import MoleculeDrawer
4040
from rmgpy.species import Species
4141

@@ -144,3 +144,157 @@ def test_draw_hydrogen_bond_adsorbate(self):
144144
from cairo import PDFSurface
145145
surface, _cr, (_xoff, _yoff, _width, _height) = self.drawer.draw(molecule, file_format="pdf")
146146
assert isinstance(surface, PDFSurface)
147+
148+
def test_draw_bidentate_adsorbate(self):
149+
try:
150+
from cairocffi import ImageSurface
151+
except ImportError:
152+
from cairo import ImageSurface
153+
154+
test_molecules = [
155+
Molecule().from_adjacency_list(
156+
"""
157+
1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}
158+
2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S}
159+
3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S}
160+
4 X u0 p0 c0 {1,S}
161+
5 H u0 p0 c0 {1,S}
162+
6 H u0 p0 c0 {1,S}
163+
7 H u0 p0 c0 {2,S}
164+
8 X u0 p0 c0 {2,S}
165+
9 H u0 p0 c0 {3,S}
166+
10 H u0 p0 c0 {3,S}
167+
11 H u0 p0 c0 {3,S}
168+
"""),
169+
Molecule().from_adjacency_list(
170+
"""
171+
1 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}
172+
2 C u0 p0 c0 {1,S} {3,S} {7,S} {8,S}
173+
3 C u0 p0 c0 {2,S} {9,S} {10,S} {11,S}
174+
4 X u0 p0 c0 {1,S}
175+
5 H u0 p0 c0 {1,S}
176+
6 H u0 p0 c0 {1,S}
177+
7 H u0 p0 c0 {2,S}
178+
8 H u0 p0 c0 {2,S}
179+
9 H u0 p0 c0 {3,S}
180+
10 H u0 p0 c0 {3,S}
181+
11 X u0 p0 c0 {3,S}
182+
"""),
183+
]
184+
for molecule in [Molecule(smiles="CC(=O)CCO"), Molecule(smiles="C1CC=CC1CC"), Molecule(smiles="C=CCC(O)=O")]:
185+
bondable = [a for a in molecule.atoms if a.is_non_hydrogen() and any(b.is_hydrogen() for b in a.bonds)]
186+
for b1, b2 in itertools.combinations(bondable, 2):
187+
# find a hydrogen atom bonded to each of the two atoms
188+
for h1 in b1.bonds:
189+
if h1.is_hydrogen():
190+
break
191+
for h2 in b2.bonds:
192+
if h2.is_hydrogen():
193+
break
194+
molecule.remove_atom(h1)
195+
molecule.remove_atom(h2)
196+
x1 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
197+
x2 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
198+
molecule.add_atom(x1)
199+
molecule.add_atom(x2)
200+
molecule.add_bond(Bond(b1, x1, order=1))
201+
molecule.add_bond(Bond(b2, x2, order=1))
202+
test_molecules.append(molecule.copy(deep=True))
203+
molecule.remove_atom(x1)
204+
molecule.remove_atom(x2)
205+
molecule.add_atom(h1)
206+
molecule.add_atom(h2)
207+
molecule.add_bond(Bond(b1, h1, order=1))
208+
molecule.add_bond(Bond(b2, h2, order=1))
209+
210+
for b1, b2, b3 in itertools.combinations(bondable, 3):
211+
# find a hydrogen atom bonded to each of the two atoms
212+
for h1 in b1.bonds:
213+
if h1.is_hydrogen():
214+
break
215+
for h2 in b2.bonds:
216+
if h2.is_hydrogen():
217+
break
218+
for h3 in b3.bonds:
219+
if h3.is_hydrogen():
220+
break
221+
molecule.remove_atom(h1)
222+
molecule.remove_atom(h2)
223+
molecule.remove_atom(h3)
224+
x1 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
225+
x2 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
226+
x3 = Atom(element='X', radical_electrons=0, charge=0, label='', lone_pairs=0)
227+
molecule.add_atom(x1)
228+
molecule.add_atom(x2)
229+
molecule.add_atom(x3)
230+
molecule.add_bond(Bond(b1, x1, order=1))
231+
molecule.add_bond(Bond(b2, x2, order=1))
232+
molecule.add_bond(Bond(b3, x3, order=1))
233+
test_molecules.append(molecule.copy(deep=True))
234+
molecule.remove_atom(x1)
235+
molecule.remove_atom(x2)
236+
molecule.remove_atom(x3)
237+
molecule.add_atom(h1)
238+
molecule.add_atom(h2)
239+
molecule.add_atom(h3)
240+
molecule.add_bond(Bond(b1, h1, order=1))
241+
molecule.add_bond(Bond(b2, h2, order=1))
242+
molecule.add_bond(Bond(b3, h3, order=1))
243+
244+
test_molecules.append(Molecule().from_adjacency_list(
245+
"""
246+
1 C u0 p0 c0 {2,S} {3,S} {9,S} {10,S}
247+
2 X u0 p0 c0 {1,S}
248+
3 C u0 p0 c0 {1,S} {4,S} {5,S} {11,S}
249+
4 X u0 p0 c0 {3,S}
250+
5 C u0 p0 c0 {3,S} {6,S} {7,S} {12,S}
251+
6 X u0 p0 c0 {5,S}
252+
7 C u0 p0 c0 {5,S} {8,S} {13,S} {14,S}
253+
8 X u0 p0 c0 {7,S}
254+
9 H u0 p0 c0 {1,S}
255+
10 H u0 p0 c0 {1,S}
256+
11 H u0 p0 c0 {3,S}
257+
12 H u0 p0 c0 {5,S}
258+
13 H u0 p0 c0 {7,S}
259+
14 H u0 p0 c0 {7,S}
260+
"""))
261+
test_molecules.append(Molecule().from_adjacency_list(
262+
"""
263+
1 O u0 p2 c0 {4,S} {5,S}
264+
2 O u0 p2 c0 {5,S} {6,S}
265+
3 O u0 p2 c0 {6,S} {26,S}
266+
4 C u0 p0 c0 {1,S} {7,S} {8,S} {19,S}
267+
5 C u0 p0 c0 {1,S} {2,S} {10,S} {21,S}
268+
6 C u0 p0 c0 {2,S} {3,S} {9,S} {20,S}
269+
7 C u0 p0 c0 {4,S} {15,S} {16,S} {24,S}
270+
8 C u0 p0 c0 {4,S} {17,S} {18,S} {25,S}
271+
9 C u0 p0 c0 {6,S} {11,S} {12,S} {22,S}
272+
10 C u0 p0 c0 {5,S} {13,S} {14,S} {23,S}
273+
11 H u0 p0 c0 {9,S}
274+
12 H u0 p0 c0 {9,S}
275+
13 H u0 p0 c0 {10,S}
276+
14 H u0 p0 c0 {10,S}
277+
15 H u0 p0 c0 {7,S}
278+
16 H u0 p0 c0 {7,S}
279+
17 H u0 p0 c0 {8,S}
280+
18 H u0 p0 c0 {8,S}
281+
19 X u0 p0 c0 {4,S}
282+
20 X u0 p0 c0 {6,S}
283+
21 X u0 p0 c0 {5,S}
284+
22 X u0 p0 c0 {9,S}
285+
23 X u0 p0 c0 {10,S}
286+
24 X u0 p0 c0 {7,S}
287+
25 X u0 p0 c0 {8,S}
288+
26 X u0 p0 c0 {3,S}
289+
"""))
290+
test_molecules.append(Molecule(smiles="*CC(*)(C*)OCC#*"))
291+
test_molecules.append(Molecule(smiles="*CC(*)(C*)C*"))
292+
for number, molecule in enumerate(test_molecules, 1):
293+
path = f"test_polydentate_{number}.png"
294+
if os.path.exists(path):
295+
os.unlink(path)
296+
self.drawer.clear()
297+
surface, _cr, (_xoff, _yoff, width, height) = self.drawer.draw(molecule, file_format="png", target=path)
298+
assert os.path.exists(path), "File doesn't exist"
299+
os.unlink(path)
300+
assert isinstance(surface, ImageSurface)

0 commit comments

Comments
 (0)