From 1f6186367efbc86571d01f9b69327f58e2ec9a57 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Thu, 9 Oct 2025 11:23:38 -0500 Subject: [PATCH 1/7] Reproduce issue #699 --- .../_tests/test_utils/test_packmol.py | 68 ++++++++++--------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/openff/evaluator/_tests/test_utils/test_packmol.py b/openff/evaluator/_tests/test_utils/test_packmol.py index 0c4b84503..d19912505 100644 --- a/openff/evaluator/_tests/test_utils/test_packmol.py +++ b/openff/evaluator/_tests/test_utils/test_packmol.py @@ -3,7 +3,9 @@ """ import numpy as np +import openmm.app import pytest +from openff.toolkit import Molecule, Topology from openff.units import unit from openff.evaluator.utils import packmol @@ -11,7 +13,6 @@ def test_packmol_box_size(): - from openff.toolkit.topology import Molecule molecules = [Molecule.from_smiles("O")] @@ -32,7 +33,6 @@ def test_packmol_box_size(): def test_packmol_bad_input(): - from openff.toolkit.topology import Molecule molecules = [Molecule.from_smiles("O")] @@ -41,7 +41,6 @@ def test_packmol_bad_input(): def test_packmol_failed(): - from openff.toolkit.topology import Molecule molecules = [Molecule.from_smiles("O")] @@ -50,7 +49,6 @@ def test_packmol_failed(): def test_packmol_water(): - from openff.toolkit.topology import Molecule molecules = [Molecule.from_smiles("O")] @@ -71,7 +69,6 @@ def test_packmol_water(): def test_packmol_ions(): - from openff.toolkit.topology import Molecule molecules = [ Molecule.from_smiles("[Na+]"), @@ -100,7 +97,6 @@ def test_packmol_ions(): def test_packmol_paracetamol(): - from openff.toolkit.topology import Molecule # Test something a bit more tricky than water molecules = [Molecule.from_smiles("CC(=O)NC1=CC=C(C=C1)O")] @@ -117,35 +113,34 @@ def test_packmol_paracetamol(): assert trajectory.topology.n_bonds == 20 -def test_amino_acids(): - amino_residues = { - "C[C@H](N)C(=O)O": "ALA", - # Undefined stereochemistry error. - # "N=C(N)NCCC[C@H](N)C(=O)O": "ARG", - "NC(=O)C[C@H](N)C(=O)O": "ASN", - "N[C@@H](CC(=O)O)C(=O)O": "ASP", - "N[C@@H](CS)C(=O)O": "CYS", - "N[C@@H](CCC(=O)O)C(=O)O": "GLU", - "NC(=O)CC[C@H](N)C(=O)O": "GLN", - "NCC(=O)O": "GLY", - "N[C@@H](Cc1c[nH]cn1)C(=O)O": "HIS", - "CC[C@H](C)[C@H](N)C(=O)O": "ILE", - "CC(C)C[C@H](N)C(=O)O": "LEU", - "NCCCC[C@H](N)C(=O)O": "LYS", - "CSCC[C@H](N)C(=O)O": "MET", - "N[C@@H](Cc1ccccc1)C(=O)O": "PHE", - "O=C(O)[C@@H]1CCCN1": "PRO", - "N[C@@H](CO)C(=O)O": "SER", - "C[C@@H](O)[C@H](N)C(=O)O": "THR", - "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O": "TRP", - "N[C@@H](Cc1ccc(O)cc1)C(=O)O": "TYR", - "CC(C)[C@H](N)C(=O)O": "VAL", - } - +amino_residues = { + "C[C@H](N)C(=O)O": "ALA", + # Undefined stereochemistry error. + # "N=C(N)NCCC[C@H](N)C(=O)O": "ARG", + "NC(=O)C[C@H](N)C(=O)O": "ASN", + "N[C@@H](CC(=O)O)C(=O)O": "ASP", + "N[C@@H](CS)C(=O)O": "CYS", + "N[C@@H](CCC(=O)O)C(=O)O": "GLU", + "NC(=O)CC[C@H](N)C(=O)O": "GLN", + "NCC(=O)O": "GLY", + "N[C@@H](Cc1c[nH]cn1)C(=O)O": "HIS", + "CC[C@H](C)[C@H](N)C(=O)O": "ILE", + "CC(C)C[C@H](N)C(=O)O": "LEU", + "NCCCC[C@H](N)C(=O)O": "LYS", + "CSCC[C@H](N)C(=O)O": "MET", + "N[C@@H](Cc1ccccc1)C(=O)O": "PHE", + "O=C(O)[C@@H]1CCCN1": "PRO", + "N[C@@H](CO)C(=O)O": "SER", + "C[C@@H](O)[C@H](N)C(=O)O": "THR", + "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O": "TRP", + "N[C@@H](Cc1ccc(O)cc1)C(=O)O": "TYR", + "CC(C)[C@H](N)C(=O)O": "VAL", +} + + +def test_amino_acid_residue_information(): smiles = [*amino_residues] - from openff.toolkit.topology import Molecule - molecules = [Molecule.from_smiles(x) for x in smiles] counts = [1] * len(smiles) @@ -160,3 +155,10 @@ def test_amino_acids(): for index, smiles in enumerate(smiles): assert trajectory.top.residue(index).name == amino_residues[smiles] + + trajectory.save_pdb("test.pdb") + + # Check that the trajectory can later be read into OpenFF (via OpenMM) + Topology.from_openmm( + openmm.app.PDBFile("test.pdb").topology, unique_molecules=molecules + ) From 55dd557135c481b91d31f6260a0a20392ebe2dba Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Thu, 9 Oct 2025 11:23:49 -0500 Subject: [PATCH 2/7] Turn of AWS runner --- .github/workflows/aws.yaml | 82 -------------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 .github/workflows/aws.yaml diff --git a/.github/workflows/aws.yaml b/.github/workflows/aws.yaml deleted file mode 100644 index 04c34cbb5..000000000 --- a/.github/workflows/aws.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: Test Self-Hosted Runner -on: - workflow_dispatch: - pull_request: - branches: - - "main" - -defaults: - run: - shell: bash -l {0} - -jobs: - start-aws-runner: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - outputs: - mapping: ${{ steps.aws-start.outputs.mapping }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: arn:aws:iam::649715411074:role/gh-actions-runner-role - aws-region: us-east-1 - - name: Create cloud runner - id: aws-start - uses: omsf/start-aws-gha-runner@v1.1.0 - with: - aws_image_id: ami-0d5079d9be06933e5 - aws_instance_type: g4dn.xlarge - aws_home_dir: /home/ubuntu - env: - GH_PAT: ${{ secrets.GH_PAT }} - self-hosted-test: - runs-on: self-hosted - needs: - - start-aws-runner - steps: - - uses: actions/checkout@v5 - - - name: Print disk usage - run: "df -h" - - - name: Print Docker details - run: "docker version || true" - - - name: Setup Conda Environment - uses: mamba-org/setup-micromamba@v2 - with: - environment-file: devtools/conda-envs/test_env.yaml - - - name: Install Package and test plugins - run: python -m pip install . utilities/test_plugins/ - - - name: Double-check local installation - run: python -c "from openff.evaluator import __version__; print(__version__)" - - name: Run integration tests - run: | - cd integration-tests/default-workflows/ - python run.py - stop-aws-runner: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - needs: - - start-aws-runner - - self-hosted-test - if: ${{ always() }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: arn:aws:iam::649715411074:role/gh-actions-runner-role - aws-region: us-east-1 - - name: Stop instances - uses: omsf/stop-aws-gha-runner@v1.0.0 - with: - instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }} - env: - GH_PAT: ${{ secrets.GH_PAT }} From 1c27d995dad47cef9ddbba9e7fc2939f570b517d Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Thu, 9 Oct 2025 11:23:49 -0500 Subject: [PATCH 3/7] Turn off AWS runner --- .github/workflows/aws.yaml | 82 -------------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 .github/workflows/aws.yaml diff --git a/.github/workflows/aws.yaml b/.github/workflows/aws.yaml deleted file mode 100644 index 04c34cbb5..000000000 --- a/.github/workflows/aws.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: Test Self-Hosted Runner -on: - workflow_dispatch: - pull_request: - branches: - - "main" - -defaults: - run: - shell: bash -l {0} - -jobs: - start-aws-runner: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - outputs: - mapping: ${{ steps.aws-start.outputs.mapping }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: arn:aws:iam::649715411074:role/gh-actions-runner-role - aws-region: us-east-1 - - name: Create cloud runner - id: aws-start - uses: omsf/start-aws-gha-runner@v1.1.0 - with: - aws_image_id: ami-0d5079d9be06933e5 - aws_instance_type: g4dn.xlarge - aws_home_dir: /home/ubuntu - env: - GH_PAT: ${{ secrets.GH_PAT }} - self-hosted-test: - runs-on: self-hosted - needs: - - start-aws-runner - steps: - - uses: actions/checkout@v5 - - - name: Print disk usage - run: "df -h" - - - name: Print Docker details - run: "docker version || true" - - - name: Setup Conda Environment - uses: mamba-org/setup-micromamba@v2 - with: - environment-file: devtools/conda-envs/test_env.yaml - - - name: Install Package and test plugins - run: python -m pip install . utilities/test_plugins/ - - - name: Double-check local installation - run: python -c "from openff.evaluator import __version__; print(__version__)" - - name: Run integration tests - run: | - cd integration-tests/default-workflows/ - python run.py - stop-aws-runner: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - needs: - - start-aws-runner - - self-hosted-test - if: ${{ always() }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: arn:aws:iam::649715411074:role/gh-actions-runner-role - aws-region: us-east-1 - - name: Stop instances - uses: omsf/stop-aws-gha-runner@v1.0.0 - with: - instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }} - env: - GH_PAT: ${{ secrets.GH_PAT }} From e2d7e6caf240efdbffa30241aacf090ae5b14aaf Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 13 Oct 2025 11:06:29 -0500 Subject: [PATCH 4/7] Refactor to internally use JSON representation of topologies --- .../_tests/test_utils/test_packmol.py | 26 +++++------ openff/evaluator/forcefield/system.py | 15 +------ openff/evaluator/protocols/coordinates.py | 30 ++++++------- openff/evaluator/protocols/forcefield.py | 25 ++++------- openff/evaluator/utils/packmol.py | 43 +++++++++++++++---- 5 files changed, 70 insertions(+), 69 deletions(-) diff --git a/openff/evaluator/_tests/test_utils/test_packmol.py b/openff/evaluator/_tests/test_utils/test_packmol.py index d19912505..01929e11d 100644 --- a/openff/evaluator/_tests/test_utils/test_packmol.py +++ b/openff/evaluator/_tests/test_utils/test_packmol.py @@ -3,9 +3,8 @@ """ import numpy as np -import openmm.app import pytest -from openff.toolkit import Molecule, Topology +from openff.toolkit import ForceField, Molecule from openff.units import unit from openff.evaluator.utils import packmol @@ -144,21 +143,16 @@ def test_amino_acid_residue_information(): molecules = [Molecule.from_smiles(x) for x in smiles] counts = [1] * len(smiles) - trajectory, _ = packmol.pack_box( + topology, _ = packmol.pack_box( molecules, counts, box_size=([1000] * 3) * unit.angstrom ) - assert trajectory is not None - - assert trajectory.n_chains == len(smiles) - assert trajectory.n_residues == len(smiles) + for residue, smiles in zip( + topology.residues, + smiles, + ): + assert residue.residue_name == amino_residues[smiles] - for index, smiles in enumerate(smiles): - assert trajectory.top.residue(index).name == amino_residues[smiles] - - trajectory.save_pdb("test.pdb") - - # Check that the trajectory can later be read into OpenFF (via OpenMM) - Topology.from_openmm( - openmm.app.PDBFile("test.pdb").topology, unique_molecules=molecules - ) + # dummy check to make sure enough chemical information is present to parametrize, + # even though this force field is not necessarily designed for amino acids + ForceField("openff_unconstrained-2.3.0-rc2.offxml").create_openmm_system(topology) diff --git a/openff/evaluator/forcefield/system.py b/openff/evaluator/forcefield/system.py index 9fd4808c5..b15c68420 100644 --- a/openff/evaluator/forcefield/system.py +++ b/openff/evaluator/forcefield/system.py @@ -28,20 +28,9 @@ def topology_path(self) -> str: @property def topology(self) -> "Topology": - from openff.toolkit.topology import Molecule, Topology - from openmm import app + from openff.toolkit.topology import Topology - pdb_file = app.PDBFile(self._topology_path) - - topology = Topology.from_openmm( - pdb_file.topology, - unique_molecules=[ - Molecule.from_smiles(smiles=component.smiles) - for component in self._substance.components - ], - ) - - return topology + return Topology.from_json(open(self._topology_path).read()) @property def system_path(self) -> str: diff --git a/openff/evaluator/protocols/coordinates.py b/openff/evaluator/protocols/coordinates.py index 9ada4f5d3..ae47546be 100644 --- a/openff/evaluator/protocols/coordinates.py +++ b/openff/evaluator/protocols/coordinates.py @@ -8,6 +8,7 @@ from os import path import numpy as np +from openff.toolkit import Topology from openff.units import unit from openmm import app @@ -227,23 +228,20 @@ def _rebuild_substance(self, number_of_molecules): return output_substance - def _save_results(self, directory, trajectory): + def _save_results(self, directory: str, topology: Topology): """Save the results of running PACKMOL in the working directory Parameters ---------- - directory: str + directory The directory to save the results in. - trajectory : mdtraj.Trajectory - The trajectory of the created system. + topology + The topology of the created system. """ + self.coordinate_file_path = path.join(directory, "output.json") - # Fix mdtraj #1611 - for atom in trajectory.topology.atoms: - atom.serial = None - - self.coordinate_file_path = path.join(directory, "output.pdb") - trajectory.save_pdb(self.coordinate_file_path) + with open(self.coordinate_file_path, "w+") as file: + file.write(topology.to_json()) def _execute(self, directory, available_resources): molecules, number_of_molecules, exception = self._build_molecule_arrays() @@ -254,7 +252,7 @@ def _execute(self, directory, available_resources): packmol_directory = path.join(directory, "packmol_files") # Create packed box - trajectory, residue_names = packmol.pack_box( + topology, residue_names = packmol.pack_box( molecules=molecules, number_of_copies=number_of_molecules, mass_density=self.mass_density, @@ -270,10 +268,10 @@ def _execute(self, directory, available_resources): for component, residue_name in zip(self.substance, residue_names): self.assigned_residue_names[component.identifier] = residue_name - if trajectory is None: + if topology is None: raise RuntimeError("Packmol failed to complete.") - self._save_results(directory, trajectory) + self._save_results(directory, topology) @workflow_protocol() @@ -300,7 +298,7 @@ def _execute(self, directory, available_resources): packmol_directory = path.join(directory, "packmol_files") # Create packed box - trajectory, residue_names = packmol.pack_box( + topology, residue_names = packmol.pack_box( molecules=molecules, number_of_copies=number_of_molecules, structure_to_solvate=self.solute_coordinate_file, @@ -313,7 +311,7 @@ def _execute(self, directory, available_resources): retain_working_files=self.retain_packmol_files, ) - if trajectory is None: + if topology is None: raise RuntimeError("Packmol failed to complete.") self.assigned_residue_names = dict() @@ -321,7 +319,7 @@ def _execute(self, directory, available_resources): for component, residue_name in zip(self.substance, residue_names): self.assigned_residue_names[component.identifier] = residue_name - self._save_results(directory, trajectory) + self._save_results(directory, topology) @workflow_protocol() diff --git a/openff/evaluator/protocols/forcefield.py b/openff/evaluator/protocols/forcefield.py index 3662f5463..8949e2525 100644 --- a/openff/evaluator/protocols/forcefield.py +++ b/openff/evaluator/protocols/forcefield.py @@ -53,7 +53,7 @@ class BaseBuildSystem(Protocol, abc.ABC): default_value=UNDEFINED, ) coordinate_file_path = InputAttribute( - docstring="The file path to the PDB coordinate file which defines the " + docstring="The file path to the JSON file which defines the " "topology of the system to which the force field parameters " "will be assigned.", type_hint=str, @@ -443,8 +443,8 @@ def _execute(self, directory, available_resources): force_field_source = ForceFieldSource.from_json(self.force_field_path) cutoff = to_openmm(force_field_source.cutoff) - # Load in the systems topology - openmm_pdb_file = app.PDBFile(self.coordinate_file_path) + topology = Topology.from_json(open(self.coordinate_file_path).read()) + openmm_topology = topology.to_openmm() # Create an OFF topology for better insight into the layout of the system # topology. @@ -465,7 +465,7 @@ def _execute(self, directory, available_resources): if smiles in ["O", "[H]O[H]", "[H][O][H]"]: component_system = self._build_tip3p_system( cutoff, - openmm_pdb_file.topology.getUnitCellDimensions(), + openmm_topology.getUnitCellDimensions(), ) else: @@ -480,9 +480,8 @@ def _execute(self, directory, available_resources): system_templates[smiles] = component_system # Apply the parameters to the topology. - topology = Topology.from_openmm( - openmm_pdb_file.topology, unique_molecules.values() - ) + if False: + topology = Topology.from_openmm(openmm_topology, unique_molecules.values()) # Create the full system object from the component templates. system = self._create_empty_system(cutoff) @@ -508,9 +507,9 @@ def _execute(self, directory, available_resources): self._append_system(system, system_template, cutoff, index_map) - if openmm_pdb_file.topology.getPeriodicBoxVectors() is not None: + if openmm_topology.getPeriodicBoxVectors() is not None: system.setDefaultPeriodicBoxVectors( - *openmm_pdb_file.topology.getPeriodicBoxVectors() + *openmm_topology.getPeriodicBoxVectors() ) # Serialize the system object. @@ -536,7 +535,7 @@ class BuildSmirnoffSystem(BaseBuildSystem): def _execute(self, directory, available_resources): from openff.toolkit.topology import Molecule, Topology - pdb_file = app.PDBFile(self.coordinate_file_path) + topology = Topology.from_json(open(self.coordinate_file_path).read()) force_field_source = ForceFieldSource.from_json(self.force_field_path) @@ -558,12 +557,6 @@ def _execute(self, directory, available_resources): unique_molecules.append(molecule) - # Create the topology to parameterize from the input coordinates and the - # expected molecule species. - topology = Topology.from_openmm( - pdb_file.topology, unique_molecules=unique_molecules - ) - interchange = Interchange.from_smirnoff( topology=topology, force_field=force_field ) diff --git a/openff/evaluator/utils/packmol.py b/openff/evaluator/utils/packmol.py index c66c6be7d..43163fdde 100644 --- a/openff/evaluator/utils/packmol.py +++ b/openff/evaluator/utils/packmol.py @@ -17,7 +17,9 @@ from collections import defaultdict from functools import reduce +import mdtraj import numpy as np +from openff.toolkit import Topology from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper from openff.units import unit @@ -335,8 +337,6 @@ def _create_trajectory(molecule): mdtraj.Trajectory The created trajectory. """ - import mdtraj - # Check whether the molecule has a configuration defined, and if not, # define one. if molecule.n_conformers <= 0: @@ -349,7 +349,9 @@ def _create_trajectory(molecule): # Toolkit's PDB writer is untrustworthy (for non-biopolymers) with OpenEyeToolktiWrapper # See https://github.com/openforcefield/openff-toolkit/issues/1307 and linked issues molecule.to_file( - file.name, file_format="PDB", toolkit_registry=RDKitToolkitWrapper() + file.name, + file_format="PDB", + toolkit_registry=RDKitToolkitWrapper(), ) # Load the pdb into an mdtraj object. mdtraj_trajectory = mdtraj.load_pdb(file.name) @@ -360,6 +362,10 @@ def _create_trajectory(molecule): for residue in mdtraj_trajectory.topology.residues: _generate_residue_name(residue, molecule.to_smiles()) + # need to set the residue name on the Molecule object as well to ensure it's written to PDB + for atom in molecule.atoms: + atom.metadata["residue_name"] = mdtraj_trajectory.topology.residue(0).name + return mdtraj_trajectory @@ -479,8 +485,6 @@ def _correct_packmol_output( mdtraj.Trajectory A trajectory containing the packed system with full connectivity. """ - import mdtraj - with warnings.catch_warnings(): if structure_to_solvate is not None: # Catch the known warning which is fixed in the next section. @@ -570,7 +574,7 @@ def pack_box( verbose=False, working_directory=None, retain_working_files=False, -): +) -> tuple[Topology, list[str]]: """Run packmol to generate a box containing a mixture of molecules. Parameters @@ -683,11 +687,20 @@ def pack_box( pdb_file_name = f"{index}.pdb" pdb_file_names.append(pdb_file_name) - mdtraj_trajectory.save_pdb(pdb_file_name) + molecule.to_file( + pdb_file_name, file_format="PDB", toolkit_registry=RDKitToolkitWrapper() + ) mdtraj_topologies.append(mdtraj_trajectory.topology) residue_name = mdtraj_trajectory.topology.residue(0).name assigned_residue_names.append(residue_name) + print(80 * "=") + print("PRINTING PDB FILE AS IT'S PASSED TO PACKMOL FROM INSIDE EVALUATOR") + print(f"Determined residue name is {residue_name}") + print(f"{pdb_file_name=}") + print(80 * "=") + os.system(f"cat {pdb_file_name}") + print(80 * "=" + "\n") # Generate the input file. output_file_name = "packmol_output.pdb" @@ -715,6 +728,14 @@ def pack_box( packmol_succeeded = result.find("Success!") > 0 + print(80 * "=") + print("PRINTING THE RAW PDB OUTPUT THAT PACKMOL GAVE US") + print(f"{output_file_name=}") + print(80 * "=") + os.system(f"cat {output_file_name}") + print(80 * "=") + print(80 * "=" + "\n") + if not retain_working_files: os.unlink(input_file_path) @@ -752,4 +773,10 @@ def pack_box( if temporary_directory and not retain_working_files: shutil.rmtree(working_directory) - return trajectory, assigned_residue_names + output_file_name = "packmol_output.pdb" + return ( + Topology.from_openmm( + trajectory.topology.to_openmm(), unique_molecules=molecules + ), + assigned_residue_names, + ) From 753f7b98b3cc5fb3d2e3925f0219d50af20f71a6 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 13 Oct 2025 16:39:33 -0500 Subject: [PATCH 5/7] Partial refactor to use topologies internally --- .../_tests/test_utils/test_packmol.py | 70 +++++++++---------- openff/evaluator/utils/packmol.py | 25 ++++--- 2 files changed, 47 insertions(+), 48 deletions(-) diff --git a/openff/evaluator/_tests/test_utils/test_packmol.py b/openff/evaluator/_tests/test_utils/test_packmol.py index 01929e11d..18d344c80 100644 --- a/openff/evaluator/_tests/test_utils/test_packmol.py +++ b/openff/evaluator/_tests/test_utils/test_packmol.py @@ -15,20 +15,18 @@ def test_packmol_box_size(): molecules = [Molecule.from_smiles("O")] - trajectory, _ = packmol.pack_box( - molecules, [10], box_size=([20] * 3) * unit.angstrom - ) + topology, _ = packmol.pack_box(molecules, [10], box_size=([20] * 3) * unit.angstrom) - assert trajectory is not None + assert topology is not None - assert trajectory.n_chains == 1 - assert trajectory.n_residues == 10 - assert trajectory.n_atoms == 30 - assert trajectory.topology.n_bonds == 20 + assert len(topology.chains) == 10 # should be 1 ... + assert len(topology.residues) == 10 + assert topology.n_atoms == 30 + assert topology.n_bonds == 20 - assert all(x.name == "HOH" for x in trajectory.top.residues) + assert all(x.residue_name == "HOH" for x in topology.residues) - assert np.allclose(trajectory.unitcell_lengths, 2.2) + assert np.allclose(topology.box_vectors.m_as("nanometer").diagonal(), 2.2) def test_packmol_bad_input(): @@ -51,20 +49,20 @@ def test_packmol_water(): molecules = [Molecule.from_smiles("O")] - trajectory, _ = packmol.pack_box( + topology, _ = packmol.pack_box( molecules, [10], mass_density=1.0 * unit.grams / unit.milliliters, ) - assert trajectory is not None + assert topology is not None - assert trajectory.n_chains == 1 - assert trajectory.n_residues == 10 - assert trajectory.n_atoms == 30 - assert trajectory.topology.n_bonds == 20 + assert len(topology.chains) == 10 # should be 1 ... + assert len(topology.residues) == 10 + assert topology.n_atoms == 30 + assert topology.n_bonds == 20 - assert all(x.name == "HOH" for x in trajectory.top.residues) + assert all(residue.residue_name == "HOH" for residue in topology.residues) def test_packmol_ions(): @@ -75,24 +73,24 @@ def test_packmol_ions(): Molecule.from_smiles("[K+]"), ] - trajectory, _ = packmol.pack_box( + topology, _ = packmol.pack_box( molecules, [1, 1, 1], box_size=([20] * 3) * unit.angstrom ) - assert trajectory is not None + assert topology is not None - assert trajectory.n_chains == 3 - assert trajectory.n_residues == 3 - assert trajectory.n_atoms == 3 - assert trajectory.topology.n_bonds == 0 + assert len(topology.chains) == 3 + assert len(topology.residues) == 3 + assert topology.n_atoms == 3 + assert topology.n_bonds == 0 - assert trajectory.top.residue(0).name == "Na+" - assert trajectory.top.residue(1).name == "Cl-" - assert trajectory.top.residue(2).name == "K+" + assert topology.residues[0].residue_name == "Na+" + assert topology.residues[1].residue_name == "Cl-" + assert topology.residues[2].residue_name == "K+" - assert trajectory.top.atom(0).name == "Na+" - assert trajectory.top.atom(1).name == "Cl-" - assert trajectory.top.atom(2).name == "K+" + assert topology.atom(0).name == "Na+" + assert topology.atom(1).name == "Cl-" + assert topology.atom(2).name == "K+" def test_packmol_paracetamol(): @@ -100,16 +98,14 @@ def test_packmol_paracetamol(): # Test something a bit more tricky than water molecules = [Molecule.from_smiles("CC(=O)NC1=CC=C(C=C1)O")] - trajectory, _ = packmol.pack_box( - molecules, [1], box_size=([20] * 3) * unit.angstrom - ) + topology, _ = packmol.pack_box(molecules, [1], box_size=([20] * 3) * unit.angstrom) - assert trajectory is not None + assert topology is not None - assert trajectory.n_chains == 1 - assert trajectory.n_residues == 1 - assert trajectory.n_atoms == 20 - assert trajectory.topology.n_bonds == 20 + assert len(topology.chains) == 1 + assert len(topology.residues) == 1 + assert topology.n_atoms == 20 + assert topology.n_bonds == 20 amino_residues = { diff --git a/openff/evaluator/utils/packmol.py b/openff/evaluator/utils/packmol.py index 43163fdde..67dec0553 100644 --- a/openff/evaluator/utils/packmol.py +++ b/openff/evaluator/utils/packmol.py @@ -19,9 +19,8 @@ import mdtraj import numpy as np -from openff.toolkit import Topology +from openff.toolkit import Quantity, Topology, unit from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper -from openff.units import unit from openff.evaluator.substances import Component from openff.evaluator.utils.utils import temporarily_change_directory @@ -169,7 +168,7 @@ def _approximate_box_size_by_density( return box_size -def _generate_residue_name(residue, smiles): +def _generate_residue_name(residue, smiles) -> str: """Generates residue name for a particular residue which corresponds to a particular smiles pattern. @@ -262,8 +261,9 @@ def _generate_residue_name(residue, smiles): openff_molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True) if openff_molecule.n_atoms == 1: - residue.name = _ion_residue_name(openff_molecule) - residue.atom(0).name = residue.name + openff_molecule.atom(0).name = openff_molecule.atom(0).metadata['residue_name'] = _ion_residue_name(openff_molecule) + + openff_molecule.perceive_residues() return @@ -288,7 +288,7 @@ def _generate_residue_name(residue, smiles): element_counter[atom.element.symbol] += 1 -def _ion_residue_name(molecule): +def _ion_residue_name(molecule) -> str: """Generates a residue name for a monatomic ion. Parameters @@ -774,9 +774,12 @@ def pack_box( shutil.rmtree(working_directory) output_file_name = "packmol_output.pdb" - return ( - Topology.from_openmm( - trajectory.topology.to_openmm(), unique_molecules=molecules - ), - assigned_residue_names, + + topology = Topology.from_openmm( + trajectory.topology.to_openmm(), unique_molecules=molecules ) + + # box vectors don't survive MDTraj -> OpenMM -> Topology conversion + topology.box_vectors = Quantity(box_size, "nanometer") + + return topology, assigned_residue_names From 9ae4c3c673096ddcef4f4ddd5a3d677ef56aae54 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 21:39:54 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openff/evaluator/utils/packmol.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openff/evaluator/utils/packmol.py b/openff/evaluator/utils/packmol.py index 67dec0553..5da8437ea 100644 --- a/openff/evaluator/utils/packmol.py +++ b/openff/evaluator/utils/packmol.py @@ -261,7 +261,9 @@ def _generate_residue_name(residue, smiles) -> str: openff_molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True) if openff_molecule.n_atoms == 1: - openff_molecule.atom(0).name = openff_molecule.atom(0).metadata['residue_name'] = _ion_residue_name(openff_molecule) + openff_molecule.atom(0).name = openff_molecule.atom(0).metadata[ + "residue_name" + ] = _ion_residue_name(openff_molecule) openff_molecule.perceive_residues() From 1c592323c5feef38b759d60e11e5bd48a2ed4e02 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Tue, 14 Oct 2025 11:42:16 -0500 Subject: [PATCH 7/7] Update residue name handling --- openff/evaluator/utils/packmol.py | 69 +++++++++++++------------------ 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/openff/evaluator/utils/packmol.py b/openff/evaluator/utils/packmol.py index 5da8437ea..029b53570 100644 --- a/openff/evaluator/utils/packmol.py +++ b/openff/evaluator/utils/packmol.py @@ -14,7 +14,6 @@ import subprocess import tempfile import warnings -from collections import defaultdict from functools import reduce import mdtraj @@ -168,7 +167,7 @@ def _approximate_box_size_by_density( return box_size -def _generate_residue_name(residue, smiles) -> str: +def _generate_residue_name(smiles: str) -> tuple[str, list[str] | None]: """Generates residue name for a particular residue which corresponds to a particular smiles pattern. @@ -177,10 +176,15 @@ def _generate_residue_name(residue, smiles) -> str: Parameters ---------- - residue: mdtraj.core.topology.Residue - The residue to assign the name to. smiles: str The SMILES pattern to generate a resiude name for. + + Returns + ------- + residue_name + The generated residue name. + atom names + A list of atom names if they need to be renamed, otherwise None. """ from mdtraj.core import residue_names from openff.toolkit.topology import Molecule @@ -237,37 +241,19 @@ def _generate_residue_name(residue, smiles) -> str: # Check for amino acids. if standardized_smiles in amino_residue_mappings: - residue.name = amino_residue_mappings[standardized_smiles] - return + return amino_residue_mappings[standardized_smiles], None # Check for water if standardized_smiles == "O": - residue.name = "HOH" - - # Re-assign the water atom names. These need to be set to get - # correct CONECT statements. - h_counter = 1 - - for atom in residue.atoms: - if atom.element.symbol == "O": - atom.name = "O1" - else: - atom.name = f"H{h_counter}" - h_counter += 1 - - return + # TODO: May need to also rename atoms to O1, H1, H2 + return "HOH", None # Check for ions openff_molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True) if openff_molecule.n_atoms == 1: - openff_molecule.atom(0).name = openff_molecule.atom(0).metadata[ - "residue_name" - ] = _ion_residue_name(openff_molecule) - - openff_molecule.perceive_residues() - - return + ion_name = _ion_residue_name(openff_molecule) + return ion_name, [ion_name] # Randomly generate a name random_residue_name = "".join( @@ -280,14 +266,8 @@ def _generate_residue_name(residue, smiles) -> str: [random.choice(string.ascii_uppercase) for _ in range(3)] ) - residue.name = random_residue_name - - # Assign unique atom names. - element_counter = defaultdict(int) - - for atom in residue.atoms: - atom.name = f"{atom.element.symbol}{element_counter[atom.element.symbol] + 1}" - element_counter[atom.element.symbol] += 1 + # TODO: May need to assign random atom names as well + return random_residue_name, None def _ion_residue_name(molecule) -> str: @@ -684,18 +664,27 @@ def pack_box( mdtraj_topologies = [] for index, molecule in enumerate(molecules): - mdtraj_trajectory = _create_trajectory(molecule) + residue_name, atom_names = _generate_residue_name(molecule.to_smiles()) + assigned_residue_names.append(residue_name) + + for atom in molecule.atoms: + atom.metadata["residue_name"] = residue_name + + if atom_names is not None: + for atom, new_name in zip(molecule.atoms, atom_names): + atom.name = new_name + + molecule.add_default_hierarchy_schemes() pdb_file_name = f"{index}.pdb" pdb_file_names.append(pdb_file_name) molecule.to_file( - pdb_file_name, file_format="PDB", toolkit_registry=RDKitToolkitWrapper() + pdb_file_name, + file_format="PDB", + toolkit_registry=RDKitToolkitWrapper(), ) - mdtraj_topologies.append(mdtraj_trajectory.topology) - residue_name = mdtraj_trajectory.topology.residue(0).name - assigned_residue_names.append(residue_name) print(80 * "=") print("PRINTING PDB FILE AS IT'S PASSED TO PACKMOL FROM INSIDE EVALUATOR") print(f"Determined residue name is {residue_name}")