diff --git a/.github/workflows/aws.yaml b/.github/workflows/aws.yaml deleted file mode 100644 index 04c34cbb5..000000000 --- a/.github/workflows/aws.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: Test Self-Hosted Runner -on: - workflow_dispatch: - pull_request: - branches: - - "main" - -defaults: - run: - shell: bash -l {0} - -jobs: - start-aws-runner: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - outputs: - mapping: ${{ steps.aws-start.outputs.mapping }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: arn:aws:iam::649715411074:role/gh-actions-runner-role - aws-region: us-east-1 - - name: Create cloud runner - id: aws-start - uses: omsf/start-aws-gha-runner@v1.1.0 - with: - aws_image_id: ami-0d5079d9be06933e5 - aws_instance_type: g4dn.xlarge - aws_home_dir: /home/ubuntu - env: - GH_PAT: ${{ secrets.GH_PAT }} - self-hosted-test: - runs-on: self-hosted - needs: - - start-aws-runner - steps: - - uses: actions/checkout@v5 - - - name: Print disk usage - run: "df -h" - - - name: Print Docker details - run: "docker version || true" - - - name: Setup Conda Environment - uses: mamba-org/setup-micromamba@v2 - with: - environment-file: devtools/conda-envs/test_env.yaml - - - name: Install Package and test plugins - run: python -m pip install . utilities/test_plugins/ - - - name: Double-check local installation - run: python -c "from openff.evaluator import __version__; print(__version__)" - - name: Run integration tests - run: | - cd integration-tests/default-workflows/ - python run.py - stop-aws-runner: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - needs: - - start-aws-runner - - self-hosted-test - if: ${{ always() }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: arn:aws:iam::649715411074:role/gh-actions-runner-role - aws-region: us-east-1 - - name: Stop instances - uses: omsf/stop-aws-gha-runner@v1.0.0 - with: - instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }} - env: - GH_PAT: ${{ secrets.GH_PAT }} diff --git a/openff/evaluator/_tests/test_utils/test_packmol.py b/openff/evaluator/_tests/test_utils/test_packmol.py index 0c4b84503..18d344c80 100644 --- a/openff/evaluator/_tests/test_utils/test_packmol.py +++ b/openff/evaluator/_tests/test_utils/test_packmol.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from openff.toolkit import ForceField, Molecule from openff.units import unit from openff.evaluator.utils import packmol @@ -11,28 +12,24 @@ def test_packmol_box_size(): - from openff.toolkit.topology import Molecule molecules = [Molecule.from_smiles("O")] - trajectory, _ = packmol.pack_box( - molecules, [10], box_size=([20] * 3) * unit.angstrom - ) + topology, _ = packmol.pack_box(molecules, [10], box_size=([20] * 3) * unit.angstrom) - assert trajectory is not None + assert topology is not None - assert trajectory.n_chains == 1 - assert trajectory.n_residues == 10 - assert trajectory.n_atoms == 30 - assert trajectory.topology.n_bonds == 20 + assert len(topology.chains) == 10 # should be 1 ... + assert len(topology.residues) == 10 + assert topology.n_atoms == 30 + assert topology.n_bonds == 20 - assert all(x.name == "HOH" for x in trajectory.top.residues) + assert all(x.residue_name == "HOH" for x in topology.residues) - assert np.allclose(trajectory.unitcell_lengths, 2.2) + assert np.allclose(topology.box_vectors.m_as("nanometer").diagonal(), 2.2) def test_packmol_bad_input(): - from openff.toolkit.topology import Molecule molecules = [Molecule.from_smiles("O")] @@ -41,7 +38,6 @@ def test_packmol_bad_input(): def test_packmol_failed(): - from openff.toolkit.topology import Molecule molecules = [Molecule.from_smiles("O")] @@ -50,28 +46,26 @@ def test_packmol_failed(): def test_packmol_water(): - from openff.toolkit.topology import Molecule molecules = [Molecule.from_smiles("O")] - trajectory, _ = packmol.pack_box( + topology, _ = packmol.pack_box( molecules, [10], mass_density=1.0 * unit.grams / unit.milliliters, ) - assert trajectory is not None + assert topology is not None - assert trajectory.n_chains == 1 - assert trajectory.n_residues == 10 - assert trajectory.n_atoms == 30 - assert trajectory.topology.n_bonds == 20 + assert len(topology.chains) == 10 # should be 1 ... + assert len(topology.residues) == 10 + assert topology.n_atoms == 30 + assert topology.n_bonds == 20 - assert all(x.name == "HOH" for x in trajectory.top.residues) + assert all(residue.residue_name == "HOH" for residue in topology.residues) def test_packmol_ions(): - from openff.toolkit.topology import Molecule molecules = [ Molecule.from_smiles("[Na+]"), @@ -79,84 +73,82 @@ def test_packmol_ions(): Molecule.from_smiles("[K+]"), ] - trajectory, _ = packmol.pack_box( + topology, _ = packmol.pack_box( molecules, [1, 1, 1], box_size=([20] * 3) * unit.angstrom ) - assert trajectory is not None + assert topology is not None - assert trajectory.n_chains == 3 - assert trajectory.n_residues == 3 - assert trajectory.n_atoms == 3 - assert trajectory.topology.n_bonds == 0 + assert len(topology.chains) == 3 + assert len(topology.residues) == 3 + assert topology.n_atoms == 3 + assert topology.n_bonds == 0 - assert trajectory.top.residue(0).name == "Na+" - assert trajectory.top.residue(1).name == "Cl-" - assert trajectory.top.residue(2).name == "K+" + assert topology.residues[0].residue_name == "Na+" + assert topology.residues[1].residue_name == "Cl-" + assert topology.residues[2].residue_name == "K+" - assert trajectory.top.atom(0).name == "Na+" - assert trajectory.top.atom(1).name == "Cl-" - assert trajectory.top.atom(2).name == "K+" + assert topology.atom(0).name == "Na+" + assert topology.atom(1).name == "Cl-" + assert topology.atom(2).name == "K+" def test_packmol_paracetamol(): - from openff.toolkit.topology import Molecule # Test something a bit more tricky than water molecules = [Molecule.from_smiles("CC(=O)NC1=CC=C(C=C1)O")] - trajectory, _ = packmol.pack_box( - molecules, [1], box_size=([20] * 3) * unit.angstrom - ) - - assert trajectory is not None - - assert trajectory.n_chains == 1 - assert trajectory.n_residues == 1 - assert trajectory.n_atoms == 20 - assert trajectory.topology.n_bonds == 20 - - -def test_amino_acids(): - amino_residues = { - "C[C@H](N)C(=O)O": "ALA", - # Undefined stereochemistry error. - # "N=C(N)NCCC[C@H](N)C(=O)O": "ARG", - "NC(=O)C[C@H](N)C(=O)O": "ASN", - "N[C@@H](CC(=O)O)C(=O)O": "ASP", - "N[C@@H](CS)C(=O)O": "CYS", - "N[C@@H](CCC(=O)O)C(=O)O": "GLU", - "NC(=O)CC[C@H](N)C(=O)O": "GLN", - "NCC(=O)O": "GLY", - "N[C@@H](Cc1c[nH]cn1)C(=O)O": "HIS", - "CC[C@H](C)[C@H](N)C(=O)O": "ILE", - "CC(C)C[C@H](N)C(=O)O": "LEU", - "NCCCC[C@H](N)C(=O)O": "LYS", - "CSCC[C@H](N)C(=O)O": "MET", - "N[C@@H](Cc1ccccc1)C(=O)O": "PHE", - "O=C(O)[C@@H]1CCCN1": "PRO", - "N[C@@H](CO)C(=O)O": "SER", - "C[C@@H](O)[C@H](N)C(=O)O": "THR", - "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O": "TRP", - "N[C@@H](Cc1ccc(O)cc1)C(=O)O": "TYR", - "CC(C)[C@H](N)C(=O)O": "VAL", - } - + topology, _ = packmol.pack_box(molecules, [1], box_size=([20] * 3) * unit.angstrom) + + assert topology is not None + + assert len(topology.chains) == 1 + assert len(topology.residues) == 1 + assert topology.n_atoms == 20 + assert topology.n_bonds == 20 + + +amino_residues = { + "C[C@H](N)C(=O)O": "ALA", + # Undefined stereochemistry error. + # "N=C(N)NCCC[C@H](N)C(=O)O": "ARG", + "NC(=O)C[C@H](N)C(=O)O": "ASN", + "N[C@@H](CC(=O)O)C(=O)O": "ASP", + "N[C@@H](CS)C(=O)O": "CYS", + "N[C@@H](CCC(=O)O)C(=O)O": "GLU", + "NC(=O)CC[C@H](N)C(=O)O": "GLN", + "NCC(=O)O": "GLY", + "N[C@@H](Cc1c[nH]cn1)C(=O)O": "HIS", + "CC[C@H](C)[C@H](N)C(=O)O": "ILE", + "CC(C)C[C@H](N)C(=O)O": "LEU", + "NCCCC[C@H](N)C(=O)O": "LYS", + "CSCC[C@H](N)C(=O)O": "MET", + "N[C@@H](Cc1ccccc1)C(=O)O": "PHE", + "O=C(O)[C@@H]1CCCN1": "PRO", + "N[C@@H](CO)C(=O)O": "SER", + "C[C@@H](O)[C@H](N)C(=O)O": "THR", + "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O": "TRP", + "N[C@@H](Cc1ccc(O)cc1)C(=O)O": "TYR", + "CC(C)[C@H](N)C(=O)O": "VAL", +} + + +def test_amino_acid_residue_information(): smiles = [*amino_residues] - from openff.toolkit.topology import Molecule - molecules = [Molecule.from_smiles(x) for x in smiles] counts = [1] * len(smiles) - trajectory, _ = packmol.pack_box( + topology, _ = packmol.pack_box( molecules, counts, box_size=([1000] * 3) * unit.angstrom ) - assert trajectory is not None - - assert trajectory.n_chains == len(smiles) - assert trajectory.n_residues == len(smiles) + for residue, smiles in zip( + topology.residues, + smiles, + ): + assert residue.residue_name == amino_residues[smiles] - for index, smiles in enumerate(smiles): - assert trajectory.top.residue(index).name == amino_residues[smiles] + # dummy check to make sure enough chemical information is present to parametrize, + # even though this force field is not necessarily designed for amino acids + ForceField("openff_unconstrained-2.3.0-rc2.offxml").create_openmm_system(topology) diff --git a/openff/evaluator/forcefield/system.py b/openff/evaluator/forcefield/system.py index 9fd4808c5..b15c68420 100644 --- a/openff/evaluator/forcefield/system.py +++ b/openff/evaluator/forcefield/system.py @@ -28,20 +28,9 @@ def topology_path(self) -> str: @property def topology(self) -> "Topology": - from openff.toolkit.topology import Molecule, Topology - from openmm import app + from openff.toolkit.topology import Topology - pdb_file = app.PDBFile(self._topology_path) - - topology = Topology.from_openmm( - pdb_file.topology, - unique_molecules=[ - Molecule.from_smiles(smiles=component.smiles) - for component in self._substance.components - ], - ) - - return topology + return Topology.from_json(open(self._topology_path).read()) @property def system_path(self) -> str: diff --git a/openff/evaluator/protocols/coordinates.py b/openff/evaluator/protocols/coordinates.py index 9ada4f5d3..ae47546be 100644 --- a/openff/evaluator/protocols/coordinates.py +++ b/openff/evaluator/protocols/coordinates.py @@ -8,6 +8,7 @@ from os import path import numpy as np +from openff.toolkit import Topology from openff.units import unit from openmm import app @@ -227,23 +228,20 @@ def _rebuild_substance(self, number_of_molecules): return output_substance - def _save_results(self, directory, trajectory): + def _save_results(self, directory: str, topology: Topology): """Save the results of running PACKMOL in the working directory Parameters ---------- - directory: str + directory The directory to save the results in. - trajectory : mdtraj.Trajectory - The trajectory of the created system. + topology + The topology of the created system. """ + self.coordinate_file_path = path.join(directory, "output.json") - # Fix mdtraj #1611 - for atom in trajectory.topology.atoms: - atom.serial = None - - self.coordinate_file_path = path.join(directory, "output.pdb") - trajectory.save_pdb(self.coordinate_file_path) + with open(self.coordinate_file_path, "w+") as file: + file.write(topology.to_json()) def _execute(self, directory, available_resources): molecules, number_of_molecules, exception = self._build_molecule_arrays() @@ -254,7 +252,7 @@ def _execute(self, directory, available_resources): packmol_directory = path.join(directory, "packmol_files") # Create packed box - trajectory, residue_names = packmol.pack_box( + topology, residue_names = packmol.pack_box( molecules=molecules, number_of_copies=number_of_molecules, mass_density=self.mass_density, @@ -270,10 +268,10 @@ def _execute(self, directory, available_resources): for component, residue_name in zip(self.substance, residue_names): self.assigned_residue_names[component.identifier] = residue_name - if trajectory is None: + if topology is None: raise RuntimeError("Packmol failed to complete.") - self._save_results(directory, trajectory) + self._save_results(directory, topology) @workflow_protocol() @@ -300,7 +298,7 @@ def _execute(self, directory, available_resources): packmol_directory = path.join(directory, "packmol_files") # Create packed box - trajectory, residue_names = packmol.pack_box( + topology, residue_names = packmol.pack_box( molecules=molecules, number_of_copies=number_of_molecules, structure_to_solvate=self.solute_coordinate_file, @@ -313,7 +311,7 @@ def _execute(self, directory, available_resources): retain_working_files=self.retain_packmol_files, ) - if trajectory is None: + if topology is None: raise RuntimeError("Packmol failed to complete.") self.assigned_residue_names = dict() @@ -321,7 +319,7 @@ def _execute(self, directory, available_resources): for component, residue_name in zip(self.substance, residue_names): self.assigned_residue_names[component.identifier] = residue_name - self._save_results(directory, trajectory) + self._save_results(directory, topology) @workflow_protocol() diff --git a/openff/evaluator/protocols/forcefield.py b/openff/evaluator/protocols/forcefield.py index 3662f5463..8949e2525 100644 --- a/openff/evaluator/protocols/forcefield.py +++ b/openff/evaluator/protocols/forcefield.py @@ -53,7 +53,7 @@ class BaseBuildSystem(Protocol, abc.ABC): default_value=UNDEFINED, ) coordinate_file_path = InputAttribute( - docstring="The file path to the PDB coordinate file which defines the " + docstring="The file path to the JSON file which defines the " "topology of the system to which the force field parameters " "will be assigned.", type_hint=str, @@ -443,8 +443,8 @@ def _execute(self, directory, available_resources): force_field_source = ForceFieldSource.from_json(self.force_field_path) cutoff = to_openmm(force_field_source.cutoff) - # Load in the systems topology - openmm_pdb_file = app.PDBFile(self.coordinate_file_path) + topology = Topology.from_json(open(self.coordinate_file_path).read()) + openmm_topology = topology.to_openmm() # Create an OFF topology for better insight into the layout of the system # topology. @@ -465,7 +465,7 @@ def _execute(self, directory, available_resources): if smiles in ["O", "[H]O[H]", "[H][O][H]"]: component_system = self._build_tip3p_system( cutoff, - openmm_pdb_file.topology.getUnitCellDimensions(), + openmm_topology.getUnitCellDimensions(), ) else: @@ -480,9 +480,8 @@ def _execute(self, directory, available_resources): system_templates[smiles] = component_system # Apply the parameters to the topology. - topology = Topology.from_openmm( - openmm_pdb_file.topology, unique_molecules.values() - ) + if False: + topology = Topology.from_openmm(openmm_topology, unique_molecules.values()) # Create the full system object from the component templates. system = self._create_empty_system(cutoff) @@ -508,9 +507,9 @@ def _execute(self, directory, available_resources): self._append_system(system, system_template, cutoff, index_map) - if openmm_pdb_file.topology.getPeriodicBoxVectors() is not None: + if openmm_topology.getPeriodicBoxVectors() is not None: system.setDefaultPeriodicBoxVectors( - *openmm_pdb_file.topology.getPeriodicBoxVectors() + *openmm_topology.getPeriodicBoxVectors() ) # Serialize the system object. @@ -536,7 +535,7 @@ class BuildSmirnoffSystem(BaseBuildSystem): def _execute(self, directory, available_resources): from openff.toolkit.topology import Molecule, Topology - pdb_file = app.PDBFile(self.coordinate_file_path) + topology = Topology.from_json(open(self.coordinate_file_path).read()) force_field_source = ForceFieldSource.from_json(self.force_field_path) @@ -558,12 +557,6 @@ def _execute(self, directory, available_resources): unique_molecules.append(molecule) - # Create the topology to parameterize from the input coordinates and the - # expected molecule species. - topology = Topology.from_openmm( - pdb_file.topology, unique_molecules=unique_molecules - ) - interchange = Interchange.from_smirnoff( topology=topology, force_field=force_field ) diff --git a/openff/evaluator/utils/packmol.py b/openff/evaluator/utils/packmol.py index c66c6be7d..029b53570 100644 --- a/openff/evaluator/utils/packmol.py +++ b/openff/evaluator/utils/packmol.py @@ -14,12 +14,12 @@ import subprocess import tempfile import warnings -from collections import defaultdict from functools import reduce +import mdtraj import numpy as np +from openff.toolkit import Quantity, Topology, unit from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper -from openff.units import unit from openff.evaluator.substances import Component from openff.evaluator.utils.utils import temporarily_change_directory @@ -167,7 +167,7 @@ def _approximate_box_size_by_density( return box_size -def _generate_residue_name(residue, smiles): +def _generate_residue_name(smiles: str) -> tuple[str, list[str] | None]: """Generates residue name for a particular residue which corresponds to a particular smiles pattern. @@ -176,10 +176,15 @@ def _generate_residue_name(residue, smiles): Parameters ---------- - residue: mdtraj.core.topology.Residue - The residue to assign the name to. smiles: str The SMILES pattern to generate a resiude name for. + + Returns + ------- + residue_name + The generated residue name. + atom names + A list of atom names if they need to be renamed, otherwise None. """ from mdtraj.core import residue_names from openff.toolkit.topology import Molecule @@ -236,34 +241,19 @@ def _generate_residue_name(residue, smiles): # Check for amino acids. if standardized_smiles in amino_residue_mappings: - residue.name = amino_residue_mappings[standardized_smiles] - return + return amino_residue_mappings[standardized_smiles], None # Check for water if standardized_smiles == "O": - residue.name = "HOH" - - # Re-assign the water atom names. These need to be set to get - # correct CONECT statements. - h_counter = 1 - - for atom in residue.atoms: - if atom.element.symbol == "O": - atom.name = "O1" - else: - atom.name = f"H{h_counter}" - h_counter += 1 - - return + # TODO: May need to also rename atoms to O1, H1, H2 + return "HOH", None # Check for ions openff_molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True) if openff_molecule.n_atoms == 1: - residue.name = _ion_residue_name(openff_molecule) - residue.atom(0).name = residue.name - - return + ion_name = _ion_residue_name(openff_molecule) + return ion_name, [ion_name] # Randomly generate a name random_residue_name = "".join( @@ -276,17 +266,11 @@ def _generate_residue_name(residue, smiles): [random.choice(string.ascii_uppercase) for _ in range(3)] ) - residue.name = random_residue_name + # TODO: May need to assign random atom names as well + return random_residue_name, None - # Assign unique atom names. - element_counter = defaultdict(int) - for atom in residue.atoms: - atom.name = f"{atom.element.symbol}{element_counter[atom.element.symbol] + 1}" - element_counter[atom.element.symbol] += 1 - - -def _ion_residue_name(molecule): +def _ion_residue_name(molecule) -> str: """Generates a residue name for a monatomic ion. Parameters @@ -335,8 +319,6 @@ def _create_trajectory(molecule): mdtraj.Trajectory The created trajectory. """ - import mdtraj - # Check whether the molecule has a configuration defined, and if not, # define one. if molecule.n_conformers <= 0: @@ -349,7 +331,9 @@ def _create_trajectory(molecule): # Toolkit's PDB writer is untrustworthy (for non-biopolymers) with OpenEyeToolktiWrapper # See https://github.com/openforcefield/openff-toolkit/issues/1307 and linked issues molecule.to_file( - file.name, file_format="PDB", toolkit_registry=RDKitToolkitWrapper() + file.name, + file_format="PDB", + toolkit_registry=RDKitToolkitWrapper(), ) # Load the pdb into an mdtraj object. mdtraj_trajectory = mdtraj.load_pdb(file.name) @@ -360,6 +344,10 @@ def _create_trajectory(molecule): for residue in mdtraj_trajectory.topology.residues: _generate_residue_name(residue, molecule.to_smiles()) + # need to set the residue name on the Molecule object as well to ensure it's written to PDB + for atom in molecule.atoms: + atom.metadata["residue_name"] = mdtraj_trajectory.topology.residue(0).name + return mdtraj_trajectory @@ -479,8 +467,6 @@ def _correct_packmol_output( mdtraj.Trajectory A trajectory containing the packed system with full connectivity. """ - import mdtraj - with warnings.catch_warnings(): if structure_to_solvate is not None: # Catch the known warning which is fixed in the next section. @@ -570,7 +556,7 @@ def pack_box( verbose=False, working_directory=None, retain_working_files=False, -): +) -> tuple[Topology, list[str]]: """Run packmol to generate a box containing a mixture of molecules. Parameters @@ -678,16 +664,34 @@ def pack_box( mdtraj_topologies = [] for index, molecule in enumerate(molecules): - mdtraj_trajectory = _create_trajectory(molecule) + residue_name, atom_names = _generate_residue_name(molecule.to_smiles()) + assigned_residue_names.append(residue_name) + + for atom in molecule.atoms: + atom.metadata["residue_name"] = residue_name + + if atom_names is not None: + for atom, new_name in zip(molecule.atoms, atom_names): + atom.name = new_name + + molecule.add_default_hierarchy_schemes() pdb_file_name = f"{index}.pdb" pdb_file_names.append(pdb_file_name) - mdtraj_trajectory.save_pdb(pdb_file_name) - mdtraj_topologies.append(mdtraj_trajectory.topology) + molecule.to_file( + pdb_file_name, + file_format="PDB", + toolkit_registry=RDKitToolkitWrapper(), + ) - residue_name = mdtraj_trajectory.topology.residue(0).name - assigned_residue_names.append(residue_name) + print(80 * "=") + print("PRINTING PDB FILE AS IT'S PASSED TO PACKMOL FROM INSIDE EVALUATOR") + print(f"Determined residue name is {residue_name}") + print(f"{pdb_file_name=}") + print(80 * "=") + os.system(f"cat {pdb_file_name}") + print(80 * "=" + "\n") # Generate the input file. output_file_name = "packmol_output.pdb" @@ -715,6 +719,14 @@ def pack_box( packmol_succeeded = result.find("Success!") > 0 + print(80 * "=") + print("PRINTING THE RAW PDB OUTPUT THAT PACKMOL GAVE US") + print(f"{output_file_name=}") + print(80 * "=") + os.system(f"cat {output_file_name}") + print(80 * "=") + print(80 * "=" + "\n") + if not retain_working_files: os.unlink(input_file_path) @@ -752,4 +764,13 @@ def pack_box( if temporary_directory and not retain_working_files: shutil.rmtree(working_directory) - return trajectory, assigned_residue_names + output_file_name = "packmol_output.pdb" + + topology = Topology.from_openmm( + trajectory.topology.to_openmm(), unique_molecules=molecules + ) + + # box vectors don't survive MDTraj -> OpenMM -> Topology conversion + topology.box_vectors = Quantity(box_size, "nanometer") + + return topology, assigned_residue_names