Skip to content

Commit e3f2260

Browse files
committed
Bug fixes, implicit solvent support, fixes to support a conda build with modeller
1 parent 4c4afec commit e3f2260

7 files changed

Lines changed: 120 additions & 24 deletions

File tree

prepmd/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@
55
from . import util
66
from . import prep
77
from . import model
8-
__version__ = "0.2"
8+
from . import add_modeller_license
9+
__version__ = "0.3"

prepmd/add_modeller_license.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Change the modeller license key
5+
"""
6+
7+
import importlib
8+
from pathlib import Path
9+
import sys
10+
11+
# note: don't do this
12+
# i have to do it like this to make the conda build work
13+
14+
def entry_point():
15+
key = sys.argv[1]
16+
modeller_init_path = Path(importlib.util.find_spec("modeller").origin)
17+
modeller_lib = modeller_init_path.parent.parent.absolute() / "modeller" / "config.py"
18+
with open(modeller_lib) as file:
19+
contents = file.readlines()
20+
splitline = contents[1].split("'")
21+
contents_1_new = splitline[0] + "'" + key + "'" + splitline[2]
22+
contents[1] = contents_1_new
23+
with open(modeller_lib, "w") as file:
24+
file.writelines(contents)
25+
print("Updated modeller license info.")
26+
27+
if __name__ == "__main__":
28+
if len(sys.argv) != 2:
29+
print("Usage: prep-license LICENSE-KEY")
30+
print("Note: this will replace whatever your current license key is.")
31+
sys.exit(0)
32+
entry_point()

prepmd/get_residues.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
"""
44
Read residue information from structure files
55
"""
6-
6+
NO_MODELLER = False
7+
try:
8+
from modeller import *
9+
except:
10+
NO_MODELLER = True
711
from prepmd import util
8-
from modeller import *
912

1013

1114
def get_residues_pdb(pdb, code):
@@ -18,6 +21,8 @@ def get_residues_pdb(pdb, code):
1821
Returns:
1922
the fasta sequence as a string
2023
"""
24+
if NO_MODELLER:
25+
raise ImportError("Can't run without MODELLER and a valid license key")
2126
log.none()
2227
e = Environ()
2328
m = Model(e, file=pdb)
@@ -80,5 +85,6 @@ def get_fullseq_pdb(pdb, code):
8085
fasta_joined = "/".join(fastas)
8186
chains = ":::::::::"
8287
if fastas == []:
83-
raise IOError("Couldn't get full sequence from contents of "+pdb)
88+
raise IOError("Couldn't get full sequence from contents of "+pdb+". "
89+
"Does it contain a sequence?")
8490
return ">P1;"+code+"_fill\n"+chains+"\n"+fasta_joined+"*"

prepmd/model.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,14 @@
33
"""
44
Calls to the MODELLER API
55
"""
6-
import modeller
6+
NO_MODELLER = False
7+
try:
8+
import modeller
9+
from modeller.automodel import *
10+
except:
11+
NO_MODELLER = True
12+
713
from Bio import Align
8-
from modeller.automodel import *
914
import os
1015
import pathlib
1116
import shutil
@@ -102,10 +107,12 @@ def get_best_pdb(directory, exts=["pdb", "cif", "mmcif", "mmCif"]):
102107
Returns:
103108
path to the file with the highest objective function, a string
104109
"""
110+
105111
pdbs = []
106112
for ext in exts:
107113
pdbs += list(pathlib.Path(directory).glob('*.'+ext))
108114
scores = {}
115+
similarities = {}
109116
for pdb in pdbs:
110117
with open(pdb, "r") as file:
111118
for line in file:
@@ -117,6 +124,26 @@ def get_best_pdb(directory, exts=["pdb", "cif", "mmcif", "mmCif"]):
117124
if "_modeller.objective_function" in line:
118125
score = line.split()[-1]
119126
scores[pdb] = float(score)
127+
# pdb
128+
if "BEST TEMPLATE" in line:
129+
similarities[pdb] = float(line.split()[-1])
130+
# mmcif
131+
if "_modeller.best_template_pct_seq_id" in line:
132+
similarities[pdb] = float(line.split()[-1])
133+
max_sim = max(similarities.values())
134+
if max_sim >= 97:
135+
print("Similiarity: "+str(max_sim)+"%.")
136+
if max_sim < 97 and max_sim >= 90:
137+
print("WARNING: best sequence similarity is "+str(max_sim)+"%. "
138+
"Modelled loops could be wrong. Please check them!")
139+
if max_sim < 90 and max_sim >= 80:
140+
print("SERIOUS WARNING: best sequence similarity is only "+
141+
str(max_sim)+"%. Modelled loops may have the wrong sequence." )
142+
if max_sim < 80:
143+
raise ValueError("Sequences do not match. Cannot reconstruct missing "
144+
"loops. Please double-check your sequence data (by "
145+
"default these are the SEQRES records from the input "
146+
"structure.")
120147
return str(max(scores, key=scores.get))
121148

122149

@@ -136,6 +163,9 @@ def fix_missing_residues(code, fastafile, alignmentout, inmodel, outmodel,
136163
Returns:
137164
nothing, but writes out outmodel and wdir.
138165
"""
166+
if NO_MODELLER:
167+
raise ImportError("Can't fix without MODELLER and a valid license key")
168+
139169
print("Fixing missing residues...")
140170

141171
if not os.path.isdir(wdir):

prepmd/prep.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,14 +100,23 @@ def prep(code, outmodel, workingdir, folder=None, fastafile=None, inmodel=None,
100100
locals_json = json.dumps(locals_copy)
101101

102102
# infer download format from output format
103-
if not download_format:
103+
if not download_format and not inmodel:
104104
if (".pdb") in outmodel:
105105
download_format = "pdb"
106106
print("No download format specified, downloading PDB.")
107107
elif (".cif") in outmodel or ".mmcif" in outmodel or (
108108
".pdbx") in outmodel:
109109
download_format = "mmCif"
110110
print("No download format specified, downloading mmCif.")
111+
112+
#infer download format from input format
113+
if not download_format and inmodel:
114+
if (".pdb") in inmodel:
115+
download_format = "pdb"
116+
if ".cif" in inmodel or ".pdbx" in inmodel or ".mmcif" in inmodel:
117+
download_format = "mmCif"
118+
119+
111120

112121
# check that input/output are specified in the same format
113122
# i'm not against converting the files but it shouldn't happen implicitly
@@ -126,7 +135,10 @@ def in_string(substr, text): return text == None or substr in text.lower()
126135
pathlib.Path(workingdir).mkdir(parents=True, exist_ok=True)
127136

128137
if inmodel:
129-
shutil.copyfile(inmodel, workingdir+os.path.sep+inmodel)
138+
shutil.copyfile(inmodel, workingdir+os.path.sep+code+"."+download_format)
139+
# note: modeller requires the filename to be the same as the pdb code
140+
# so here we change the filename
141+
inmodel = code+"."+download_format.replace("mmCif", "cif")
130142

131143
run_dir = os.getcwd()
132144
os.chdir(workingdir)

prepmd/run.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,22 @@
3131
"amber14,None": ['amber14-all.xml'],
3232
"charmm36,None": ['charmm36.xml'],
3333
"amoeba,None": ['amoeba2018.xml'],
34-
"amber19,None": ['amber19-all.xml']
34+
"amber19,None": ['amber19-all.xml'],
35+
"amber19,implicit": ['amber19-all.xml', 'implicit/gbn2.xml'],
36+
"amber14,implicit": ['amber14-all.xml', 'implicit/gbn2.xml'],
37+
"charmm36,implicit": ['charmm36.xml', 'implicit/gbn2.xml'],
38+
"amoeba,implicit": ['amoeba2018.xml', 'amoeba2009_gk.xml'],
3539
}
3640

3741

3842
def make_forcefield(ff, solvent=None):
3943
if ff+","+str(solvent) not in ff_lookup.keys():
44+
err = "Solvent "+str(solvent)+" not in list of available solvents."
4045
validff = set([item[0] for item in list(
4146
map(methodcaller("split", ","), ff_lookup.keys()))])
4247
validsolvent = set([item[1] for item in list(
4348
map(methodcaller("split", ","), ff_lookup.keys()))])
44-
err = "Please pick a valid force field and solvent.\n"
49+
err += "Please pick a valid force field and solvent.\n"
4550
err += "Valid force fields are "+str(validff)+"\n"
4651
err += "Valid solvents are "+str(validsolvent)+"\n"
4752
raise ValueError(err)
@@ -64,8 +69,8 @@ def check_platforms():
6469

6570
def test_sim(pdb):
6671
run(pdb, minimised_structure_out=pdb, md_steps=None,
67-
integrator="LangevinMiddleIntegrator", solvent=None,
68-
pressure=None, test_sim_steps=50)
72+
integrator="LangevinMiddleIntegrator",
73+
pressure=None, test_sim_steps=250)
6974

7075

7176
def run(pdb,
@@ -84,7 +89,7 @@ def run(pdb,
8489
test_run=True,
8590
fix_backbone=False,
8691
constraints="Default", # Default, None, HBonds
87-
solvent="tip4pew",
92+
solvent="implicit",
8893
strip_solvent=False,
8994
ionic_strength=0.1*molar,
9095
pressure=1*bar,
@@ -133,7 +138,8 @@ def run(pdb,
133138
None, HBonds. By default, HBonds will be constrained if the backbone
134139
is not fixed. A string.
135140
solvent - solvent to use. Possible values: None (no solvent), tip3p,
136-
tip4pew, spce. A string.
141+
tip4pew, spce, implicit (gbn model, equivalent to AMBER igb=8). A
142+
string.
137143
write_solvent: whether to write solvent atoms to the minimised
138144
structure file. Solvent will always be written to the trajectory. A
139145
bool.
@@ -195,7 +201,7 @@ def run(pdb,
195201
"running a solvated simulation instead.")
196202

197203
if not solvent:
198-
print("WARNING: No solvent")
204+
print("WARNING: No solvent. Minimised structures may be wrong!!!")
199205

200206
# read in mmcif or pdb
201207
if ".cif" in pdb or ".mmcif" in pdb:
@@ -209,7 +215,7 @@ def run(pdb,
209215

210216
forcefield = make_forcefield(forcefield, solvent)
211217

212-
if solvent:
218+
if solvent and solvent != "implicit":
213219
print("Solvating system...")
214220
modeller.addSolvent(forcefield, model=solvent,
215221
ionicStrength=ionic_strength,
@@ -231,7 +237,7 @@ def run(pdb,
231237

232238
# setup non bonded method - if none is chosen, select based on box
233239
if non_bonded_method == "Default":
234-
if solvent is None:
240+
if solvent == None or solvent == "implicit":
235241
non_bonded_method = CutoffNonPeriodic
236242
else:
237243
non_bonded_method = PME
@@ -282,6 +288,9 @@ def run(pdb,
282288
raise ValueError("Integrator must be one of: "
283289
"VariableLangevinIntegrator,"
284290
" LangevinMiddleIntegrator")
291+
292+
# TODO: create metadynamics object here
293+
285294
simulation = Simulation(modeller.topology, system, integrator)
286295
simulation.context.setPositions(modeller.positions)
287296

@@ -336,6 +345,8 @@ def run(pdb,
336345
" minimisation and test run were both skipped.")
337346

338347
if md_steps and traj_out:
348+
# todo: if metadynamics, add rmsd reporter here
349+
339350
if ".dcd" in traj_out.lower():
340351
simulation.reporters.append(DCDReporter(traj_out, step))
341352
elif ".xtc" in traj_out.lower():
@@ -350,6 +361,9 @@ def run(pdb,
350361
step, step=True,
351362
potentialEnergy=True,
352363
temperature=True))
364+
365+
# TODO: if metadynamics, do meta.step here
366+
353367
print("Running production MD...")
354368
simulation.step(md_steps)
355369
simulation.saveCheckpoint(checkpoint_output)
@@ -394,7 +408,7 @@ def entry_point():
394408
help="Fix the backbone", action="store_true")
395409
parser.add_argument("-c", "--constraints", help="Constraints to apply. Possible values: None, HBonds. Constraining HBonds can make simulations run faster, at the cost of some accuracy. You can't constrain HBonds if you're also fixing the backbone, for obvious reasons.", default="Default")
396410
parser.add_argument(
397-
"-solv", "--solvent", help="Solvent to use. Possible values: tip3p, tip4pew, spce. If no solvent is specified, none will be used.", default=None)
411+
"-solv", "--solvent", help="Solvent to use. Possible values: tip3p, tip4pew, spce, implicit (GBn model, equivalent to AMBER igb=8). If no solvent is specified, implicit will be used.", default="implicit")
398412
parser.add_argument("-ns", "--strip_solvent",
399413
help="Don't write solvent molecules to the minimised output file", action="store_true")
400414
parser.add_argument("-ion", "--ionic_strength",

pyproject.toml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,17 @@ requires-python = ">= 3.13.3"
55
authors = [{name = "Rob Welch", email = "robert.welch@stfc.ac.uk"}]
66
description = "Automatically prepare pdb/mmCif structures for molecular dynamics simulations"
77
dependencies = [
8-
"requests",
9-
"openmm==8.2.0",
10-
"pdbfixer==1.11",
11-
"gemmi==0.7.1",
12-
#"modeller==10.7",
13-
"biopython==1.85",
8+
# "requests",
9+
# "openmm==8.2.0",
10+
# "pdbfixer==1.11",
11+
#"gemmi==0.7.1",
12+
# "modeller==10.7",
13+
# "biopython==1.85",
1414
]
1515
readme = "README.md"
1616
license = "AGPL-3.0-or-later"
1717

1818
[project.scripts]
1919
prepmd = "prepmd.prep:entry_point"
2020
runmd = "prepmd.run:entry_point"
21+
prep-license = "prepmd.add_modeller_license:entry_point"

0 commit comments

Comments
 (0)