Skip to content

Commit 9e4dcc1

Browse files
committed
PDB2PQR output option
1 parent 35e2cc9 commit 9e4dcc1

7 files changed

Lines changed: 84 additions & 165 deletions

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ dist/
1414
downloads/
1515
eggs/
1616
.eggs/
17-
lib/
17+
#lib/
1818
lib64/
1919
parts/
2020
sdist/

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
A utility to automatically prepare structures from the PDB for molecular dynamics simulation and perform minimisations and simple MD simulations.
66

77
## Features
8-
* [X] Automatically download structures, sequences and metadata from the PDB and UNIPROT
8+
* [X] Automatically download structures, sequences and metadata from the PDB, PDB-REDO, EMDB and UNIPROT
99
* [X] Automatically fill missing loops with modeller
1010
* [X] Automatically add missing atoms and fix non-standard residues with pdbfixer
1111
* [X] Automatically resolve steric clashes and minimise structures

prepmd/pdb2pqr.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@
66
@author: rob
77
"""
88

9-
#import pdb2pqr
9+
import pdb2pqr
1010

11-
# steps:
12-
# run as normal but not including test simulation and fixing
13-
# apply pdb2pqr before fixing
14-
# then fix, and especially add missing atoms AND remove hetatms
15-
# then run as normal
11+
def run_pdb2pqr(infile, outfile, ff="AMBER"):
12+
"""
13+
Run PDB2PQR.
1614
17-
#pdb2pqr.run_pdb2pqr("A")
18-
# example usage: UBQ.pdb 1UBQ.pqr --titration-state-method=propka --with-ph=7 --ff=CHARMM --ffout=CHARMM
15+
Args:
16+
infile - path to input file, a string
17+
outfile - path to output PQR file, a string
18+
ff - force field to use for calculations and residue naming
19+
Returns:
20+
nothing, but writes 'outfile'
21+
"""
22+
pdb2pqr.run_pdb2pqr([infile, outfile, "--titration-state-method=propka",
23+
"--with-ph=7", "--ff="+ff, "--ffout="+ff, "--drop-water"])

prepmd/point_cloud.py

Lines changed: 26 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -10,154 +10,26 @@
1010
from sklearn.neighbors import NearestNeighbors
1111
import numpy as np
1212
from prepmd.lib.icp import icp
13-
14-
"""
15-
# via Oriol Manzano Duran
16-
def compute_cross_covariace(src_points: np.array, dst_points: np.array, centroid_src_points: np.array, centroid_dst_points: np.array):
17-
'''
18-
Compute the cross-covariance between two set of point clouds.
19-
It will tell us how the coordinates of point in src_points change with changes in coodrinates of points belonging to dst_points
20-
21-
Args:
22-
src_points(np.array): The coordinates of the first point cloud to correlate.
23-
dst_points(np.array): The coordinates of the second points to correlate with the first.
24-
centroid_src_points(np.array): Center position of all points in first point cloud, equivalent to get the mean of all the points
25-
centroid_dst_points(np.array): Center position of all points in second point cloud, equivalent to get the mean of all the points
26-
27-
Return
28-
(np.array)(src_points.shape[1]xsrc_points.shape[1]): The cross-covariance matrix between the points
29-
'''
30-
centered_src_points: np.array = src_points - centroid_src_points
31-
centered_dst_points: np.array = dst_points - centroid_dst_points
32-
cov: np.array = np.dot(centered_src_points.T, centered_dst_points)
33-
return cov
34-
35-
def calculate_best_transform(src_points: np.array, dst_points: np.array):
36-
'''
37-
Calculates the best transform that maps points between two point clouds.
38-
The cross-covariance matrix between the point clouds is calculated, then
39-
rotations and translations are extracted using singular value decomposition.
40-
41-
Args:
42-
src_points(np.array): The coordinates of the first point cloud.
43-
dst_points(np.array): The coordiantes of the second point cloud.
44-
45-
Returns:
46-
(np.array)(src_points.shape[1]+1)x(src_points.shape[1]+1) homogeneous transformation matrix that maps src_points on to dst_points
47-
(np.array)(src_points.shape[1]xsrc_points.shape[1]) rotation matrix
48-
(np.array)(src_points.shape[1]x1) translation vector
49-
'''
50-
# translate points to their centroids
51-
centroid_src_points = np.mean(src_points, axis=0)
52-
centroid_dst_points = np.mean(dst_points, axis=0)
53-
# compute covariance
54-
cov = compute_cross_covariace(src_points, dst_points, centroid_src_points, centroid_dst_points)
55-
# rotation matrix
56-
U, S, Vt = np.linalg.svd(cov)
57-
R = np.dot(Vt.T, U.T)
58-
# get number of dimensions
59-
m = src_points.shape[1]
60-
# special reflection case
61-
if np.linalg.det(R) < 0:
62-
Vt[m-1,:] *= -1
63-
R = np.dot(Vt.T, U.T)
64-
# translation
65-
t = centroid_dst_points.T - np.dot(R,centroid_src_points.T)
66-
# homogeneous transformation
67-
T = np.identity(m+1)
68-
T[:m, :m] = R
69-
T[:m, m] = t
70-
return T, R, t
71-
72-
def nearest_neighbor(src_points: np.array, dst_points: np.array):
73-
'''
74-
Find the nearest neighbor between the point clouds
75-
76-
Args:
77-
src_points(np.array): The coordinates of the first point cloud.
78-
dst_points(np.array): The coordiantes of the second point cloud.
79-
80-
Return:
81-
(np.ndarray): Distances from the src_points to the closest points in dst_points
82-
(np.ndarray): Indices of the nearest points in the point clouds.
83-
'''
84-
neigh = NearestNeighbors(n_neighbors=1)
85-
neigh.fit(dst_points)
86-
neigh_dist, neigh_ind = neigh.kneighbors(src_points, return_distance=True)
87-
return neigh_dist.ravel(), neigh_ind.ravel()
88-
89-
def icp(src_points: np.array, dst_points: np.array, init_pose: np.array=None, max_iterations: int=20, tolerance: float=0.001):
90-
'''
91-
Finds best-fit transform that maps points in src_points on to points in dst_points
92-
using Iterative Closest Point method.
93-
94-
Input:
95-
src_points(np.array): The coordinates of the first point cloud.
96-
dst_points(np.array): The coordiantes of the second point cloud.
97-
init_pose(np.array): homogeneous transformation
98-
max_iterations(int): exit algorithm after max_iterations
99-
tolerance(float): convergence criteria
100-
101-
Return:
102-
(np.array)(src_points.shape[1]+1)x(src_points.shape[1]+1) homogeneous transformation matrix that maps src_points on to dst_points
103-
Euclidean distances (errors) of the nearest neighbor
104-
number of iterations to converge
105-
'''
106-
# get number of dimensions
107-
m = src_points.shape[1]
108-
# make points homogeneous, copy them to maintain the originals
109-
src = np.ones((m+1, src_points.shape[0]))
110-
dst = np.ones((m+1, dst_points.shape[0]))
111-
src[:m,:] = np.copy(src_points.T)
112-
dst[:m,:] = np.copy(dst_points.T)
113-
# apply the initial pose estimation
114-
if init_pose is not None:
115-
src = np.dot(init_pose, src)
116-
prev_error = 0
117-
for i in range(max_iterations):
118-
# find the nearest neighbors between the current source and destination points
119-
distances, indices = nearest_neighbor(src[:m,:].T, dst[:m,:].T)
120-
# compute the transformation between the current source and nearest destination points
121-
T,_,_ = calculate_best_transform(src[:m,:].T, dst[:m,indices].T)
122-
# update the current source
123-
src = np.dot(T, src)
124-
# check error
125-
mean_error = np.mean(distances)
126-
if np.abs(prev_error - mean_error) < tolerance:
127-
break
128-
prev_error = mean_error
129-
# calculate final transformation
130-
T,_,_ = calculate_best_transform(src_points, src[:m,:].T)
131-
return T, distances, i
132-
133-
def icp_pos(src_points, dst_points, init_pose=None, max_iterations: int=20, tolerance: float=0.001):
134-
'''
135-
Translate position of src_points to match best-fit transform that maps points in src_points on to points in dst_points
136-
using Iterative Closest Point method.
137-
138-
Input:
139-
src_points(np.array): The coordinates of the first point cloud.
140-
dst_points(np.array): The coordiantes of the second point cloud.
141-
init_pose(np.array): homogeneous transformation
142-
max_iterations(int): exit algorithm after max_iterations
143-
tolerance(float): convergence criteria
144-
145-
Return:
146-
(np.array)(src_points.shape): Translated src_points to match best-fit transform to points dst_points
147-
'''
148-
m = src_points.shape[1]
149-
T, distances, i = icp(src_points, dst_points, init_pose, max_iterations, tolerance)
150-
src = np.ones((m+1, src_points.shape[0]))
151-
src[:m,:] = np.copy(src_points.T)
152-
src = np.dot(T, src)
153-
return src[:m,:].T
154-
"""
15513
import mrcfile
15614
import numpy as np
15715
import MDAnalysis as mda
15816

15917

16018
def to_point_cloud(mrcdata, voxel, contour_level):
19+
"""
20+
Convert an EM density map to a point cloud.
21+
Args:
22+
mrcdata - ndarray of size resolutionXresolutionXresolution
23+
containg MRC data.
24+
voxel - voxel size, from the mrcfile library, should contain three
25+
member variables, x, y, and z (floats), for the voxel size in those
26+
dimensions.
27+
contour_level: density above which to add a point to the point cloud,
28+
a float.
29+
Returns:
30+
point cloud as an ndarray with three columns (x, y, z) and a row for
31+
each point.
32+
"""
16133
point_cloud = []
16234
for x in range(len(mrcdata)):
16335
for y in range(len(mrcdata[x])):
@@ -167,7 +39,18 @@ def to_point_cloud(mrcdata, voxel, contour_level):
16739
return np.array(point_cloud)
16840

16941
def score_pdb_map(pdb, em_map, contour_level):
170-
42+
"""
43+
For a given pdb and em_map, convert them to point clouds and score their
44+
similarity based on the error in an alignment between two point clouds.
45+
Args:
46+
pdb - path to a pdb file, a string
47+
em_map - path to an EM map file for the same structure as that pdb,
48+
a string.
49+
contour_level: density above which to add a point to the point cloud,
50+
a float.
51+
Returns:
52+
the error, a float
53+
"""
17154
with mrcfile.open(em_map) as mrc:
17255
vsize = mrc.voxel_size
17356
point_cloud = to_point_cloud(mrc.data, mrc.voxel_size, 0.01)

prepmd/prep.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from prepmd import run
2020
from prepmd import fix
2121
from prepmd import model
22+
from prepmd import pdb2pqr as pqr
2223

2324
parser = argparse.ArgumentParser(prog="prepmd",
2425
description="Get structures from the PDB ready for "
@@ -50,16 +51,19 @@
5051
" an external fasta file", action="store_true")
5152
parser.add_argument("-m", "--leavemissing",
5253
help="Don't restore missing atoms", action="store_true")
53-
parser.add_argument("-p", "--pqrfmt",
54+
parser.add_argument("-p", "--pqr",
55+
help="PQR output filename. If set, will output a PQR "
56+
"as well as a pdb.")
57+
parser.add_argument("-ff", "--pqrfmt",
5458
help="Force field to use for creating the PQR file. Can be"
5559
"AMBER or LAMMPS. Will only output a PQR file if this is "
56-
"set.")
60+
"set.", default="AMBER")
5761
parser.add_argument("-r", "--redo", help="Get PDB from PDB-REDO",
5862
action="store_true")
5963
parser.add_argument("-n", "--num",
6064
help="Number of models to create with MODELLER. The best "
6165
"model will be selected based on the MODELLER objective "
62-
"function.", type=int)
66+
"function.", type=int, default=1)
6367
parser.add_argument("-em", "--em_map",
6468
help="If multiple models are being generated, the best "
6569
"model will be selected based on agreement with this "
@@ -83,8 +87,8 @@ def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
8387
def prep(code, outmodel, workingdir, folder=None, fastafile=None, inmodel=None,
8488
alignmentout="alignment_out.fasta", download_format="mmCif",
8589
quiet=False, fix_after=True, download_sequence=False,
86-
fix_missing_atoms=True, write_metadata="prepmeta.json", pqrff=None,
87-
redo=False, num_models=1, em_map=None, em_contour=None):
90+
fix_missing_atoms=True, write_metadata="prepmeta.json", pqrff="AMBER",
91+
pqr_out=None, redo=False, num_models=1, em_map=None,em_contour=None):
8892
"""
8993
Prepare a PDB/MMCIF structure file for simulation.
9094
Args:
@@ -109,8 +113,9 @@ def prep(code, outmodel, workingdir, folder=None, fastafile=None, inmodel=None,
109113
UNIPROT sequence is normally very different from the pdb sequence, so
110114
the alignment might fail. a boolean
111115
fix_missing_atoms: whether to add missing atoms with pdbfixer. A bool
112-
pqrff: if this is set, will output a PQR file created with this force
113-
field. A string, can be AMBER or CHARMM
116+
pqrff: A string, can be AMBER or CHARMM. Force field to use for the
117+
PQR creation. Note: when I tested, CHARMM was buggy.
118+
pqr_out: output PQR filename, a string.
114119
redo: if True, will download PDB from PDB-REDO instead of the regular
115120
PDB.
116121
num_models: number of models to generate, an int. If >1, the best model
@@ -251,16 +256,32 @@ def in_string(substr, text): return text == None or substr in text.lower()
251256
# fix.restore_metadata_pdb(inmodel, outmodel)
252257
#if ".cif" in inmodel or ".mmcif" in inmodel:
253258
# print("Metadata restoration not implemented for mmCif (yet)")
259+
260+
if pqr_out:
261+
print("Creating PQR...")
262+
pqr.run_pdb2pqr(outmodel, pqr_out, ff=pqrff)
254263

255264
print("Simulating "+code)
256-
run.test_sim(outmodel)
265+
if pqr_out:
266+
#run.test_sim(pqr_out)
267+
run.run(pqr_out, minimised_structure_out=None, md_steps=None,
268+
integrator_str="LangevinMiddleIntegrator",
269+
pressure=None, test_sim_steps=250)
270+
else:
271+
run.test_sim(outmodel)
257272
print("Done.")
258273

259274
with open(write_metadata, "w") as file:
260275
file.write(locals_json)
261276

262-
if not os.path.isabs(outmodel):
277+
if not os.path.isabs(outmodel) and not pqr_out:
263278
shutil.copyfile(outmodel, run_dir+os.path.sep+outmodel)
279+
280+
if not os.path.isabs(outmodel) and pqr_out:
281+
shutil.copyfile(pqr_out, run_dir+os.path.sep+pqr_out)
282+
print("Wrote final PQR file to "+pqr_out+".")
283+
284+
if not os.path.isabs(outmodel):
264285
shutil.copyfile(write_metadata, run_dir+os.path.sep+write_metadata)
265286

266287

@@ -275,8 +296,9 @@ def entry_point():
275296
alignmentout=args.alignmentout, download_format=args.dlformat,
276297
quiet=args.quiet, fix_after=fix_after,
277298
download_sequence=args.download, fix_missing_atoms=args.leavemissing,
278-
pqrff=args.pqrfmt, redo=args.redo, num_models=args.num,
279-
em_map = args.em_map, em_contour=args.contour)
299+
pqrff=args.pqrfmt, pqr_out=args.pqr,
300+
redo=args.redo, num_models=args.num, em_map = args.em_map,
301+
em_contour=args.contour)
280302

281303

282304
if __name__ == "__main__":

prepmd/run.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
"amoeba,None": ['amoeba2018.xml'],
3636
"amber19,None": ['amber19-all.xml'],
3737
"amber19,implicit": ['amber19-all.xml', 'implicit/gbn2.xml'],
38-
"amber14,implicit": ['amber14-all.xml', 'implicit/gbn2.xml'],
38+
"amber14,implicit": ['amber14-all.xml', 'implicit/gbn2.xml',],
3939
"charmm36,implicit": ['charmm36.xml', 'implicit/gbn2.xml'],
4040
"amoeba,implicit": ['amoeba2018.xml', 'amoeba2009_gk.xml'],
4141
}

test/test_all.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ def test_bestpdb(self, tmp_path):
4343
str(path)+os.path.sep+"testout"+os.path.sep+code+"_test",
4444
download_format="pdb",
4545
num_models=2, em_map="22281", em_contour=0.01)
46+
47+
def test_pqr(self, tmp_path):
48+
path = str(tmp_path)
49+
code = "1UBQ"
50+
prep(code,
51+
str(path)+os.path.sep+code+"."+"pdb",
52+
str(path)+os.path.sep+"testout"+os.path.sep+code+"_test",
53+
download_format="pdb",
54+
pqr_out=str(path)+os.path.sep+code+"."+"pqr",)
4655

4756
# removed: 6TY4, 6XOV, 9I3U, 8RTO (too slow!)
4857

0 commit comments

Comments
 (0)