Skip to content

Commit 9e4dcc1

Browse files
committed
PDB2PQR output option
1 parent 35e2cc9 commit 9e4dcc1

7 files changed

Lines changed: 84 additions & 165 deletions

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ dist/
1414
downloads/
1515
eggs/
1616
.eggs/
17-
lib/
17+
#lib/
1818
lib64/
1919
parts/
2020
sdist/

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
A utility to automatically prepare structures from the PDB for molecular dynamics simulation and perform minimisations and simple MD simulations.
66

77
## Features
8-
* [X] Automatically download structures, sequences and metadata from the PDB and UNIPROT
8+
* [X] Automatically download structures, sequences and metadata from the PDB, PDB-REDO, EMDB and UNIPROT
99
* [X] Automatically fill missing loops with modeller
1010
* [X] Automatically add missing atoms and fix non-standard residues with pdbfixer
1111
* [X] Automatically resolve steric clashes and minimise structures

prepmd/pdb2pqr.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@
66
@author: rob
77
"""
88

9-
#import pdb2pqr
9+
import pdb2pqr
1010

11-
# steps:
12-
# run as normal but not including test simulation and fixing
13-
# apply pdb2pqr before fixing
14-
# then fix, and especially add missing atoms AND remove hetatms
15-
# then run as normal
11+
def run_pdb2pqr(infile, outfile, ff="AMBER"):
12+
"""
13+
Run PDB2PQR.
1614
17-
#pdb2pqr.run_pdb2pqr("A")
18-
# example usage: UBQ.pdb 1UBQ.pqr --titration-state-method=propka --with-ph=7 --ff=CHARMM --ffout=CHARMM
15+
Args:
16+
infile - path to input file, a string
17+
outfile - path to output PQR file, a string
18+
ff - force field to use for calculations and residue naming
19+
Returns:
20+
nothing, but writes 'outfile'
21+
"""
22+
pdb2pqr.run_pdb2pqr([infile, outfile, "--titration-state-method=propka",
23+
"--with-ph=7", "--ff="+ff, "--ffout="+ff, "--drop-water"])

prepmd/point_cloud.py

Lines changed: 26 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -10,154 +10,26 @@
1010
from sklearn.neighbors import NearestNeighbors
1111
import numpy as np
1212
from prepmd.lib.icp import icp
13-
14-
"""
15-
# via Oriol Manzano Duran
16-
def compute_cross_covariace(src_points: np.array, dst_points: np.array, centroid_src_points: np.array, centroid_dst_points: np.array):
17-
'''
18-
Compute the cross-covariance between two set of point clouds.
19-
It will tell us how the coordinates of point in src_points change with changes in coodrinates of points belonging to dst_points
20-
21-
Args:
22-
src_points(np.array): The coordinates of the first point cloud to correlate.
23-
dst_points(np.array): The coordinates of the second points to correlate with the first.
24-
centroid_src_points(np.array): Center position of all points in first point cloud, equivalent to get the mean of all the points
25-
centroid_dst_points(np.array): Center position of all points in second point cloud, equivalent to get the mean of all the points
26-
27-
Return
28-
(np.array)(src_points.shape[1]xsrc_points.shape[1]): The cross-covariance matrix between the points
29-
'''
30-
centered_src_points: np.array = src_points - centroid_src_points
31-
centered_dst_points: np.array = dst_points - centroid_dst_points
32-
cov: np.array = np.dot(centered_src_points.T, centered_dst_points)
33-
return cov
34-
35-
def calculate_best_transform(src_points: np.array, dst_points: np.array):
36-
'''
37-
Calculates the best transform that maps points between two point clouds.
38-
The cross-covariance matrix between the point clouds is calculated, then
39-
rotations and translations are extracted using singular value decomposition.
40-
41-
Args:
42-
src_points(np.array): The coordinates of the first point cloud.
43-
dst_points(np.array): The coordiantes of the second point cloud.
44-
45-
Returns:
46-
(np.array)(src_points.shape[1]+1)x(src_points.shape[1]+1) homogeneous transformation matrix that maps src_points on to dst_points
47-
(np.array)(src_points.shape[1]xsrc_points.shape[1]) rotation matrix
48-
(np.array)(src_points.shape[1]x1) translation vector
49-
'''
50-
# translate points to their centroids
51-
centroid_src_points = np.mean(src_points, axis=0)
52-
centroid_dst_points = np.mean(dst_points, axis=0)
53-
# compute covariance
54-
cov = compute_cross_covariace(src_points, dst_points, centroid_src_points, centroid_dst_points)
55-
# rotation matrix
56-
U, S, Vt = np.linalg.svd(cov)
57-
R = np.dot(Vt.T, U.T)
58-
# get number of dimensions
59-
m = src_points.shape[1]
60-
# special reflection case
61-
if np.linalg.det(R) < 0:
62-
Vt[m-1,:] *= -1
63-
R = np.dot(Vt.T, U.T)
64-
# translation
65-
t = centroid_dst_points.T - np.dot(R,centroid_src_points.T)
66-
# homogeneous transformation
67-
T = np.identity(m+1)
68-
T[:m, :m] = R
69-
T[:m, m] = t
70-
return T, R, t
71-
72-
def nearest_neighbor(src_points: np.array, dst_points: np.array):
73-
'''
74-
Find the nearest neighbor between the point clouds
75-
76-
Args:
77-
src_points(np.array): The coordinates of the first point cloud.
78-
dst_points(np.array): The coordiantes of the second point cloud.
79-
80-
Return:
81-
(np.ndarray): Distances from the src_points to the closest points in dst_points
82-
(np.ndarray): Indices of the nearest points in the point clouds.
83-
'''
84-
neigh = NearestNeighbors(n_neighbors=1)
85-
neigh.fit(dst_points)
86-
neigh_dist, neigh_ind = neigh.kneighbors(src_points, return_distance=True)
87-
return neigh_dist.ravel(), neigh_ind.ravel()
88-
89-
def icp(src_points: np.array, dst_points: np.array, init_pose: np.array=None, max_iterations: int=20, tolerance: float=0.001):
90-
'''
91-
Finds best-fit transform that maps points in src_points on to points in dst_points
92-
using Iterative Closest Point method.
93-
94-
Input:
95-
src_points(np.array): The coordinates of the first point cloud.
96-
dst_points(np.array): The coordiantes of the second point cloud.
97-
init_pose(np.array): homogeneous transformation
98-
max_iterations(int): exit algorithm after max_iterations
99-
tolerance(float): convergence criteria
100-
101-
Return:
102-
(np.array)(src_points.shape[1]+1)x(src_points.shape[1]+1) homogeneous transformation matrix that maps src_points on to dst_points
103-
Euclidean distances (errors) of the nearest neighbor
104-
number of iterations to converge
105-
'''
106-
# get number of dimensions
107-
m = src_points.shape[1]
108-
# make points homogeneous, copy them to maintain the originals
109-
src = np.ones((m+1, src_points.shape[0]))
110-
dst = np.ones((m+1, dst_points.shape[0]))
111-
src[:m,:] = np.copy(src_points.T)
112-
dst[:m,:] = np.copy(dst_points.T)
113-
# apply the initial pose estimation
114-
if init_pose is not None:
115-
src = np.dot(init_pose, src)
116-
prev_error = 0
117-
for i in range(max_iterations):
118-
# find the nearest neighbors between the current source and destination points
119-
distances, indices = nearest_neighbor(src[:m,:].T, dst[:m,:].T)
120-
# compute the transformation between the current source and nearest destination points
121-
T,_,_ = calculate_best_transform(src[:m,:].T, dst[:m,indices].T)
122-
# update the current source
123-
src = np.dot(T, src)
124-
# check error
125-
mean_error = np.mean(distances)
126-
if np.abs(prev_error - mean_error) < tolerance:
127-
break
128-
prev_error = mean_error
129-
# calculate final transformation
130-
T,_,_ = calculate_best_transform(src_points, src[:m,:].T)
131-
return T, distances, i
132-
133-
def icp_pos(src_points, dst_points, init_pose=None, max_iterations: int=20, tolerance: float=0.001):
134-
'''
135-
Translate position of src_points to match best-fit transform that maps points in src_points on to points in dst_points
136-
using Iterative Closest Point method.
137-
138-
Input:
139-
src_points(np.array): The coordinates of the first point cloud.
140-
dst_points(np.array): The coordiantes of the second point cloud.
141-
init_pose(np.array): homogeneous transformation
142-
max_iterations(int): exit algorithm after max_iterations
143-
tolerance(float): convergence criteria
144-
145-
Return:
146-
(np.array)(src_points.shape): Translated src_points to match best-fit transform to points dst_points
147-
'''
148-
m = src_points.shape[1]
149-
T, distances, i = icp(src_points, dst_points, init_pose, max_iterations, tolerance)
150-
src = np.ones((m+1, src_points.shape[0]))
151-
src[:m,:] = np.copy(src_points.T)
152-
src = np.dot(T, src)
153-
return src[:m,:].T
154-
"""
15513
import mrcfile
15614
import numpy as np
15715
import MDAnalysis as mda
15816

15917

16018
def to_point_cloud(mrcdata, voxel, contour_level):
19+
"""
20+
Convert an EM density map to a point cloud.
21+
Args:
22+
mrcdata - ndarray of size resolutionXresolutionXresolution
23+
containg MRC data.
24+
voxel - voxel size, from the mrcfile library, should contain three
25+
member variables, x, y, and z (floats), for the voxel size in those
26+
dimensions.
27+
contour_level: density above which to add a point to the point cloud,
28+
a float.
29+
Returns:
30+
point cloud as an ndarray with three columns (x, y, z) and a row for
31+
each point.
32+
"""
16133
point_cloud = []
16234
for x in range(len(mrcdata)):
16335
for y in range(len(mrcdata[x])):
@@ -167,7 +39,18 @@ def to_point_cloud(mrcdata, voxel, contour_level):
16739
return np.array(point_cloud)
16840

16941
def score_pdb_map(pdb, em_map, contour_level):
170-
42+
"""
43+
For a given pdb and em_map, convert them to point clouds and score their
44+
similarity based on the error in an alignment between two point clouds.
45+
Args:
46+
pdb - path to a pdb file, a string
47+
em_map - path to an EM map file for the same structure as that pdb,
48+
a string.
49+
contour_level: density above which to add a point to the point cloud,
50+
a float.
51+
Returns:
52+
the error, a float
53+
"""
17154
with mrcfile.open(em_map) as mrc:
17255
vsize = mrc.voxel_size
17356
point_cloud = to_point_cloud(mrc.data, mrc.voxel_size, 0.01)

prepmd/prep.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from prepmd import run
2020
from prepmd import fix
2121
from prepmd import model
22+
from prepmd import pdb2pqr as pqr
2223

2324
parser = argparse.ArgumentParser(prog="prepmd",
2425
description="Get structures from the PDB ready for "
@@ -50,16 +51,19 @@
5051
" an external fasta file", action="store_true")
5152
parser.add_argument("-m", "--leavemissing",
5253
help="Don't restore missing atoms", action="store_true")
53-
parser.add_argument("-p", "--pqrfmt",
54+
parser.add_argument("-p", "--pqr",
55+
help="PQR output filename. If set, will output a PQR "
56+
"as well as a pdb.")
57+
parser.add_argument("-ff", "--pqrfmt",
5458
help="Force field to use for creating the PQR file. Can be"
5559
"AMBER or LAMMPS. Will only output a PQR file if this is "
56-
"set.")
60+
"set.", default="AMBER")
5761
parser.add_argument("-r", "--redo", help="Get PDB from PDB-REDO",
5862
action="store_true")
5963
parser.add_argument("-n", "--num",
6064
help="Number of models to create with MODELLER. The best "
6165
"model will be selected based on the MODELLER objective "
62-
"function.", type=int)
66+
"function.", type=int, default=1)
6367
parser.add_argument("-em", "--em_map",
6468
help="If multiple models are being generated, the best "
6569
"model will be selected based on agreement with this "
@@ -83,8 +87,8 @@ def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
8387
def prep(code, outmodel, workingdir, folder=None, fastafile=None, inmodel=None,
8488
alignmentout="alignment_out.fasta", download_format="mmCif",
8589
quiet=False, fix_after=True, download_sequence=False,
86-
fix_missing_atoms=True, write_metadata="prepmeta.json", pqrff=None,
87-
redo=False, num_models=1, em_map=None, em_contour=None):
90+
fix_missing_atoms=True, write_metadata="prepmeta.json", pqrff="AMBER",
91+
pqr_out=None, redo=False, num_models=1, em_map=None,em_contour=None):
8892
"""
8993
Prepare a PDB/MMCIF structure file for simulation.
9094
Args:
@@ -109,8 +113,9 @@ def prep(code, outmodel, workingdir, folder=None, fastafile=None, inmodel=None,
109113
UNIPROT sequence is normally very different from the pdb sequence, so
110114
the alignment might fail. a boolean
111115
fix_missing_atoms: whether to add missing atoms with pdbfixer. A bool
112-
pqrff: if this is set, will output a PQR file created with this force
113-
field. A string, can be AMBER or CHARMM
116+
pqrff: A string, can be AMBER or CHARMM. Force field to use for the
117+
PQR creation. Note: when I tested, CHARMM was buggy.
118+
pqr_out: output PQR filename, a string.
114119
redo: if True, will download PDB from PDB-REDO instead of the regular
115120
PDB.
116121
num_models: number of models to generate, an int. If >1, the best model
@@ -251,16 +256,32 @@ def in_string(substr, text): return text == None or substr in text.lower()
251256
# fix.restore_metadata_pdb(inmodel, outmodel)
252257
#if ".cif" in inmodel or ".mmcif" in inmodel:
253258
# print("Metadata restoration not implemented for mmCif (yet)")
259+
260+
if pqr_out:
261+
print("Creating PQR...")
262+
pqr.run_pdb2pqr(outmodel, pqr_out, ff=pqrff)
254263

255264
print("Simulating "+code)
256-
run.test_sim(outmodel)
265+
if pqr_out:
266+
#run.test_sim(pqr_out)
267+
run.run(pqr_out, minimised_structure_out=None, md_steps=None,
268+
integrator_str="LangevinMiddleIntegrator",
269+
pressure=None, test_sim_steps=250)
270+
else:
271+
run.test_sim(outmodel)
257272
print("Done.")
258273

259274
with open(write_metadata, "w") as file:
260275
file.write(locals_json)
261276

262-
if not os.path.isabs(outmodel):
277+
if not os.path.isabs(outmodel) and not pqr_out:
263278
shutil.copyfile(outmodel, run_dir+os.path.sep+outmodel)
279+
280+
if not os.path.isabs(outmodel) and pqr_out:
281+
shutil.copyfile(pqr_out, run_dir+os.path.sep+pqr_out)
282+
print("Wrote final PQR file to "+pqr_out+".")
283+
284+
if not os.path.isabs(outmodel):
264285
shutil.copyfile(write_metadata, run_dir+os.path.sep+write_metadata)
265286

266287

@@ -275,8 +296,9 @@ def entry_point():
275296
alignmentout=args.alignmentout, download_format=args.dlformat,
276297
quiet=args.quiet, fix_after=fix_after,
277298
download_sequence=args.download, fix_missing_atoms=args.leavemissing,
278-
pqrff=args.pqrfmt, redo=args.redo, num_models=args.num,
279-
em_map = args.em_map, em_contour=args.contour)
299+
pqrff=args.pqrfmt, pqr_out=args.pqr,
300+
redo=args.redo, num_models=args.num, em_map = args.em_map,
301+
em_contour=args.contour)
280302

281303

282304
if __name__ == "__main__":

prepmd/run.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
"amoeba,None": ['amoeba2018.xml'],
3636
"amber19,None": ['amber19-all.xml'],
3737
"amber19,implicit": ['amber19-all.xml', 'implicit/gbn2.xml'],
38-
"amber14,implicit": ['amber14-all.xml', 'implicit/gbn2.xml'],
38+
"amber14,implicit": ['amber14-all.xml', 'implicit/gbn2.xml',],
3939
"charmm36,implicit": ['charmm36.xml', 'implicit/gbn2.xml'],
4040
"amoeba,implicit": ['amoeba2018.xml', 'amoeba2009_gk.xml'],
4141
}

test/test_all.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ def test_bestpdb(self, tmp_path):
4343
str(path)+os.path.sep+"testout"+os.path.sep+code+"_test",
4444
download_format="pdb",
4545
num_models=2, em_map="22281", em_contour=0.01)
46+
47+
def test_pqr(self, tmp_path):
48+
path = str(tmp_path)
49+
code = "1UBQ"
50+
prep(code,
51+
str(path)+os.path.sep+code+"."+"pdb",
52+
str(path)+os.path.sep+"testout"+os.path.sep+code+"_test",
53+
download_format="pdb",
54+
pqr_out=str(path)+os.path.sep+code+"."+"pqr",)
4655

4756
# removed: 6TY4, 6XOV, 9I3U, 8RTO (too slow!)
4857

0 commit comments

Comments
 (0)