@@ -62,6 +62,23 @@ def __init__(self, decoy, ref, verbose=False):
6262 def __repr__ (self ):
6363 return f'{ self .__module__ } .{ self .__class__ .__name__ } ({ self .decoy } , { self .ref } , { self .verbose } )'
6464
65+ def check_residues (self ):
66+ """Check if the residue numbering matches."""
67+
68+ res_ref = pdb2sql (self .ref ).get_residues ()
69+ res_dec = pdb2sql (self .decoy ).get_residues ()
70+
71+ if res_ref != res_dec :
72+ print ('Residues are different in the reference and decoy' )
73+ print ('Residues found in %s and not in %s' %
74+ (self .ref , self .decoy ))
75+ print (set (res_ref ).difference (set (res_dec )))
76+ print ('Residues found in %s and not in %s' %
77+ (self .decoy , self .ref ))
78+ print (set (res_dec ).difference (set (res_ref )))
79+ raise ValueError (
80+ 'Residue numbering not identical in ref and decoy' )
81+
6582 ##########################################################################
6683 #
6784 # FAST ROUTINE TO COMPUTE THE L-RMSD
@@ -71,16 +88,14 @@ def __repr__(self):
7188 #
7289 ##########################################################################
7390
74- # TODO add output zone files
75-
7691 # compute the L-RMSD
77- def compute_lrmsd_fast (self , lzone = None , method = 'svd' , check = True ):
92+ def compute_lrmsd_fast (self , lzone = None , method = 'svd' , check = True , name = [ 'C' , 'CA' , 'N' , 'O' ] ):
7893 """Fast routine to compute the L-RMSD.
7994
8095 L-RMSD is computed by aligning the longest chain of the decoy to
8196 the one of the reference and computing the RMSD of the shortest
82- chain between decoy and reference. Both fitting and rms calculation
83- use only backbone atoms. See reference:
97+ chain between decoy and reference. By default, both fitting and
98+ rms calculation use only backbone atoms. See reference:
8499
85100 DockQ: A Quality Measure for Protein-Protein Docking Models
86101 https://doi.org/10.1371/journal.pone.0161879
@@ -92,6 +107,8 @@ def compute_lrmsd_fast(self, lzone=None, method='svd', check=True):
92107 'svd' or 'quaternion'.
93108 check (bool, optional): Check if the sequences are aligned
94109 and fix it if not. Defaults to True.
110+ name (list, optional): atom name to include in the zone.
111+ Defaults to ['C', 'CA', 'N', 'O']
95112
96113 Returns:
97114 float: L-RMSD value of the conformation
@@ -115,30 +132,39 @@ def compute_lrmsd_fast(self, lzone=None, method='svd', check=True):
115132 # 1. get_data_zone_backbone returns in_zone and not_in_zone
116133 # here the in_zone defines the zone for fitting,
117134 # and not_in_zone defines the zone for rms calculation.
118- # 2. the decoy and ref pdb must have consistent residue
119- # numbering, otherwise e.g. shifted numbering can also give
120- # results which is totally wrong, because the code here does
121- # not do sequence alignment.
135+
136+ self .check_residues ()
122137
123138 data_decoy_long , data_decoy_short = self .get_data_zone_backbone (
124- self .decoy , resData , return_not_in_zone = True )
139+ self .decoy , resData , return_not_in_zone = True , name = name )
140+
125141 data_ref_long , data_ref_short = self .get_data_zone_backbone (
126- self .ref , resData , return_not_in_zone = True )
142+ self .ref , resData , return_not_in_zone = True , name = name )
143+
144+ if data_decoy_long .symmetric_difference (data_ref_long ) != set ():
145+ raise ValueError (
146+ 'Issue in the calculation of the l-rmsd' )
127147
128148 atom_long = data_ref_long .intersection (data_decoy_long )
129149 xyz_decoy_long = self ._get_xyz (self .decoy , atom_long )
130150 xyz_ref_long = self ._get_xyz (self .ref , atom_long )
131151
152+ if data_decoy_short .symmetric_difference (data_ref_short ) != set ():
153+ raise ValueError (
154+ 'Issue in the calculation of the l-rmsd' )
155+
132156 atom_short = data_ref_short .intersection (data_decoy_short )
133157 xyz_decoy_short = self ._get_xyz (self .decoy , atom_short )
134158 xyz_ref_short = self ._get_xyz (self .ref , atom_short )
135159
136160 # extract the xyz
137161 else :
162+
138163 xyz_decoy_long , xyz_decoy_short = self .get_xyz_zone_backbone (
139- self .decoy , resData , return_not_in_zone = True )
164+ self .decoy , resData , return_not_in_zone = True , name = name )
165+
140166 xyz_ref_long , xyz_ref_short = self .get_xyz_zone_backbone (
141- self .ref , resData , return_not_in_zone = True )
167+ self .ref , resData , return_not_in_zone = True , name = name )
142168
143169 xyz_decoy_short = superpose_selection (
144170 xyz_decoy_short , xyz_decoy_long , xyz_ref_long , method )
@@ -218,12 +244,8 @@ def compute_lzone(self, save_file=True, filename=None):
218244 #
219245 ##########################################################################
220246
221- def compute_irmsd_fast (
222- self ,
223- izone = None ,
224- method = 'svd' ,
225- cutoff = 10 ,
226- check = True ):
247+ def compute_irmsd_fast (self , izone = None , method = 'svd' ,
248+ cutoff = 10 , check = True ):
227249 """Fast method to compute the i-rmsd.
228250
229251 i-RMSD is computed by selecting the backbone atoms of reference
@@ -254,16 +276,25 @@ def compute_irmsd_fast(
254276 # read the izone file
255277 if izone is None :
256278 resData = self .compute_izone (cutoff , save_file = False )
279+ elif not os .path .isfile (izone ):
280+ resData = self .compute_izone (
281+ cutoff , save_file = True , filename = izone )
257282 else :
258283 resData = self .read_zone (izone )
259284
260285 if check :
261286
287+ self .check_residues ()
288+
262289 data_decoy = self .get_data_zone_backbone (
263290 self .decoy , resData , return_not_in_zone = False )
264291 data_ref = self .get_data_zone_backbone (
265292 self .ref , resData , return_not_in_zone = False )
266293
294+ if data_ref .symmetric_difference (data_decoy ) != set ():
295+ raise ValueError (
296+ 'Issue in the calculation of the i-rmsd' )
297+
267298 atom_common = data_ref .intersection (data_decoy )
268299 xyz_contact_decoy = self ._get_xyz (self .decoy , atom_common )
269300 xyz_contact_ref = self ._get_xyz (self .ref , atom_common )
@@ -277,7 +308,8 @@ def compute_irmsd_fast(
277308
278309 # superpose the fragments
279310 xyz_contact_decoy = superpose_selection (xyz_contact_decoy ,
280- xyz_contact_decoy , xyz_contact_ref , method )
311+ xyz_contact_decoy ,
312+ xyz_contact_ref , method )
281313
282314 # return the RMSD
283315 return self .get_rmsd (xyz_contact_decoy , xyz_contact_ref )
@@ -473,7 +505,7 @@ def compute_residue_pairs_ref(
473505 ##########################################################################
474506
475507 # compute the L-RMSD
476- def compute_lrmsd_pdb2sql (self , exportpath = None , method = 'svd' ):
508+ def compute_lrmsd_pdb2sql (self , exportpath = None , method = 'svd' , ** kwargs ):
477509 """Slow routine to compute the L-RMSD.
478510
479511 L-RMSD is computed by aligning the longest chain of the decoy to
@@ -490,13 +522,25 @@ def compute_lrmsd_pdb2sql(self, exportpath=None, method='svd'):
490522 method (str, optional): Method to align the fragments,
491523 'svd' or 'quaternion'.
492524
525+ Kwargs: selection keywords used in the pdb2sql.get() method :
526+ 'rowID', 'serial', 'name', 'altLoc',
527+ 'resName', 'resSeq', 'iCode',
528+ 'x', 'y', 'z', 'occ', 'temp', 'element', 'model'
529+
530+
493531 Returns:
494532 float: L-RMSD value of the conformation
495533
496534 See also:
497535 :meth:`compute_lrmsd_fast`
498536 """
499537 backbone = ['CA' , 'C' , 'N' , 'O' ]
538+ if 'name' not in kwargs :
539+ kwargs ['name' ] = backbone
540+
541+ if 'chainID' in kwargs :
542+ raise ValueError (
543+ 'do not specify chainID in compute_lrmsd_pdb2sql' )
500544
501545 # create the sql
502546 sql_decoy = pdb2sql (self .decoy , sqlfile = 'decoy.db' )
@@ -515,24 +559,24 @@ def compute_lrmsd_pdb2sql(self, exportpath=None, method='svd'):
515559
516560 # extract the pos of chains A
517561 xyz_decoy_A = np .array (
518- sql_decoy .get ('x,y,z' , chainID = chain1 , name = backbone ))
562+ sql_decoy .get ('x,y,z' , chainID = chain1 , ** kwargs ))
519563 xyz_ref_A = np .array (sql_ref .get (
520- 'x,y,z' , chainID = chain1 , name = backbone ))
564+ 'x,y,z' , chainID = chain1 , ** kwargs ))
521565
522566 # extract the pos of chains B
523567 xyz_decoy_B = np .array (
524- sql_decoy .get ('x,y,z' , chainID = chain2 , name = backbone ))
568+ sql_decoy .get ('x,y,z' , chainID = chain2 , ** kwargs ))
525569 xyz_ref_B = np .array (sql_ref .get (
526- 'x,y,z' , chainID = chain2 , name = backbone ))
570+ 'x,y,z' , chainID = chain2 , ** kwargs ))
527571
528572 # check the lengthes
529573 if len (xyz_decoy_A ) != len (xyz_ref_A ):
530574 xyz_decoy_A , xyz_ref_A = self .get_identical_atoms (
531- sql_decoy , sql_ref , chain1 )
575+ sql_decoy , sql_ref , chain1 , ** kwargs )
532576
533577 if len (xyz_decoy_B ) != len (xyz_ref_B ):
534578 xyz_decoy_B , xyz_ref_B = self .get_identical_atoms (
535- sql_decoy , sql_ref , chain2 )
579+ sql_decoy , sql_ref , ** kwargs )
536580
537581 # detect which chain is the longest
538582 nA , nB = len (xyz_decoy_A ), len (xyz_decoy_B )
@@ -611,23 +655,28 @@ def compute_lrmsd_pdb2sql(self, exportpath=None, method='svd'):
611655 # RETURN THE ATOMS THAT ARE SHARED BY THE TWO DB
612656 # FOR A GIVEN CHAINID
613657 @staticmethod
614- def get_identical_atoms (db1 , db2 , chain ):
658+ def get_identical_atoms (db1 , db2 , chain , ** kwargs ):
615659 """Return that atoms shared by both databse for a specific chain.
616660
617661 Args:
618662 db1 (TYPE): pdb2sql database of the first conformation
619663 db2 (TYPE): pdb2sql database of the 2nd conformation
620664 chain (str): chain name
621665
666+ Kwargs: selection keywords used in the pdb2sql.get() method :
667+ 'rowID', 'serial', 'name', 'altLoc',
668+ 'resName', 'chainID', 'resSeq', 'iCode',
669+ 'x', 'y', 'z', 'occ', 'temp', 'element', 'model'
670+
622671 Returns:
623672 list, list: list of xyz for both database
624673 """
625- backbone = [ 'CA' , 'C' , 'N' , 'O' ]
674+
626675 # get data
627676 data1 = db1 .get ('chainID,resSeq,name' ,
628- chainID = chain , name = backbone )
677+ chainID = chain , ** kwargs )
629678 data2 = db2 .get ('chainID,resSeq,name' ,
630- chainID = chain , name = backbone )
679+ chainID = chain , ** kwargs )
631680
632681 # tuplify
633682 data1 = [tuple (d1 ) for d1 in data1 ]
@@ -922,7 +971,7 @@ def compute_fnat_pdb2sql(self, cutoff=5.0):
922971 #
923972 ##########################################################################
924973 @staticmethod
925- def get_xyz_zone_backbone (pdb_file , resData , return_not_in_zone = False ):
974+ def get_xyz_zone_backbone (pdb_file , resData , return_not_in_zone = False , name = [ 'C' , 'CA' , 'N' , 'O' ] ):
926975 """Get the xyz of zone backbone atoms.
927976
928977 Args:
@@ -950,18 +999,17 @@ def get_xyz_zone_backbone(pdb_file, resData, return_not_in_zone=False):
950999 chainID = line [72 ]
9511000
9521001 resSeq = int (line [22 :26 ])
953- name = line [12 :16 ].strip ()
1002+ atname = line [12 :16 ].strip ()
9541003
9551004 x = float (line [30 :38 ])
9561005 y = float (line [38 :46 ])
9571006 z = float (line [46 :54 ])
9581007
959- backbone = ['C' , 'CA' , 'N' , 'O' ]
960- if chainID in resData .keys ():
961- if resSeq in resData [chainID ] and name in backbone :
962- xyz_in_zone .append ([x , y , z ])
963- else :
964- if name in backbone :
1008+ if atname in name :
1009+ if chainID in resData .keys ():
1010+ if resSeq in resData [chainID ]:
1011+ xyz_in_zone .append ([x , y , z ])
1012+ else :
9651013 xyz_not_in_zone .append ([x , y , z ])
9661014
9671015 if return_not_in_zone :
@@ -971,7 +1019,7 @@ def get_xyz_zone_backbone(pdb_file, resData, return_not_in_zone=False):
9711019 return xyz_in_zone
9721020
9731021 @staticmethod
974- def get_data_zone_backbone (pdb_file , resData , return_not_in_zone = False ):
1022+ def get_data_zone_backbone (pdb_file , resData , return_not_in_zone = False , name = [ 'C' , 'CA' , 'N' , 'O' ] ):
9751023 """Get the data (chainID, resSeq, name) of backbone atoms in the zone.
9761024
9771025 Args:
@@ -999,19 +1047,14 @@ def get_data_zone_backbone(pdb_file, resData, return_not_in_zone=False):
9991047 chainID = line [72 ]
10001048
10011049 resSeq = int (line [22 :26 ])
1002- name = line [12 :16 ].strip ()
1003-
1004- backbone = ['C' , 'CA' , 'N' , 'O' ]
1005-
1006- if chainID in resData .keys ():
1007-
1008- if resSeq in resData [chainID ] and name in backbone :
1009- data_in_zone .append ((chainID , resSeq , name ))
1010-
1011- else :
1012- if name in backbone :
1013- data_not_in_zone .append (
1014- (chainID , resSeq , name ))
1050+ atname = line [12 :16 ].strip ()
1051+
1052+ if atname in name :
1053+ if chainID in resData .keys ():
1054+ if resSeq in resData [chainID ]:
1055+ data_in_zone .append ((chainID , resSeq , atname ))
1056+ else :
1057+ data_not_in_zone .append ((chainID , resSeq , atname ))
10151058
10161059 if return_not_in_zone :
10171060 return set (data_in_zone ), set (data_not_in_zone )
0 commit comments