Skip to content

Commit d4b2989

Browse files
committed
check residue numbering
1 parent 11f309e commit d4b2989

1 file changed

Lines changed: 49 additions & 15 deletions

File tree

pdb2sql/StructureSimilarity.py

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,23 @@ def __init__(self, decoy, ref, verbose=False):
6262
def __repr__(self):
6363
return f'{self.__module__}.{self.__class__.__name__}({self.decoy}, {self.ref}, {self.verbose})'
6464

65+
def check_residues(self):
66+
"""Check if the residue numbering matches."""
67+
68+
res_ref = pdb2sql(self.ref).get_residues()
69+
res_dec = pdb2sql(self.decoy).get_residues()
70+
71+
if res_ref != res_dec:
72+
print('Residues are different in the reference and decoy')
73+
print('Residues found in %s and not in %s' %
74+
(self.ref, self.decoy))
75+
print(set(res_ref).difference(set(res_dec)))
76+
print('Residues found in %s and not in %s' %
77+
(self.decoy, self.ref))
78+
print(set(res_dec).difference(set(res_ref)))
79+
raise ValueError(
80+
'Residue numbering not identical in ref and decoy')
81+
6582
##########################################################################
6683
#
6784
# FAST ROUTINE TO COMPUTE THE L-RMSD
@@ -71,8 +88,6 @@ def __repr__(self):
7188
#
7289
##########################################################################
7390

74-
# TODO add output zone files
75-
7691
# compute the L-RMSD
7792
def compute_lrmsd_fast(self, lzone=None, method='svd', check=True, name=['C', 'CA', 'N', 'O']):
7893
"""Fast routine to compute the L-RMSD.
@@ -122,23 +137,36 @@ def compute_lrmsd_fast(self, lzone=None, method='svd', check=True, name=['C', 'C
122137
# results which is totally wrong, because the code here does
123138
# not do sequence alignment.
124139

140+
self.check_residues()
141+
125142
data_decoy_long, data_decoy_short = self.get_data_zone_backbone(
126143
self.decoy, resData, return_not_in_zone=True, name=name)
144+
127145
data_ref_long, data_ref_short = self.get_data_zone_backbone(
128146
self.ref, resData, return_not_in_zone=True, name=name)
129147

148+
if data_decoy_long.symmetric_difference(data_ref_long) != set():
149+
raise ValueError(
150+
'Issue in the calculation of the l-rmsd')
151+
130152
atom_long = data_ref_long.intersection(data_decoy_long)
131153
xyz_decoy_long = self._get_xyz(self.decoy, atom_long)
132154
xyz_ref_long = self._get_xyz(self.ref, atom_long)
133155

156+
if data_decoy_short.symmetric_difference(data_ref_short) != set():
157+
raise ValueError(
158+
'Issue in the calculation of the l-rmsd')
159+
134160
atom_short = data_ref_short.intersection(data_decoy_short)
135161
xyz_decoy_short = self._get_xyz(self.decoy, atom_short)
136162
xyz_ref_short = self._get_xyz(self.ref, atom_short)
137163

138164
# extract the xyz
139165
else:
166+
140167
xyz_decoy_long, xyz_decoy_short = self.get_xyz_zone_backbone(
141168
self.decoy, resData, return_not_in_zone=True, name=name)
169+
142170
xyz_ref_long, xyz_ref_short = self.get_xyz_zone_backbone(
143171
self.ref, resData, return_not_in_zone=True, name=name)
144172

@@ -959,6 +987,7 @@ def get_xyz_zone_backbone(pdb_file, resData, return_not_in_zone=False, name=['C'
959987
# get the xyz of the
960988
xyz_in_zone = []
961989
xyz_not_in_zone = []
990+
print(resData)
962991

963992
for line in data:
964993
if line.startswith('ATOM'):
@@ -973,14 +1002,17 @@ def get_xyz_zone_backbone(pdb_file, resData, return_not_in_zone=False, name=['C'
9731002
y = float(line[38:46])
9741003
z = float(line[46:54])
9751004

976-
if chainID in resData.keys():
977-
if resSeq in resData[chainID] and atname in name:
1005+
if chainID in resData.keys() and atname in name:
1006+
1007+
if resSeq in resData[chainID]:
9781008
xyz_in_zone.append([x, y, z])
979-
elif resSeq not in resData[chainID] and atname in name:
1009+
1010+
elif resSeq not in resData[chainID]:
9801011
xyz_not_in_zone.append([x, y, z])
1012+
9811013
else:
982-
if atname in backbone:
983-
xyz_not_in_zone.append([x, y, z])
1014+
xyz_not_in_zone.append([x, y, z])
1015+
print('not', chainID, resSeq, atname, x, y, z)
9841016

9851017
if return_not_in_zone:
9861018
return xyz_in_zone, xyz_not_in_zone
@@ -1019,17 +1051,19 @@ def get_data_zone_backbone(pdb_file, resData, return_not_in_zone=False, name=['C
10191051
resSeq = int(line[22:26])
10201052
atname = line[12:16].strip()
10211053

1022-
if chainID in resData.keys():
1054+
if atname in name:
10231055

1024-
if resSeq in resData[chainID] and atname in name:
1025-
data_in_zone.append((chainID, resSeq, atname))
1056+
if chainID in resData.keys():
10261057

1027-
elif resSeq not in resData[chainID] and atname in name:
1028-
data_not_in_zone.append(
1029-
(chainID, resSeq, atname))
1058+
if resSeq in resData[chainID]:
1059+
data_in_zone.append(
1060+
(chainID, resSeq, atname))
10301061

1031-
else:
1032-
if atname in name:
1062+
elif resSeq not in resData[chainID]:
1063+
data_not_in_zone.append(
1064+
(chainID, resSeq, atname))
1065+
1066+
else:
10331067
data_not_in_zone.append(
10341068
(chainID, resSeq, atname))
10351069

0 commit comments

Comments
 (0)