Skip to content

Commit 37d55c2

Browse files
authored
Merge pull request #19 from padix-key/infer-elements
Infer missing elements for consistency with Biotite
2 parents bfd4283 + 3c55415 commit 37d55c2

2 files changed

Lines changed: 53 additions & 25 deletions

File tree

python-src/fastpdb/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,18 @@ def get_structure(self, model=None, altloc="first", extra_fields=None, include_b
137137
element = np.frombuffer(element, dtype="U2")
138138
altloc_id = np.frombuffer(altloc_id, dtype="U1")
139139

140+
# Replace empty strings for elements with guessed types
141+
# This is used e.g. for PDB files created by Gromacs
142+
empty_element_mask = element == ""
143+
if empty_element_mask.any():
144+
warnings.warn(
145+
f"{np.count_nonzero(empty_element_mask)} elements "
146+
"were guessed from atom name"
147+
)
148+
element[empty_element_mask] = struc.infer_elements(
149+
atom_name[empty_element_mask]
150+
)
151+
140152
if coord.ndim == 3:
141153
atoms = struc.AtomArrayStack(coord.shape[0], coord.shape[1])
142154
atoms.coord = coord

tests/test_fastpdb.py

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,22 @@
88
"""
99

1010
import itertools
11-
import glob
1211
from io import StringIO
13-
from os.path import join, dirname, realpath
12+
from pathlib import Path
1413
import pytest
1514
import biotite
1615
import numpy as np
1716
import biotite.structure.io.pdb as pdb
1817
import fastpdb as fastpdb
1918

20-
DATA_PATH = join(dirname(realpath(__file__)), "data")
21-
TEST_STRUCTURES = glob.glob(join(DATA_PATH, "*.pdb"))
19+
DATA_PATH = Path(__file__).parent / "data"
20+
TEST_STRUCTURES = list(DATA_PATH.glob("*.pdb"))
2221

2322

2423
def test_get_remark():
25-
ref_file = pdb.PDBFile.read(join(DATA_PATH, "1aki.pdb"))
26-
27-
test_file = fastpdb.PDBFile.read(join(DATA_PATH, "1aki.pdb"))
24+
ref_file = pdb.PDBFile.read(DATA_PATH / "1aki.pdb")
25+
26+
test_file = fastpdb.PDBFile.read(DATA_PATH / "1aki.pdb")
2827

2928
for remark in np.arange(0, 1000):
3029
assert test_file.get_remark(remark) == ref_file.get_remark(remark)
@@ -35,9 +34,9 @@ def test_get_remark():
3534
)
3635
def test_get_model_count(path):
3736
ref_file = pdb.PDBFile.read(path)
38-
37+
3938
test_file = fastpdb.PDBFile.read(path)
40-
39+
4140

4241
assert ref_file.get_model_count() == test_file.get_model_count()
4342

@@ -60,7 +59,7 @@ def test_get_coord(path, model):
6059
return
6160
else:
6261
raise
63-
62+
6463
test_file = fastpdb.PDBFile.read(path)
6564
test_coord = test_file.get_coord(model)
6665

@@ -83,8 +82,8 @@ def test_get_structure(path, model, altloc, extra_fields, include_bonds):
8382
extra_fields = ["atom_id", "b_factor", "occupancy", "charge"]
8483
else:
8584
extra_fields = None
86-
87-
85+
86+
8887
ref_file = pdb.PDBFile.read(path)
8988
try:
9089
ref_atoms = ref_file.get_structure(
@@ -98,28 +97,27 @@ def test_get_structure(path, model, altloc, extra_fields, include_bonds):
9897
else:
9998
raise
10099

101-
102100
test_file = fastpdb.PDBFile.read(path)
103101
test_atoms = test_file.get_structure(
104102
model, altloc, extra_fields, include_bonds
105103
)
106104

107-
105+
108106
if ref_atoms.box is not None:
109107
assert np.allclose(test_atoms.box, ref_atoms.box)
110108
else:
111109
assert test_atoms.box is None
112-
110+
113111
assert test_atoms.bonds == ref_atoms.bonds
114-
112+
115113
for category in ref_atoms.get_annotation_categories():
116114
if np.issubdtype(ref_atoms.get_annotation(category).dtype, float):
117115
assert test_atoms.get_annotation(category).tolist() \
118116
== pytest.approx(ref_atoms.get_annotation(category).tolist())
119117
else:
120118
assert test_atoms.get_annotation(category).tolist() \
121119
== ref_atoms.get_annotation(category).tolist()
122-
120+
123121
assert np.allclose(test_atoms.coord, ref_atoms.coord)
124122

125123

@@ -138,8 +136,7 @@ def test_set_structure(path, model, altloc, extra_fields, include_bonds):
138136
extra_fields = ["atom_id", "b_factor", "occupancy", "charge"]
139137
else:
140138
extra_fields = None
141-
142-
139+
143140
input_file = pdb.PDBFile.read(path)
144141
try:
145142
atoms = input_file.get_structure(
@@ -153,7 +150,6 @@ def test_set_structure(path, model, altloc, extra_fields, include_bonds):
153150
else:
154151
raise
155152

156-
157153
ref_file = pdb.PDBFile()
158154
ref_file.set_structure(atoms)
159155
ref_file_content = StringIO()
@@ -164,7 +160,6 @@ def test_set_structure(path, model, altloc, extra_fields, include_bonds):
164160
test_file_content = StringIO()
165161
test_file.write(test_file_content)
166162

167-
168163
assert test_file_content.getvalue() == ref_file_content.getvalue()
169164

170165

@@ -174,8 +169,29 @@ def test_get_assembly():
174169
as `get_assembly()` is not explicitly implemented in
175170
`fastpdb.PDBFile`.
176171
"""
177-
ref_file = pdb.PDBFile.read(join(DATA_PATH, "1aki.pdb"))
178-
179-
test_file = fastpdb.PDBFile.read(join(DATA_PATH, "1aki.pdb"))
172+
ref_file = pdb.PDBFile.read(DATA_PATH / "1aki.pdb")
173+
174+
test_file = fastpdb.PDBFile.read(DATA_PATH / "1aki.pdb")
175+
176+
assert test_file.get_assembly() == ref_file.get_assembly()
177+
178+
179+
@pytest.mark.filterwarnings("ignore")
180+
def test_inferred_elements(tmp_path):
181+
# Read valid pdb file
182+
pdb_file = fastpdb.PDBFile.read(DATA_PATH / "1l2y.pdb")
183+
atoms = pdb_file.get_structure()
184+
# Remove all elements
185+
atoms_wo_elements = atoms.copy()
186+
atoms_wo_elements.element[:] = ''
187+
# Save stack without elements to file
188+
temp = tmp_path / "tmp.pdb"
189+
tmp_pdb_file = pdb.PDBFile()
190+
tmp_pdb_file.set_structure(atoms_wo_elements)
191+
tmp_pdb_file.write(temp)
192+
193+
# Read new stack from file with guessed elements
194+
guessed_pdb_file = fastpdb.PDBFile.read(temp)
195+
atoms_guessed_elements = guessed_pdb_file.get_structure()
180196

181-
assert test_file.get_assembly() == ref_file.get_assembly()
197+
assert atoms_guessed_elements.element.tolist() == atoms.element.tolist()

0 commit comments

Comments
 (0)