Skip to content

Commit 07b2291

Browse files
authored
Properly create assemblies and species (#172)
Fix assemblies and species creation in the DLite parse strategy. Pass in config as a normal dict in test. Extend dlite parse strategy test: It now checks that the content of the dlite instances in the collection matches what comes from the OPTIMADE response.
1 parent 2e2b8a4 commit 07b2291

3 files changed

Lines changed: 193 additions & 37 deletions

File tree

oteapi_optimade/dlite/entities/OPTIMADEStructureSpecies.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ meta: http://onto-ns.com/meta/0.3/EntitySchema
33
description: Species can represent pure chemical elements, virtual-crystal atoms representing a statistical occupation of a given site by multiple chemical elements, and/or a location to which there are attached atoms, i.e., atoms whose precise locations are unknown beyond that they are attached to that position (frequently used to indicate hydrogen atoms attached to another element, e.g., a carbon with three attached hydrogens might represent a methyl group, -CH3).
44
dimensions:
55
nelements: Number of chemical elements composing this species.
6-
nattached_elements: Number of checmical symbols for the elements attached to this species.
6+
nattached_elements: Number of chemical symbols for the elements attached to this species.
77
properties:
88
name:
99
type: string

oteapi_optimade/dlite/parse.py

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -210,31 +210,61 @@ def get(
210210

211211
# DLite-fy OPTIMADE structures
212212
for structure in structures:
213-
new_structure_attributes = {}
213+
new_structure_attributes: dict[str, "Any"] = {}
214214

215215
# Most inner layer: assemblies & species
216216
if structure.attributes.assemblies:
217-
dimensions = {
218-
"ngroups": len(structure.attributes.assemblies),
219-
"nsites": max(len(_) for _ in structure.attributes.assemblies),
220-
}
221-
new_structure_attributes["assemblies"] = OPTIMADEStructureAssembly(
222-
dimensions=dimensions, properties=structure.attributes.assemblies
223-
)
217+
# Non-zero length list of assemblies (which could be a list of dicts or
218+
# a list of pydantic models)
219+
220+
new_structure_attributes["assemblies"] = []
221+
222+
for assembly in structure.attributes.assemblies:
223+
# Ensure we're dealing with a normal Python dict
224+
assembly = (
225+
assembly.dict(exclude_none=True)
226+
if isinstance(assembly, BaseModel)
227+
else assembly
228+
)
229+
230+
dimensions = {
231+
"ngroups": len(assembly.get("group_probabilities", []) or []),
232+
"nsites": len(assembly.get("sites_in_groups", []) or []),
233+
}
234+
new_structure_attributes["assemblies"].append(
235+
OPTIMADEStructureAssembly(
236+
dimensions=dimensions, properties=assembly
237+
)
238+
)
239+
224240
if structure.attributes.species:
225-
dimensions = {
226-
"nelements": structure.attributes.nelements,
227-
"nattached_elements": max(
228-
_.nattached or 0 for _ in structure.attributes.species
229-
),
230-
}
231-
new_structure_attributes["species"] = [
232-
OPTIMADEStructureSpecies(
233-
dimensions=dimensions,
234-
properties=species,
241+
# Non-zero length list of species (which could be a list of dicts or a
242+
# list of pydantic models)
243+
244+
new_structure_attributes["species"] = []
245+
246+
for species_individual in structure.attributes.species:
247+
# Ensure we're dealing with a normal Python dict
248+
species_individual = (
249+
species_individual.dict(exclude_none=True)
250+
if isinstance(species_individual, BaseModel)
251+
else species_individual
252+
)
253+
254+
dimensions = {
255+
"nelements": len(
256+
species_individual.get("chemical_symbols", []) or []
257+
),
258+
"nattached_elements": len(
259+
species_individual.get("attached", []) or []
260+
),
261+
}
262+
new_structure_attributes["species"].append(
263+
OPTIMADEStructureSpecies(
264+
dimensions=dimensions,
265+
properties=species_individual,
266+
)
235267
)
236-
for species in structure.attributes.species
237-
]
238268

239269
# Attributes
240270
new_structure_attributes.update(

tests/dlite/test_parse.py

Lines changed: 142 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,46 +5,51 @@
55

66
if TYPE_CHECKING:
77
from pathlib import Path
8+
from typing import Any
9+
10+
from dlite import Instance
811

912

1013
@pytest.mark.parametrize("return_object", [True, False])
1114
def test_parse(static_files: "Path", return_object: bool) -> None:
1215
"""Test parsing."""
16+
from datetime import datetime
17+
from enum import Enum
1318
import json
1419

20+
from numpy import ndarray
21+
from optimade.adapters import Structure
1522
from oteapi.datacache import DataCache
1623
from oteapi_dlite.utils import get_collection
1724

1825
from oteapi_optimade.dlite.parse import OPTIMADEDLiteParseStrategy
19-
from oteapi_optimade.models import OPTIMADEDLiteParseConfig
2026
from oteapi_optimade.models.custom_types import OPTIMADEUrl
2127

2228
url = OPTIMADEUrl(
2329
"https://example.org/some/base/v0.1/optimade/v1/structures"
2430
'?filter=elements HAS ALL "Si","O"&sort=nelements&page_limit=2'
2531
)
26-
config = OPTIMADEDLiteParseConfig(
27-
**{
28-
"mediaType": "application/vnd.OPTIMADE+DLite",
29-
"downloadUrl": url,
30-
"configuration": {
31-
"datacache_config": {
32-
"expireTime": 60 * 60 * 24,
33-
"tag": "optimade",
34-
"accessKey": url,
35-
},
36-
"return_object": return_object,
32+
config = {
33+
"mediaType": "application/vnd.OPTIMADE+DLite",
34+
"downloadUrl": url,
35+
"configuration": {
36+
"datacache_config": {
37+
"expireTime": 60 * 60 * 24,
38+
"tag": "optimade",
39+
"accessKey": url,
3740
},
38-
}
39-
)
41+
"return_object": return_object,
42+
},
43+
}
4044

41-
cache = DataCache(config.configuration.datacache_config)
45+
cache = DataCache(config["configuration"]["datacache_config"])
4246
sample_file = static_files / "optimade_response.json"
47+
response_json: dict[str, "Any"] = json.loads(sample_file.read_bytes())
4348
cache.add(
4449
{
4550
"status_code": 200,
4651
"ok": True,
47-
"json": json.loads(sample_file.read_bytes()),
52+
"json": response_json,
4853
}
4954
)
5055

@@ -53,3 +58,124 @@ def test_parse(static_files: "Path", return_object: bool) -> None:
5358

5459
dlite_collection = get_collection(session)
5560
assert dlite_collection
61+
62+
assert len(list(dlite_collection.get_labels())) == len(response_json["data"])
63+
64+
for structure in response_json["data"]:
65+
optimade_structure = Structure(structure)
66+
dlite_collection_labels = list(dlite_collection.get_labels())
67+
68+
# The structure `id` is used as the label for the instance in the DLite
69+
# collection
70+
assert optimade_structure.id in dlite_collection_labels
71+
72+
dlite_structure: "Instance" = dlite_collection[optimade_structure.id]
73+
74+
## Go over other top-level non-container keys in the OPTIMADE structure
75+
assert dlite_structure.type == optimade_structure.type
76+
77+
## attributes
78+
79+
# Avoid attributes with special model values for now
80+
model_values = ("assemblies", "species")
81+
82+
for field in optimade_structure.attributes.__fields__:
83+
if field in model_values:
84+
continue
85+
86+
expected_value = getattr(optimade_structure.attributes, field)
87+
88+
# Ensure expected values come in the right format
89+
if expected_value and isinstance(expected_value, list):
90+
# We expect that all internal types in the list are the same
91+
if isinstance(expected_value[0], Enum):
92+
expected_value = [value.value for value in expected_value]
93+
if isinstance(expected_value[0], datetime):
94+
expected_value = [
95+
value.isoformat(sep=" ") for value in expected_value
96+
]
97+
98+
if isinstance(expected_value, Enum):
99+
expected_value = expected_value.value
100+
if isinstance(expected_value, datetime):
101+
expected_value = expected_value.isoformat(sep=" ")
102+
103+
dlite_value = getattr(dlite_structure.attributes, field)
104+
105+
# Convert NumPy NDArrays to lists
106+
if isinstance(dlite_value, ndarray):
107+
dlite_value = dlite_value.tolist()
108+
# Convert string "None" values to actual Python None values
109+
if isinstance(dlite_value, str) and dlite_value == "None":
110+
dlite_value = None
111+
112+
assert dlite_value == expected_value, f"Field: {field}"
113+
114+
# Special attributes
115+
for field in model_values:
116+
# The model value fields are lists of models
117+
expected_value = getattr(optimade_structure.attributes, field)
118+
if expected_value is None:
119+
assert getattr(dlite_structure.attributes, field) is None
120+
continue
121+
122+
assert isinstance(expected_value, list)
123+
assert isinstance(getattr(dlite_structure.attributes, field), ndarray)
124+
125+
for i, entry in enumerate(getattr(dlite_structure.attributes, field)):
126+
for sub_field in getattr(optimade_structure.attributes, field)[
127+
0
128+
].__fields__:
129+
expected_sub_value = getattr(expected_value[i], sub_field)
130+
dlite_sub_value = getattr(entry, sub_field)
131+
132+
# Ensure expected values come in the right format
133+
if expected_sub_value and isinstance(expected_sub_value, list):
134+
# We expect that all internal types in the list are the same
135+
if isinstance(expected_sub_value[0], Enum):
136+
expected_sub_value = [
137+
value.value for value in expected_sub_value
138+
]
139+
if isinstance(expected_sub_value[0], datetime):
140+
expected_sub_value = [
141+
value.isoformat(sep=" ") for value in expected_sub_value
142+
]
143+
144+
if isinstance(expected_sub_value, Enum):
145+
expected_sub_value = expected_sub_value.value
146+
if isinstance(expected_sub_value, datetime):
147+
expected_sub_value = expected_sub_value.isoformat(sep=" ")
148+
149+
# Convert NumPy NDArrays to lists
150+
if isinstance(dlite_sub_value, ndarray):
151+
dlite_sub_value = dlite_sub_value.tolist()
152+
# Convert string "None" values to actual Python None values
153+
if isinstance(dlite_sub_value, str) and dlite_sub_value == "None":
154+
dlite_sub_value = None
155+
156+
# If an optional field is not present in the data, it will be None.
157+
# In DLite it will always be instantiated with the default values.
158+
# This is an issue with a shaped properties.
159+
# Here we update the expected_sub_value accordingly, with respect
160+
# to specific known optional sub-fields.
161+
162+
# species.mass
163+
if field == "species" and sub_field == "mass":
164+
if expected_sub_value is None:
165+
expected_sub_value = [0.0] * len(
166+
expected_value[i].chemical_symbols
167+
)
168+
169+
# species.attached
170+
if field == "species" and sub_field == "attached":
171+
if expected_sub_value is None:
172+
expected_sub_value = []
173+
174+
# species.nattached
175+
if field == "species" and sub_field == "nattached":
176+
if expected_sub_value is None:
177+
expected_sub_value = []
178+
179+
assert dlite_sub_value == (
180+
expected_sub_value or []
181+
), f"Field: {field}, sub-field: {sub_field}"

0 commit comments

Comments
 (0)