Skip to content

Commit 6c5adb9

Browse files
authored
Merge pull request #111 from apdavison/cache-fix
Invalidate KGClient.cache after writes to prevent stale-data bugs
2 parents bdc8d5c + a31ad93 commit 6c5adb9

4 files changed

Lines changed: 130 additions & 3 deletions

File tree

fairgraph/client.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,14 @@ def update_instance(self, instance_id: str, data: JsonLdDocument) -> JsonLdDocum
528528
extended_response_configuration=default_response_configuration,
529529
)
530530
error_context = f"update_instance(data={data}, instance_id={instance_id})"
531-
return self._check_response(response, error_context=error_context).data
531+
response_data = self._check_response(response, error_context=error_context).data
532+
# The cached document for this URI is now stale. Drop it so the next
533+
# fetch goes to the KG. We deliberately do not refresh the cache from
534+
# the response, because depending on the server's `returnPayload`
535+
# default the response may be a minimal document (e.g. just `@id`)
536+
# rather than the full updated instance.
537+
self.cache.pop(self.uri_from_uuid(instance_id), None)
538+
return response_data
532539

533540
def replace_instance(self, instance_id: str, data: JsonLdDocument) -> JsonLdDocument:
534541
"""
@@ -548,7 +555,10 @@ def replace_instance(self, instance_id: str, data: JsonLdDocument) -> JsonLdDocu
548555
extended_response_configuration=default_response_configuration,
549556
)
550557
error_context = f"replace_instance(data={data}, instance_id={instance_id})"
551-
return self._check_response(response, error_context=error_context).data
558+
response_data = self._check_response(response, error_context=error_context).data
559+
# See update_instance for rationale: invalidate, don't refresh.
560+
self.cache.pop(self.uri_from_uuid(instance_id), None)
561+
return response_data
552562

553563
def delete_instance(self, instance_id: str, ignore_not_found: bool = True, ignore_errors: bool = True):
554564
"""
@@ -563,6 +573,8 @@ def delete_instance(self, instance_id: str, ignore_not_found: bool = True, ignor
563573
if response: # error
564574
if not ignore_errors:
565575
raise Exception(response.message)
576+
else:
577+
self.cache.pop(self.uri_from_uuid(instance_id), None)
566578
return response
567579

568580
def uri_from_uuid(self, uuid: str) -> str:

fairgraph/kgobject.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -756,7 +756,6 @@ def save(
756756
try:
757757
assert self.uuid is not None
758758
client.replace_instance(self.uuid, local_data)
759-
# what does this return? Can we use it to update `remote_data`?
760759
except AuthorizationError as err:
761760
if ignore_auth_errors:
762761
logger.error(str(err))

test/test_client.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,3 +234,115 @@ def test_delete_instance(kg_client, mocker):
234234
fake_id = "00000000-0000-0000-0000-000000000000"
235235
response = kg_client.delete_instance(fake_id)
236236
kg_client._kg_client.instances.delete.assert_called_once_with(fake_id)
237+
238+
239+
@pytest.fixture
240+
def offline_kg_client(mocker):
241+
"""A KGClient that can be constructed without network access, for testing
242+
behaviour that doesn't require a real KG. The underlying kg-core SDK methods
243+
must be patched per-test."""
244+
from fairgraph.client import KGClient
245+
246+
client = KGClient(token="fake-token", allow_interactive=False)
247+
# Skip the feature-detection fetch that the `migrated` property triggers.
248+
client._migrated = True
249+
# `instance_from_full_uri` uses this to build the cache key after writes
250+
mocker.patch.object(
251+
client._kg_client.instances._kg_config,
252+
"id_namespace",
253+
"https://kg.ebrains.eu/api/instances/",
254+
create=True,
255+
)
256+
return client
257+
258+
259+
class TestCacheInvalidationOnWrite:
260+
"""Regression tests for the bug where writes left stale entries in
261+
`client.cache`, causing subsequent `from_id(use_cache=True)` calls to
262+
return out-of-date data and `save()` to no-op on what looked like a
263+
legitimate modification. See issue #110."""
264+
265+
uuid = "00000000-0000-0000-0000-000000000000"
266+
uri = "https://kg.ebrains.eu/api/instances/00000000-0000-0000-0000-000000000000"
267+
268+
def test_update_instance_invalidates_cache(self, offline_kg_client, mocker):
269+
offline_kg_client.cache[self.uri] = {"@id": self.uri, "stale": True}
270+
mocker.patch.object(
271+
offline_kg_client._kg_client.instances,
272+
"contribute_to_partial_replacement",
273+
lambda **kw: MockKGResponse({"@id": self.uri}),
274+
)
275+
offline_kg_client.update_instance(self.uuid, {"some": "patch"})
276+
assert self.uri not in offline_kg_client.cache
277+
278+
def test_replace_instance_invalidates_cache(self, offline_kg_client, mocker):
279+
offline_kg_client.cache[self.uri] = {"@id": self.uri, "stale": True}
280+
mocker.patch.object(
281+
offline_kg_client._kg_client.instances,
282+
"contribute_to_full_replacement",
283+
lambda **kw: MockKGResponse({"@id": self.uri}),
284+
)
285+
offline_kg_client.replace_instance(self.uuid, {"some": "data"})
286+
assert self.uri not in offline_kg_client.cache
287+
288+
def test_delete_instance_invalidates_cache(self, offline_kg_client, mocker):
289+
offline_kg_client.cache[self.uri] = {"@id": self.uri}
290+
mocker.patch.object(offline_kg_client._kg_client.instances, "delete", return_value=None)
291+
offline_kg_client.delete_instance(self.uuid)
292+
assert self.uri not in offline_kg_client.cache
293+
294+
def test_unlink_after_refetch_sends_patch(self, offline_kg_client, mocker):
295+
"""End-to-end: this is the user-visible bug. Load a DatasetVersion,
296+
link a subject, save; re-load it via `from_id`, set the link back to
297+
`None`, save again — the second save must PATCH studiedSpecimen=None,
298+
not be a silent no-op."""
299+
from fairgraph.openminds.core import DatasetVersion, Subject
300+
301+
sub_uri = "https://kg.ebrains.eu/api/instances/00000000-0000-0000-0000-000000000abc"
302+
studied_specimen_path = "https://openminds.om-i.org/props/studiedSpecimen"
303+
# Server-side state of the DSV, mutated by each PATCH so subsequent
304+
# `instance_from_full_uri` calls see fresh data.
305+
server_state = {
306+
"@id": self.uri,
307+
"@type": ["https://openminds.om-i.org/types/DatasetVersion"],
308+
"http://schema.org/identifier": [self.uri],
309+
"https://core.kg.ebrains.eu/vocab/meta/space": "myspace",
310+
}
311+
312+
def get_by_id(stage, instance_id, extended_response_configuration):
313+
return MockKGResponse(dict(server_state))
314+
315+
def contribute_to_partial_replacement(instance_id, payload, extended_response_configuration):
316+
for key, value in payload.items():
317+
if value is None:
318+
server_state.pop(key, None)
319+
else:
320+
server_state[key] = value
321+
return MockKGResponse(dict(server_state))
322+
323+
mocker.patch.object(offline_kg_client._kg_client.instances, "get_by_id", get_by_id)
324+
mocker.patch.object(
325+
offline_kg_client._kg_client.instances,
326+
"contribute_to_partial_replacement",
327+
contribute_to_partial_replacement,
328+
)
329+
330+
# 1. Load fresh, link a subject, save.
331+
dsv = DatasetVersion.from_id(self.uuid, offline_kg_client, scope="any")
332+
dsv.studied_specimens = [Subject(id=sub_uri)]
333+
dsv.save(offline_kg_client, space="myspace", recursive=False)
334+
assert studied_specimen_path in server_state, "first save should have linked the subject"
335+
336+
# 2. Re-fetch via from_id. Before the fix, this would have returned
337+
# stale cached data with no studiedSpecimen.
338+
dsv2 = DatasetVersion.from_id(self.uuid, offline_kg_client, scope="any")
339+
assert dsv2.studied_specimens is not None, (
340+
"re-fetched object must see the link added by the prior save"
341+
)
342+
343+
# 3. Unlink and save. The PATCH must clear studiedSpecimen on the server.
344+
dsv2.studied_specimens = None
345+
dsv2.save(offline_kg_client, space="myspace", recursive=False)
346+
assert studied_specimen_path not in server_state, (
347+
"second save should have sent a PATCH that cleared studiedSpecimen"
348+
)

test/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class MockKGClient:
6363

6464
def __init__(self):
6565
self.instances = {}
66+
self.cache = {}
6667

6768
def retrieve_query(self, query_label):
6869
return {"@id": f"mock-query-{query_label}"}
@@ -175,6 +176,9 @@ def replace_instance(self, instance_id, data):
175176
assert instance_id is not None
176177
assert data is not None
177178

179+
def uri_from_uuid(self, uuid):
180+
return f"https://kg.ebrains.eu/api/instances/{uuid}"
181+
178182

179183
@pytest.fixture
180184
def mock_client():

0 commit comments

Comments
 (0)