Skip to content

Commit b9b6eef

Browse files
committed
Address review round 2: self-heal corrupt cache entries; comment accuracy
- get_cached_result: explicitly catch decode/JSON errors from a corrupt or truncated gz: entry, purge it, and return None so it repopulates on the next call (was swallowed by the outer handler -> permanent miss). (Copilot)
- _decode_cache_field: reword the comment so it no longer implies the function itself purges (the caller does), and fix "un-caught" -> "uncaught". (Copilot)
- test: a corrupt gz: payload decodes to the raw string without raising.
1 parent 268a2d8 commit b9b6eef

2 files changed

Lines changed: 20 additions & 5 deletions

File tree

src/vfbquery/solr_result_cache.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,10 @@ def _decode_cache_field(cached_field) -> str:
6868
blob = base64.b64decode(cached_field[len(_CACHE_GZIP_PREFIX):])
6969
return gzip.decompress(blob).decode("utf-8")
7070
except Exception:
71-
# Corrupt/truncated gz payload: return the raw string so callers'
72-
# json.loads fails and the entry is treated as invalid (and purged),
73-
# rather than raising an un-caught error that aborts cleanup/stats.
71+
# Corrupt/truncated gz payload: return the raw string rather than
72+
# raising an uncaught error that would abort cleanup/stats runs. The
73+
# caller's json.loads then fails, so the entry is treated as invalid
74+
# JSON (get_cached_result purges it; other callers skip it).
7475
logger.warning("Failed to decode compressed cache payload; treating as invalid")
7576
return cached_field
7677
return cached_field
@@ -309,8 +310,15 @@ def get_cached_result(self, query_type: str, term_id: str, **params) -> Optional
309310
if isinstance(cached_field, list):
310311
cached_field = cached_field[0]
311312

312-
# Parse the cached metadata and result
313-
cached_data = json.loads(_decode_cache_field(cached_field))
313+
# Parse the cached metadata and result. A corrupt/undecodable entry
314+
# (e.g. a truncated gz: payload) raises here; purge it so the next
315+
# call repopulates, rather than leaving a permanent cache miss.
316+
try:
317+
cached_data = json.loads(_decode_cache_field(cached_field))
318+
except (ValueError, TypeError):
319+
logger.warning(f"Corrupt cache entry for {query_type}({term_id}); clearing it")
320+
self._clear_expired_cache_document(cache_doc_id)
321+
return None
314322

315323
# Check package version before anything else so stale cache is rejected early.
316324
# Only invalidate when the cached entry is OLDER than the current code

tests/test_gzip_cache.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ def test_roundtrip_compresses_and_restores():
1313
assert _decode_cache_field(enc) == env
1414

1515

16+
def test_decode_returns_raw_string_on_corrupt_gz_payload():
17+
# A gz:-prefixed but undecodable value must not raise; it returns the raw
18+
# string so the caller's json.loads fails and the entry is treated as invalid.
19+
for bad in (_CACHE_GZIP_PREFIX + "!!!not-base64!!!", _CACHE_GZIP_PREFIX + "AAAA"):
20+
assert _decode_cache_field(bad) == bad
21+
22+
1623
def test_decode_handles_legacy_plain_json_and_list_shape():
1724
legacy = json.dumps({"result": 1})
1825
assert _decode_cache_field(legacy) == legacy

0 commit comments

Comments
 (0)