Skip to content

Commit 268a2d8

Browse files
committed
Address PR review: defensive cache decode, stronger cap test, drop manual version bump
- _decode_cache_field: catch base64/gzip/unicode errors on corrupt gz: payloads and return the raw string, so callers treat it as invalid JSON (and purge it) instead of aborting cleanup/stats runs. (Copilot)
- test: prove the cap is enforced on the COMPRESSED payload (raw > cap, compressed < cap) with a small cap and a highly compressible result. (Copilot)
- revert manual _version.py bump (1.22.0 -> 1.21.0); the release workflow owns the version, and a manual mismatch forces cache recomputes. Legacy plain-JSON entries are still read transparently, so no invalidation is needed. (Clare)
1 parent 695561c commit 268a2d8

3 files changed

Lines changed: 23 additions & 11 deletions

File tree

src/vfbquery/_version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,4 @@
66
cache's version stamp can never drift apart.
77
"""
88

9-
__version__ = "1.22.0"
9+
__version__ = "1.21.0"

src/vfbquery/solr_result_cache.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,8 +64,15 @@ def _decode_cache_field(cached_field) -> str:
6464
if isinstance(cached_field, list):
6565
cached_field = cached_field[0] if cached_field else ""
6666
if isinstance(cached_field, str) and cached_field.startswith(_CACHE_GZIP_PREFIX):
67-
blob = base64.b64decode(cached_field[len(_CACHE_GZIP_PREFIX):])
68-
return gzip.decompress(blob).decode("utf-8")
67+
try:
68+
blob = base64.b64decode(cached_field[len(_CACHE_GZIP_PREFIX):])
69+
return gzip.decompress(blob).decode("utf-8")
70+
except Exception:
71+
# Corrupt/truncated gz payload: return the raw string so callers'
72+
# json.loads fails and the entry is treated as invalid (and purged),
73+
# rather than raising an un-caught error that aborts cleanup/stats.
74+
logger.warning("Failed to decode compressed cache payload; treating as invalid")
75+
return cached_field
6976
return cached_field
7077

7178

tests/test_gzip_cache.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,19 @@ def test_decode_handles_legacy_plain_json_and_list_shape():
2121
assert _decode_cache_field([enc]) == legacy
2222

2323

24-
def test_cap_is_on_compressed_size():
25-
c = SolrResultCache(max_result_size_mb=100)
26-
assert c.max_result_size_bytes == 100 * 1024 * 1024
27-
big = {"result": {"rows": [{"id": i, "name": "n"} for i in range(300000)]},
28-
"cached_at": "2026-01-01T00:00:00+00:00",
29-
"expires_at": "2026-04-01T00:00:00+00:00", "result_size": 0}
30-
enc = _encode_cache_field(json.dumps(big))
31-
assert len(enc.encode("utf-8")) < c.max_result_size_bytes
24+
def test_cap_is_enforced_on_compressed_not_raw_size():
25+
# Small cap + a highly compressible payload: the RAW JSON must exceed the cap
26+
# while the gzip+base64 form stays under it, proving the cap is on the stored
27+
# (compressed) size, not the raw size. Kept fast/memory-light via repetition.
28+
cap_mb = 1
29+
c = SolrResultCache(max_result_size_mb=cap_mb)
30+
cap = cap_mb * 1024 * 1024
31+
assert c.max_result_size_bytes == cap
32+
payload = json.dumps({"result": {"rows": ["x" * 100] * 50000}}) # ~5 MB raw, compresses hard
33+
raw = len(payload.encode("utf-8"))
34+
compressed = len(_encode_cache_field(payload).encode("utf-8"))
35+
assert raw > cap, f"raw {raw} should exceed cap {cap}"
36+
assert compressed < cap, f"compressed {compressed} should be under cap {cap}"
3237

3338

3439
def test_env_override(monkeypatch):

0 commit comments

Comments
 (0)