1010
1111from pydantic import ValidationError
1212
13+ from mcp_server_python_docs .cache .codec import decode as decode_cache_payload
14+ from mcp_server_python_docs .cache .codec import encode as encode_cache_payload
1315from mcp_server_python_docs .models import GetDocsResult
1416
1517logger = logging .getLogger (__name__ )
1618_NO_ANCHOR_KEY = "\x00 mcp-python-docs:no-anchor\x00 "
19+ DEFAULT_RETRIEVED_DOCS_CACHE_CODEC = "zstd"
1720
1821
1922class CacheStats (NamedTuple ):
@@ -25,8 +28,15 @@ class CacheStats(NamedTuple):
2528class PersistentDocsCache :
2629 """Persist get_docs results by index fingerprint, version, and request identity."""
2730
28- def __init__ (self , cache_path : Path , index_path : Path ) -> None :
31+ def __init__ (
32+ self ,
33+ cache_path : Path ,
34+ index_path : Path ,
35+ * ,
36+ default_codec : str = DEFAULT_RETRIEVED_DOCS_CACHE_CODEC ,
37+ ) -> None :
2938 self ._cache_path = Path (cache_path )
39+ self ._default_codec = default_codec
3040 # Set after fingerprint stat succeeds; stays "" if init fails so the
3141 # cache disables cleanly without leaking partial state.
3242 self ._fingerprint = ""
@@ -47,9 +57,11 @@ def __init__(self, cache_path: Path, index_path: Path) -> None:
4757 "CREATE TABLE IF NOT EXISTS retrieved_docs_cache ("
4858 "index_fingerprint TEXT NOT NULL, version TEXT NOT NULL, slug TEXT NOT NULL, "
4959 "anchor TEXT NOT NULL, max_chars INTEGER NOT NULL, start_index INTEGER NOT NULL, "
50- "result_json TEXT NOT NULL, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, "
60+ "result_json TEXT NOT NULL, compression TEXT NOT NULL DEFAULT 'none', "
61+ "created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, "
5162 "PRIMARY KEY (index_fingerprint, version, slug, anchor, max_chars, start_index))"
5263 )
64+ self ._ensure_compression_column ()
5365 self ._conn .execute (
5466 "DELETE FROM retrieved_docs_cache WHERE index_fingerprint != ?" ,
5567 (self ._fingerprint ,),
@@ -74,6 +86,18 @@ def _fingerprint_index(index_path: Path) -> str:
def _anchor_key(anchor: str | None) -> str:
    """Map an optional anchor to the string stored in the cache key.

    Returns ``anchor`` unchanged when it is a string, and the module-level
    ``_NO_ANCHOR_KEY`` sentinel when it is ``None`` (the ``anchor`` column of
    ``retrieved_docs_cache`` is declared ``NOT NULL``).
    """
    if anchor is None:
        return _NO_ANCHOR_KEY
    return anchor
7688
89+ def _ensure_compression_column (self ) -> None :
90+ if self ._conn is None :
91+ return
92+ columns = {
93+ row [1 ] for row in self ._conn .execute ("PRAGMA table_info(retrieved_docs_cache)" )
94+ }
95+ if "compression" not in columns :
96+ self ._conn .execute (
97+ "ALTER TABLE retrieved_docs_cache "
98+ "ADD COLUMN compression TEXT NOT NULL DEFAULT 'none'"
99+ )
100+
def stats(self) -> CacheStats:
    """Return the current hit, miss, and write counters as a ``CacheStats``.

    The three counters are passed positionally in that order.
    """
    hits, misses, writes = self._hits, self._misses, self._writes
    return CacheStats(hits, misses, writes)
79103
@@ -87,7 +111,8 @@ def get(
87111 with self ._lock :
88112 try :
89113 row = self ._conn .execute (
90- "SELECT result_json FROM retrieved_docs_cache WHERE index_fingerprint = ? "
114+ "SELECT result_json, compression FROM retrieved_docs_cache "
115+ "WHERE index_fingerprint = ? "
91116 "AND version = ? AND slug = ? AND anchor = ? AND max_chars = ? "
92117 "AND start_index = ?" ,
93118 (
@@ -107,8 +132,10 @@ def get(
107132 self ._misses += 1
108133 return None
109134 try :
110- result = GetDocsResult .model_validate_json (row [0 ])
111- except (ValidationError , ValueError ) as e :
135+ payload = row [0 ].encode ("utf-8" ) if isinstance (row [0 ], str ) else bytes (row [0 ])
136+ result_json = decode_cache_payload (payload , row [1 ])
137+ result = GetDocsResult .model_validate_json (result_json )
138+ except (ValidationError , ValueError , TypeError ) as e :
112139 self ._misses += 1
113140 logger .warning ("Persistent docs cache entry ignored: %s" , e )
114141 return None
@@ -123,15 +150,16 @@ def put(self, *, result: GetDocsResult, max_chars: int, start_index: int) -> Non
123150 self ._conn .execute (
124151 "INSERT OR REPLACE INTO retrieved_docs_cache "
125152 "(index_fingerprint, version, slug, anchor, max_chars, start_index, "
126- "result_json) VALUES (?, ?, ?, ?, ?, ?, ?)" ,
153+ "result_json, compression ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)" ,
127154 (
128155 self ._fingerprint ,
129156 result .version ,
130157 result .slug ,
131158 self ._anchor_key (result .anchor ),
132159 max_chars ,
133160 start_index ,
134- result .model_dump_json (),
161+ encode_cache_payload (result .model_dump_json (), self ._default_codec ),
162+ self ._default_codec ,
135163 ),
136164 )
137165 self ._conn .commit ()
0 commit comments