Skip to content

Commit 970e848

Browse files
feat(cache): implement get_or_set_cache with stampede protection and stale-while-revalidate
- Added get_or_set_cache function to manage caching with background refresh. - Increased cache TTL to 24 hours and added refresh_after setting. - Implemented per-key locks to prevent cache stampede. - Updated cache management in libraries, specs, stats, and plots endpoints.
1 parent c5f48d6 commit 970e848

File tree

10 files changed

+444
-107
lines changed

10 files changed

+444
-107
lines changed

api/cache.py

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,44 @@
22
Caching utilities for pyplots API.
33
44
Centralized cache management with consistent key patterns.
5+
Includes stampede protection (per-key asyncio.Lock) and
6+
stale-while-revalidate (background refresh before TTL expiry).
57
"""
68

7-
from typing import Any
9+
import asyncio
10+
import logging
11+
import time
12+
from collections.abc import Awaitable, Callable
13+
from typing import Any, TypeVar
814

915
from cachetools import TTLCache
1016

1117
from core.config import settings
1218

1319

20+
T = TypeVar("T")
21+
22+
logger = logging.getLogger(__name__)
23+
1424
# Global cache instance (configured via settings)
1525
_cache: TTLCache = TTLCache(maxsize=settings.cache_maxsize, ttl=settings.cache_ttl)
1626

27+
# Per-key locks to prevent cache stampede (~20-30 unique keys, memory negligible)
28+
_locks: dict[str, asyncio.Lock] = {}
29+
30+
# Timestamps for stale-while-revalidate (key -> monotonic time of last set)
31+
_timestamps: dict[str, float] = {}
32+
33+
34+
def _get_lock(key: str) -> asyncio.Lock:
    """Return the lock associated with *key*, creating it on first use.

    No synchronization is needed around the dict itself: asyncio runs all
    callbacks on a single thread, so this check-then-insert cannot race.
    """
    lock = _locks.get(key)
    if lock is None:
        lock = asyncio.Lock()
        _locks[key] = lock
    return lock
42+
1743

1844
def cache_key(*parts: str) -> str:
1945
"""
@@ -54,6 +80,13 @@ def set_cache(key: str, value: Any) -> None:
5480
value: Value to cache.
5581
"""
5682
_cache[key] = value
83+
_timestamps[key] = time.monotonic()
84+
85+
86+
def cache_age(key: str) -> float | None:
    """Return seconds elapsed since *key* was last written via set_cache.

    Returns None when no timestamp has been recorded for the key.
    """
    last_set = _timestamps.get(key)
    if last_set is None:
        return None
    return time.monotonic() - last_set
5790

5891

5992
def clear_cache() -> None:
@@ -67,6 +100,7 @@ def clear_cache() -> None:
67100
>>> clear_cache() # Invalidates all cached responses
68101
"""
69102
_cache.clear()
103+
_timestamps.clear()
70104

71105

72106
def clear_cache_by_pattern(pattern: str) -> int:
@@ -88,6 +122,7 @@ def clear_cache_by_pattern(pattern: str) -> int:
88122
keys_to_delete = [key for key in _cache.keys() if pattern in key]
89123
for key in keys_to_delete:
90124
del _cache[key]
125+
_timestamps.pop(key, None)
91126
return len(keys_to_delete)
92127

93128

@@ -152,3 +187,71 @@ def get_cache_stats() -> dict:
152187
{"size": 42, "maxsize": 1000, "ttl": 600}
153188
"""
154189
return {"size": len(_cache), "maxsize": _cache.maxsize, "ttl": _cache.ttl}
190+
191+
192+
# ---------------------------------------------------------------------------
193+
# Stampede protection + stale-while-revalidate
194+
# ---------------------------------------------------------------------------
195+
196+
197+
async def get_or_set_cache(
    key: str,
    factory: Callable[[], Awaitable[T]],
    *,
    refresh_after: float | None = None,
    refresh_factory: Callable[[], Awaitable[T]] | None = None,
) -> T:
    """Return the cached value for *key*, computing it via *factory* on a miss.

    A per-key asyncio.Lock guarantees that concurrent cold misses run the
    factory only once (stampede protection). When *refresh_after* is set and
    the cached entry is older than that many seconds, the stale value is
    returned immediately and a background refresh is kicked off
    (stale-while-revalidate).

    Args:
        key: Cache key.
        factory: Async callable producing the value (e.g. a DB query).
            Runs inline on a cold miss, so it may capture a request-scoped
            DB session.
        refresh_after: Age in seconds beyond which a background refresh is
            scheduled. None disables stale-while-revalidate.
        refresh_factory: Self-contained async callable used for background
            refreshes; it must open its own DB session (via get_db_context).
            Only consulted when refresh_after is set. Falls back to *factory*
            when omitted — NOTE(review): a factory holding a request-scoped
            session may not be safe to run after the request ends; confirm
            callers relying on the fallback.
    """
    hit = get_cache(key)
    if hit is not None:
        # Serve the (possibly stale) hit right away; refresh out of band.
        if refresh_after is not None:
            elapsed = cache_age(key)
            if elapsed is not None and elapsed > refresh_after:
                _schedule_refresh(key, refresh_factory or factory)
        return hit

    # Cold miss: compute under the per-key lock so only one caller does the work.
    async with _get_lock(key):
        # Another waiter may have filled the cache while we awaited the lock.
        hit = get_cache(key)
        if hit is not None:
            return hit
        value = await factory()
        set_cache(key, value)
        return value
237+
238+
239+
def _schedule_refresh(key: str, factory: Callable[[], Awaitable[Any]]) -> None:
    """Kick off a background refresh for *key* unless one is already running.

    The refresh lock lives under a derived key so it never contends with the
    cold-miss lock on the data key itself.

    NOTE(review): lock.locked() only turns True once the scheduled task has
    started and acquired the lock, so back-to-back calls may each schedule a
    task before the first one runs.
    """
    guard = _get_lock(f"_refresh:{key}")
    if not guard.locked():
        asyncio.create_task(_background_refresh(key, factory, guard))
246+
247+
248+
async def _background_refresh(
    key: str, factory: Callable[[], Awaitable[Any]], lock: asyncio.Lock
) -> None:
    """Run *factory* and store its result under *key* in the cache.

    Intended to run as a fire-and-forget task created by _schedule_refresh.
    Failures are logged and swallowed so the stale cached value keeps being
    served rather than surfacing an error to any caller.

    Args:
        key: Cache key to refresh.
        factory: Self-contained async callable producing the new value.
        lock: Refresh lock for this key; held for the duration of the run.
    """
    # _schedule_refresh checks lock.locked() before create_task, but the lock
    # is only acquired once this coroutine actually starts, so duplicate
    # tasks can be scheduled in that window. Re-check here and drop the
    # duplicate: there is no await point between this check and the
    # uncontended acquire below, so the check-then-acquire is atomic under
    # asyncio's single-threaded scheduling.
    if lock.locked():
        return
    async with lock:
        try:
            result = await factory()
            set_cache(key, result)
        except Exception:
            logger.warning("Background cache refresh failed for key: %s", key, exc_info=True)

api/routers/libraries.py

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,38 @@
33
from fastapi import APIRouter, Depends
44
from sqlalchemy.ext.asyncio import AsyncSession
55

6-
from api.cache import cache_key, get_cache, set_cache
6+
from api.cache import cache_key, get_cache, get_or_set_cache, set_cache
77
from api.dependencies import optional_db, require_db
88
from api.exceptions import raise_not_found
9+
from core.config import settings
910
from core.constants import LIBRARIES_METADATA, SUPPORTED_LIBRARIES
1011
from core.database import LibraryRepository, SpecRepository
12+
from core.database.connection import get_db_context
1113
from core.utils import strip_noqa_comments
1214

1315

1416
router = APIRouter(tags=["libraries"])
1517

1618

19+
async def _refresh_libraries() -> dict:
    """Rebuild the /libraries payload using a fresh, self-owned DB session.

    Background-refresh factory: it cannot reuse a request-scoped session,
    so it opens its own via get_db_context().
    """
    async with get_db_context() as db:
        rows = await LibraryRepository(db).get_all()
        # Build the payload while the session is still open, since ORM
        # attribute access may depend on it.
        return {
            "libraries": [
                {
                    "id": lib.id,
                    "name": lib.name,
                    "version": lib.version,
                    "documentation_url": lib.documentation_url,
                    "description": lib.description,
                }
                for lib in rows
            ]
        }
36+
37+
1738
@router.get("/libraries")
1839
async def get_libraries(db: AsyncSession | None = Depends(optional_db)):
1940
"""
@@ -24,28 +45,28 @@ async def get_libraries(db: AsyncSession | None = Depends(optional_db)):
2445
if db is None:
2546
return {"libraries": LIBRARIES_METADATA}
2647

27-
key = cache_key("libraries")
28-
cached = get_cache(key)
29-
if cached:
30-
return cached
31-
32-
repo = LibraryRepository(db)
33-
libraries = await repo.get_all()
34-
35-
result = {
36-
"libraries": [
37-
{
38-
"id": lib.id,
39-
"name": lib.name,
40-
"version": lib.version,
41-
"documentation_url": lib.documentation_url,
42-
"description": lib.description,
43-
}
44-
for lib in libraries
45-
]
46-
}
47-
set_cache(key, result)
48-
return result
48+
async def _fetch() -> dict:
49+
repo = LibraryRepository(db)
50+
libraries = await repo.get_all()
51+
return {
52+
"libraries": [
53+
{
54+
"id": lib.id,
55+
"name": lib.name,
56+
"version": lib.version,
57+
"documentation_url": lib.documentation_url,
58+
"description": lib.description,
59+
}
60+
for lib in libraries
61+
]
62+
}
63+
64+
return await get_or_set_cache(
65+
cache_key("libraries"),
66+
_fetch,
67+
refresh_after=settings.cache_refresh_after,
68+
refresh_factory=_refresh_libraries,
69+
)
4970

5071

5172
@router.get("/libraries/{library_id}/images")

api/routers/plots.py

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from sqlalchemy.exc import SQLAlchemyError
88
from sqlalchemy.ext.asyncio import AsyncSession
99

10-
from api.cache import get_cache, set_cache
10+
from api.cache import get_or_set_cache
1111
from api.dependencies import require_db
1212
from api.exceptions import DatabaseQueryError
1313
from api.schemas import FilteredPlotsResponse
@@ -405,43 +405,30 @@ async def get_filtered_plots(
405405
"""
406406
# Parse query parameters
407407
filter_groups = _parse_filter_groups(request)
408+
cache_k = _build_cache_key(filter_groups)
408409

409-
# Check cache (cache stores unpaginated result; pagination applied after)
410-
cache_key = _build_cache_key(filter_groups)
411-
cached: FilteredPlotsResponse | None = None
412-
try:
413-
cached = get_cache(cache_key)
414-
except Exception as e:
415-
logger.warning("Cache read failed for key %s: %s", cache_key, e)
416-
417-
if cached is None:
418-
# Fetch data from database
410+
async def _fetch_filtered() -> FilteredPlotsResponse:
419411
try:
420412
repo = SpecRepository(db)
421413
all_specs = await repo.get_all()
422414
except SQLAlchemyError as e:
423415
logger.error("Database query failed in get_filtered_plots: %s", e)
424416
raise DatabaseQueryError("fetch_specs", str(e)) from e
425417

426-
# Build data structures
427418
spec_lookup = _build_spec_lookup(all_specs)
428419
impl_lookup = _build_impl_lookup(all_specs)
429420
all_images = _collect_all_images(all_specs)
430421
spec_id_to_tags = {spec_id: spec_data["tags"] for spec_id, spec_data in spec_lookup.items()}
431422

432-
# Filter images
433423
filtered_images = _filter_images(all_images, filter_groups, spec_lookup, impl_lookup)
434424

435-
# Calculate counts (always from ALL filtered images, not paginated)
436425
global_counts = _calculate_global_counts(all_specs)
437426
counts = _calculate_contextual_counts(filtered_images, spec_id_to_tags, impl_lookup)
438427
or_counts = _calculate_or_counts(filter_groups, all_images, spec_id_to_tags, spec_lookup, impl_lookup)
439428

440-
# Build spec_id -> title mapping for search/tooltips
441429
spec_titles = {spec_id: data["spec"].title for spec_id, data in spec_lookup.items() if data["spec"].title}
442430

443-
# Cache the full (unpaginated) result
444-
cached = FilteredPlotsResponse(
431+
return FilteredPlotsResponse(
445432
total=len(filtered_images),
446433
images=filtered_images,
447434
counts=counts,
@@ -450,10 +437,8 @@ async def get_filtered_plots(
450437
specTitles=spec_titles,
451438
)
452439

453-
try:
454-
set_cache(cache_key, cached)
455-
except Exception as e:
456-
logger.warning("Cache write failed for key %s: %s", cache_key, e)
440+
# get_or_set_cache provides stampede lock (no refresh_after — too many filter key variants)
441+
cached = await get_or_set_cache(cache_k, _fetch_filtered)
457442

458443
# Apply pagination on top of (possibly cached) result
459444
paginated = cached.images[offset : offset + limit] if limit else cached.images[offset:]

api/routers/specs.py

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,33 @@
33
from fastapi import APIRouter, Depends
44
from sqlalchemy.ext.asyncio import AsyncSession
55

6-
from api.cache import cache_key, get_cache, set_cache
6+
from api.cache import cache_key, get_cache, get_or_set_cache, set_cache
77
from api.dependencies import require_db
88
from api.exceptions import raise_not_found
99
from api.schemas import ImplementationResponse, SpecDetailResponse, SpecListItem
10+
from core.config import settings
1011
from core.database import SpecRepository
12+
from core.database.connection import get_db_context
1113
from core.utils import strip_noqa_comments
1214

1315

1416
router = APIRouter(tags=["specs"])
1517

1618

19+
async def _refresh_specs_list() -> list[SpecListItem]:
    """Rebuild the /specs list payload using a fresh, self-owned DB session.

    Background-refresh factory: opens its own session via get_db_context()
    instead of relying on a request-scoped one. Specs without at least one
    implementation are excluded, matching the /specs endpoint.
    """
    async with get_db_context() as db:
        specs = await SpecRepository(db).get_all()
        items: list[SpecListItem] = []
        for spec in specs:
            if not spec.impls:
                # Skip specs that have no implementations.
                continue
            items.append(
                SpecListItem(
                    id=spec.id,
                    title=spec.title,
                    description=spec.description,
                    tags=spec.tags,
                    library_count=len(spec.impls),
                )
            )
        return items
31+
32+
1733
@router.get("/specs", response_model=list[SpecListItem])
1834
async def get_specs(db: AsyncSession = Depends(require_db)):
1935
"""
@@ -22,24 +38,23 @@ async def get_specs(db: AsyncSession = Depends(require_db)):
2238
Returns only specs that have at least one implementation.
2339
"""
2440

25-
key = cache_key("specs_list")
26-
cached = get_cache(key)
27-
if cached:
28-
return cached
29-
30-
repo = SpecRepository(db)
31-
specs = await repo.get_all()
32-
33-
# Only return specs with at least one implementation
34-
result = [
35-
SpecListItem(
36-
id=spec.id, title=spec.title, description=spec.description, tags=spec.tags, library_count=len(spec.impls)
37-
)
38-
for spec in specs
39-
if spec.impls # Filter: only specs with implementations
40-
]
41-
set_cache(key, result)
42-
return result
41+
async def _fetch() -> list[SpecListItem]:
42+
repo = SpecRepository(db)
43+
specs = await repo.get_all()
44+
return [
45+
SpecListItem(
46+
id=spec.id, title=spec.title, description=spec.description, tags=spec.tags, library_count=len(spec.impls)
47+
)
48+
for spec in specs
49+
if spec.impls
50+
]
51+
52+
return await get_or_set_cache(
53+
cache_key("specs_list"),
54+
_fetch,
55+
refresh_after=settings.cache_refresh_after,
56+
refresh_factory=_refresh_specs_list,
57+
)
4358

4459

4560
@router.get("/specs/{spec_id}", response_model=SpecDetailResponse)

0 commit comments

Comments
 (0)