perf: Cloud Run CPU optimization, server-side pagination, GCS cache (#4710)

MarkusNeusinger · claude · web-flow · commit b242d4f4389c · 2026-03-08T22:00:21.000+01:00
## Summary

- **Cloud Run CPU 2→1**: Neither the frontend (nginx) nor the backend (async uvicorn) needs 2 cores. The backend is also upgraded to the gen2 execution environment for better CPU/network performance.
- **Server-side pagination**: `/plots/filter` now accepts `limit` and `offset` query params. Counts and totals are still computed from all filtered images. The cache stores the full, unpaginated result and the slice is applied afterwards. Fully backward-compatible — no params = all images.
- **GCS Cache-Control 1h→1d**: All `gsutil cp` commands in the impl-generate, impl-merge, and impl-repair workflows now set `Cache-Control: public, max-age=86400`.

## Changed files

| File | Change |
|------|--------|
| `app/cloudbuild.yaml` | CPU 2→1 |
| `api/cloudbuild.yaml` | CPU 2→1, add `--execution-environment=gen2` |
| `api/schemas.py` | Add `offset`/`limit` fields to `FilteredPlotsResponse` |
| `api/routers/plots.py` | Add pagination query params, cache key update (filter groups sorted by category so the key is independent of query param order), slice logic |
| `tests/unit/api/test_routers.py` | 4 new pagination tests + cached mock update |
| `tests/unit/api/mcp/test_tools.py` | Import tools as plain functions; use `list_tools()` instead of `get_tools()`/`get_tool()` |
| `.github/workflows/impl-generate.yml` | Cache-Control header on gsutil cp |
| `.github/workflows/impl-merge.yml` | Cache-Control header on gsutil cp |
| `.github/workflows/impl-repair.yml` | Cache-Control header on gsutil cp |

## Test plan

- [x] `uv run ruff check` passes on all changed Python files
- [x] `uv run pytest tests/unit/api/test_routers.py` — 101 tests pass
- [ ] Verify `/plots/filter?limit=10&offset=0` returns 10 images with correct `total`
- [ ] Verify `/plots/filter` without params returns all images (backward compat)
- [ ] Verify Cloud Run deploys successfully with new CPU/gen2 settings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/.github/workflows/impl-generate.yml b/.github/workflows/impl-generate.yml
@@ -534,7 +534,7 @@ jobs:
 
           # Upload PNG (with watermark)
           if [ -f "$IMPL_DIR/plot.png" ]; then
-            gsutil cp "$IMPL_DIR/plot.png" "${STAGING_PATH}/plot.png"
+            gsutil -h "Cache-Control:public, max-age=86400" cp "$IMPL_DIR/plot.png" "${STAGING_PATH}/plot.png"
             gsutil acl ch -u AllUsers:R "${STAGING_PATH}/plot.png" 2>/dev/null || true
             echo "png_url=${PUBLIC_URL}/plot.png" >> $GITHUB_OUTPUT
             echo "uploaded=true" >> $GITHUB_OUTPUT
@@ -545,15 +545,15 @@ jobs:
 
           # Upload thumbnail
           if [ -f "$IMPL_DIR/plot_thumb.png" ]; then
-            gsutil cp "$IMPL_DIR/plot_thumb.png" "${STAGING_PATH}/plot_thumb.png"
+            gsutil -h "Cache-Control:public, max-age=86400" cp "$IMPL_DIR/plot_thumb.png" "${STAGING_PATH}/plot_thumb.png"
             gsutil acl ch -u AllUsers:R "${STAGING_PATH}/plot_thumb.png" 2>/dev/null || true
             echo "thumb_url=${PUBLIC_URL}/plot_thumb.png" >> $GITHUB_OUTPUT
             echo "::notice::Uploaded thumbnail"
           fi
 
           # Upload HTML (interactive libraries)
           if [ -f "$IMPL_DIR/plot.html" ]; then
-            gsutil cp "$IMPL_DIR/plot.html" "${STAGING_PATH}/plot.html"
+            gsutil -h "Cache-Control:public, max-age=86400" cp "$IMPL_DIR/plot.html" "${STAGING_PATH}/plot.html"
             gsutil acl ch -u AllUsers:R "${STAGING_PATH}/plot.html" 2>/dev/null || true
             echo "html_url=${PUBLIC_URL}/plot.html" >> $GITHUB_OUTPUT
           fi
diff --git a/.github/workflows/impl-merge.yml b/.github/workflows/impl-merge.yml
@@ -201,7 +201,7 @@ jobs:
           PRODUCTION="gs://pyplots-images/plots/${SPEC_ID}/${LIBRARY}"
 
           # Copy from staging to production
-          gsutil -m cp -r "${STAGING}/*" "${PRODUCTION}/" 2>/dev/null || echo "No staging files to promote"
+          gsutil -m -h "Cache-Control:public, max-age=86400" cp -r "${STAGING}/*" "${PRODUCTION}/" 2>/dev/null || echo "No staging files to promote"
 
           # Make production files public
           gsutil -m acl ch -r -u AllUsers:R "${PRODUCTION}/" 2>/dev/null || true
diff --git a/.github/workflows/impl-repair.yml b/.github/workflows/impl-repair.yml
@@ -187,17 +187,17 @@ jobs:
           gcloud auth activate-service-account --key-file=/tmp/gcs-key.json
 
           if [ -f "$IMPL_DIR/plot.png" ]; then
-            gsutil cp "$IMPL_DIR/plot.png" "${STAGING_PATH}/plot.png"
+            gsutil -h "Cache-Control:public, max-age=86400" cp "$IMPL_DIR/plot.png" "${STAGING_PATH}/plot.png"
             gsutil acl ch -u AllUsers:R "${STAGING_PATH}/plot.png" 2>/dev/null || true
           fi
 
           if [ -f "$IMPL_DIR/plot_thumb.png" ]; then
-            gsutil cp "$IMPL_DIR/plot_thumb.png" "${STAGING_PATH}/plot_thumb.png"
+            gsutil -h "Cache-Control:public, max-age=86400" cp "$IMPL_DIR/plot_thumb.png" "${STAGING_PATH}/plot_thumb.png"
             gsutil acl ch -u AllUsers:R "${STAGING_PATH}/plot_thumb.png" 2>/dev/null || true
           fi
 
           if [ -f "$IMPL_DIR/plot.html" ]; then
-            gsutil cp "$IMPL_DIR/plot.html" "${STAGING_PATH}/plot.html"
+            gsutil -h "Cache-Control:public, max-age=86400" cp "$IMPL_DIR/plot.html" "${STAGING_PATH}/plot.html"
             gsutil acl ch -u AllUsers:R "${STAGING_PATH}/plot.html" 2>/dev/null || true
           fi
 
diff --git a/api/cloudbuild.yaml b/api/cloudbuild.yaml
@@ -3,7 +3,7 @@ substitutions:
   _SERVICE_NAME: pyplots-backend
   _REGION: europe-west4
   _MEMORY: 512Mi
-  _CPU: "2"
+  _CPU: "1"
   _MIN_INSTANCES: "1"
   _MAX_INSTANCES: "3"
 
@@ -55,6 +55,7 @@ steps:
       - "--add-cloudsql-instances=pyplots:europe-west4:pyplots-db"
       - "--set-secrets=DATABASE_URL=DATABASE_URL:latest"
       - "--set-env-vars=ENVIRONMENT=production"
+      - "--execution-environment=gen2"
       - "--set-env-vars=GOOGLE_CLOUD_PROJECT=$PROJECT_ID"
       - "--set-env-vars=GCS_BUCKET=pyplots-images"
     id: "deploy"
diff --git a/api/routers/plots.py b/api/routers/plots.py
@@ -3,7 +3,7 @@
 import logging
 from collections.abc import Callable
 
-from fastapi import APIRouter, Depends, Request
+from fastapi import APIRouter, Depends, Query, Request
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -267,6 +267,8 @@ def _build_cache_key(filter_groups: list[dict]) -> str:
     """
     Build cache key from filter groups.
 
+    Groups are sorted by category so key is stable regardless of query param order.
+
     Args:
         filter_groups: List of filter group dicts
 
@@ -276,7 +278,8 @@ def _build_cache_key(filter_groups: list[dict]) -> str:
     if not filter_groups:
         return "filter:all"
 
-    cache_parts = [f"{g['category']}={','.join(sorted(g['values']))}" for g in filter_groups]
+    normalized = sorted(filter_groups, key=lambda g: g["category"])
+    cache_parts = [f"{g['category']}={','.join(sorted(g['values']))}" for g in normalized]
     return f"filter:{':'.join(cache_parts)}"
 
 
@@ -370,7 +373,12 @@ def _filter_images(
 
 
 @router.get("/plots/filter", response_model=FilteredPlotsResponse)
-async def get_filtered_plots(request: Request, db: AsyncSession = Depends(require_db)):
+async def get_filtered_plots(
+    request: Request,
+    db: AsyncSession = Depends(require_db),
+    limit: int | None = Query(None, ge=1),
+    offset: int = Query(0, ge=0),
+):
     """
     Get filtered plot images with counts for all filter categories.
 
@@ -398,55 +406,65 @@ async def get_filtered_plots(request: Request, db: AsyncSession = Depends(requir
     # Parse query parameters
     filter_groups = _parse_filter_groups(request)
 
-    # Check cache
+    # Check cache (cache stores unpaginated result; pagination applied after)
     cache_key = _build_cache_key(filter_groups)
+    cached: FilteredPlotsResponse | None = None
     try:
         cached = get_cache(cache_key)
-        if cached:
-            return cached
     except Exception as e:
-        # Cache failures are non-fatal, log and continue
         logger.warning("Cache read failed for key %s: %s", cache_key, e)
 
-    # Fetch data from database
-    try:
-        repo = SpecRepository(db)
-        all_specs = await repo.get_all()
-    except SQLAlchemyError as e:
-        logger.error("Database query failed in get_filtered_plots: %s", e)
-        raise DatabaseQueryError("fetch_specs", str(e)) from e
-
-    # Build data structures
-    spec_lookup = _build_spec_lookup(all_specs)
-    impl_lookup = _build_impl_lookup(all_specs)
-    all_images = _collect_all_images(all_specs)
-    spec_id_to_tags = {spec_id: spec_data["tags"] for spec_id, spec_data in spec_lookup.items()}
-
-    # Filter images
-    filtered_images = _filter_images(all_images, filter_groups, spec_lookup, impl_lookup)
-
-    # Calculate counts
-    global_counts = _calculate_global_counts(all_specs)
-    counts = _calculate_contextual_counts(filtered_images, spec_id_to_tags, impl_lookup)
-    or_counts = _calculate_or_counts(filter_groups, all_images, spec_id_to_tags, spec_lookup, impl_lookup)
-
-    # Build spec_id -> title mapping for search/tooltips
-    spec_titles = {spec_id: data["spec"].title for spec_id, data in spec_lookup.items() if data["spec"].title}
-
-    # Build and cache response
-    result = FilteredPlotsResponse(
-        total=len(filtered_images),
-        images=filtered_images,
-        counts=counts,
-        globalCounts=global_counts,
-        orCounts=or_counts,
-        specTitles=spec_titles,
+    if cached is None:
+        # Fetch data from database
+        try:
+            repo = SpecRepository(db)
+            all_specs = await repo.get_all()
+        except SQLAlchemyError as e:
+            logger.error("Database query failed in get_filtered_plots: %s", e)
+            raise DatabaseQueryError("fetch_specs", str(e)) from e
+
+        # Build data structures
+        spec_lookup = _build_spec_lookup(all_specs)
+        impl_lookup = _build_impl_lookup(all_specs)
+        all_images = _collect_all_images(all_specs)
+        spec_id_to_tags = {spec_id: spec_data["tags"] for spec_id, spec_data in spec_lookup.items()}
+
+        # Filter images
+        filtered_images = _filter_images(all_images, filter_groups, spec_lookup, impl_lookup)
+
+        # Calculate counts (always from ALL filtered images, not paginated)
+        global_counts = _calculate_global_counts(all_specs)
+        counts = _calculate_contextual_counts(filtered_images, spec_id_to_tags, impl_lookup)
+        or_counts = _calculate_or_counts(filter_groups, all_images, spec_id_to_tags, spec_lookup, impl_lookup)
+
+        # Build spec_id -> title mapping for search/tooltips
+        spec_titles = {spec_id: data["spec"].title for spec_id, data in spec_lookup.items() if data["spec"].title}
+
+        # Cache the full (unpaginated) result
+        cached = FilteredPlotsResponse(
+            total=len(filtered_images),
+            images=filtered_images,
+            counts=counts,
+            globalCounts=global_counts,
+            orCounts=or_counts,
+            specTitles=spec_titles,
+        )
+
+        try:
+            set_cache(cache_key, cached)
+        except Exception as e:
+            logger.warning("Cache write failed for key %s: %s", cache_key, e)
+
+    # Apply pagination on top of (possibly cached) result
+    paginated = cached.images[offset : offset + limit] if limit else cached.images[offset:]
+
+    return FilteredPlotsResponse(
+        total=cached.total,
+        images=paginated,
+        counts=cached.counts,
+        globalCounts=cached.globalCounts,
+        orCounts=cached.orCounts,
+        specTitles=cached.specTitles,
+        offset=offset,
+        limit=limit,
     )
-
-    try:
-        set_cache(cache_key, result)
-    except Exception as e:
-        # Cache failures are non-fatal, log and continue
-        logger.warning("Cache write failed for key %s: %s", cache_key, e)
-
-    return result
diff --git a/api/schemas.py b/api/schemas.py
@@ -98,6 +98,8 @@ class FilteredPlotsResponse(BaseModel):
     globalCounts: dict[str, dict[str, int]]  # Same structure for global counts
     orCounts: list[dict[str, int]]  # Per-group OR counts
     specTitles: dict[str, str] = {}  # Mapping spec_id -> title for search/tooltips
+    offset: int = 0
+    limit: int | None = None
 
 
 class LibraryInfo(BaseModel):
diff --git a/app/cloudbuild.yaml b/app/cloudbuild.yaml
@@ -42,7 +42,7 @@ steps:
       - "--memory"
       - "256Mi"
       - "--cpu"
-      - "2"
+      - "1"
       - "--timeout"
       - "60"
       - "--min-instances"
diff --git a/tests/unit/api/mcp/test_tools.py b/tests/unit/api/mcp/test_tools.py
@@ -9,22 +9,15 @@
 import pytest
 
 # Import the tool functions from the module
-# Note: These are FunctionTool objects, we need to access .fn to get the actual callable
-from api.mcp.server import get_implementation as get_implementation_tool
-from api.mcp.server import get_spec_detail as get_spec_detail_tool
-from api.mcp.server import get_tag_values as get_tag_values_tool
-from api.mcp.server import list_libraries as list_libraries_tool
-from api.mcp.server import list_specs as list_specs_tool
-from api.mcp.server import search_specs_by_tags as search_specs_by_tags_tool
-
-
-# Extract the actual functions from the FunctionTool wrappers
-list_specs = list_specs_tool.fn
-search_specs_by_tags = search_specs_by_tags_tool.fn
-get_spec_detail = get_spec_detail_tool.fn
-get_implementation = get_implementation_tool.fn
-list_libraries = list_libraries_tool.fn
-get_tag_values = get_tag_values_tool.fn
+# With FastMCP's @mcp_server.tool() decorator, these are plain async functions
+from api.mcp.server import (
+    get_implementation,
+    get_spec_detail,
+    get_tag_values,
+    list_libraries,
+    list_specs,
+    search_specs_by_tags,
+)
 
 
 @pytest.fixture
@@ -358,7 +351,8 @@ async def test_all_tools_registered(self):
         """MCP server should have all 6 tools registered."""
         from api.mcp.server import mcp_server
 
-        tool_names = await mcp_server.get_tools()
+        tools = await mcp_server.list_tools()
+        tool_names = {t.name for t in tools}
         expected = {
             "list_specs",
             "search_specs_by_tags",
@@ -367,31 +361,29 @@ async def test_all_tools_registered(self):
             "list_libraries",
             "get_tag_values",
         }
-        assert set(tool_names) == expected
+        assert tool_names == expected
 
     @pytest.mark.asyncio
     async def test_tool_objects_have_correct_type(self):
         """Each registered tool should be a FunctionTool with a callable fn."""
         from api.mcp.server import mcp_server
 
-        tool_names = await mcp_server.get_tools()
-        for name in tool_names:
-            tool = await mcp_server.get_tool(name)
-            assert hasattr(tool, "fn"), f"Tool {name} has no 'fn' attribute"
-            assert callable(tool.fn), f"Tool {name}.fn is not callable"
+        tools = await mcp_server.list_tools()
+        for tool in tools:
+            assert hasattr(tool, "fn"), f"Tool {tool.name} has no 'fn' attribute"
+            assert callable(tool.fn), f"Tool {tool.name}.fn is not callable"
 
     @pytest.mark.asyncio
     async def test_tool_schemas_are_valid(self):
         """Each tool should have a valid JSON Schema for its parameters."""
         from api.mcp.server import mcp_server
 
-        tool_names = await mcp_server.get_tools()
-        for name in tool_names:
-            tool = await mcp_server.get_tool(name)
+        tools = await mcp_server.list_tools()
+        for tool in tools:
             schema = tool.parameters
-            assert isinstance(schema, dict), f"Tool {name} schema is not a dict"
-            assert "properties" in schema, f"Tool {name} schema has no 'properties'"
-            assert schema.get("type") == "object", f"Tool {name} schema type is not 'object'"
+            assert isinstance(schema, dict), f"Tool {tool.name} schema is not a dict"
+            assert "properties" in schema, f"Tool {tool.name} schema has no 'properties'"
+            assert schema.get("type") == "object", f"Tool {tool.name} schema type is not 'object'"
 
     @pytest.mark.asyncio
     async def test_get_tag_values_via_call_tool(self):
diff --git a/tests/unit/api/test_routers.py b/tests/unit/api/test_routers.py