bk86a · bk86a · May 1, 2026 · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/).
 
+## [Unreleased]
+
+### Added
+
+- **Multi-worker deployment** (#68): set `PC2NUTS_WORKERS` to launch N uvicorn worker processes. Multi-worker mode requires `PC2NUTS_RATE_LIMIT_STORAGE_URI` (e.g. a Redis URL) so the published per-IP rate limit stays accurate across workers; the service refuses to start otherwise. Transient backend unavailability is tolerated via slowapi's `in_memory_fallback_enabled`: the limiter falls back to per-process in-memory rate limiting and re-probes the backend with exponential backoff, emitting one WARNING log per outage and one INFO log on recovery.
+
 ## [0.17.1] - 2026-04-29
 
 ### Fixed

diff --git a/Dockerfile b/Dockerfile
@@ -21,4 +21,4 @@ USER appuser
 HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
     CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
 
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
+# Run through `sh -c` so ${PC2NUTS_WORKERS:-1} is expanded at container start (defaults to 1 worker);
+# `exec` replaces the shell with uvicorn instead of leaving it running as a child of `sh`.
+CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers ${PC2NUTS_WORKERS:-1}"]
diff --git a/README.md b/README.md
@@ -326,6 +326,32 @@ All settings are overridable via environment variables prefixed with `PC2NUTS_`:
 | `PC2NUTS_ACCESS_LOG_MAX_MB` | `10` | Maximum size of each access log file in MB before rotation. |
 | `PC2NUTS_ACCESS_LOG_BACKUP_COUNT` | `5` | Number of rotated access log files to keep (e.g. 5 x 10 MB = 50 MB max disk usage). |
 
+### Multi-worker deployment
+
+By default the service runs a single uvicorn worker process. Throughput is
+CPU-bound at ~30 RPS per worker (see [docs/performance.md](docs/performance.md)).
+For higher RPS, set `PC2NUTS_WORKERS` to the number of worker processes you
+want — the rough rule of thumb is one worker per CPU core, capped by the
+available memory (~150-200 MB resident set per worker).
+
+| Env var | Default | Effect |
+|---|---|---|
+| `PC2NUTS_WORKERS` | `1` | Number of uvicorn worker processes. |
+| `PC2NUTS_RATE_LIMIT_STORAGE_URI` | (unset) | When unset, slowapi uses per-process in-memory storage (default). When set (e.g. `redis://host:6379/0`), counters are shared across workers so the published `rate_limit` cap stays accurate. |
+
+When `PC2NUTS_WORKERS > 1`, `PC2NUTS_RATE_LIMIT_STORAGE_URI` MUST be set
+to a reachable shared backend; the service refuses to start if the variable is unset.
+This guards against the per-IP rate limit silently loosening to
+`PC2NUTS_WORKERS × rate_limit` per IP in multi-worker mode.
+
+**Degraded mode.** If the configured storage backend becomes unreachable
+at runtime, slowapi (`in_memory_fallback_enabled=True`) falls back to
+per-process in-memory rate limiting and re-probes the primary storage
+with exponential backoff. During the outage window the effective per-IP
+cap is `PC2NUTS_WORKERS × rate_limit`. Recovery is automatic; one
+WARNING log line is emitted at the start of the outage and one INFO line
+on recovery.
+
 ## Authentication & rate-limit bypass
 
 The service applies a per-IP rate limit (`120/minute` by default) to `/lookup` and `/pattern`. Trusted callers — operator-issued, manually distributed — can bypass this limit by presenting an `Authorization: Bearer <token>` header. `/health` stays anonymous.

diff --git a/app/config.py b/app/config.py
@@ -2,7 +2,7 @@
 import re
 from pathlib import Path
 
-from pydantic import Field
+from pydantic import Field, model_validator
 from pydantic_settings import BaseSettings
 
 _settings_path = Path(__file__).parent / "settings.json"
@@ -24,6 +24,8 @@ class Settings(BaseSettings):
     token_refresh_seconds: int = Field(default=60, ge=1)
     rate_limit: str = _defaults.get("rate_limit", "120/minute")
     rate_limit_headers: bool = _defaults.get("rate_limit_headers", True)
+    workers: int = Field(default=_defaults.get("workers", 1), ge=1)
+    rate_limit_storage_uri: str | None = _defaults.get("rate_limit_storage_uri", None)
     cache_max_age: int = _defaults.get("cache_max_age", 3600)
     startup_timeout: int = 300
     docs_enabled: bool = True
@@ -37,6 +39,16 @@ class Settings(BaseSettings):
 
     model_config = {"env_prefix": "PC2NUTS_"}
 
+    @model_validator(mode="after")
+    def _check_workers_have_shared_storage(self) -> "Settings":
+        if self.workers > 1 and not self.rate_limit_storage_uri:
+            raise ValueError(
+                "PC2NUTS_WORKERS > 1 requires PC2NUTS_RATE_LIMIT_STORAGE_URI to be set "
+                "(e.g. 'redis://host:6379/0'). Without shared storage the per-IP rate "
+                "limit would silently loosen by a factor of WORKERS."
+            )
+        return self
+
     @property
     def extra_source_urls(self) -> list[str]:
         """Parse PC2NUTS_EXTRA_SOURCES comma-separated list into URL list."""

diff --git a/app/limiter.py b/app/limiter.py
@@ -0,0 +1,28 @@
+"""Module-level slowapi Limiter, wired according to settings.
+
+When PC2NUTS_RATE_LIMIT_STORAGE_URI is unset, the Limiter falls back to
+slowapi's in-process MemoryStorage default — byte-for-byte the same as
+the pre-#68 inline construction.
+
+When the URI is set (e.g. 'redis://host:6379/0'), the Limiter routes
+counters through the configured backend, with in_memory_fallback_enabled
+giving us per-process MemoryStorage during transient backend outages.
+slowapi handles the fail-degraded behaviour internally with exponential-
+backoff re-probing — see app/main.py:_rate_limit_handler for the 429
+response, and the spec at docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md
+for the design rationale.
+"""
+
+from slowapi import Limiter
+from slowapi.util import get_remote_address
+
+from app.config import settings
+
+if settings.rate_limit_storage_uri:
+    limiter = Limiter(
+        key_func=get_remote_address,
+        storage_uri=settings.rate_limit_storage_uri,
+        in_memory_fallback_enabled=True,
+    )
+else:
+    limiter = Limiter(key_func=get_remote_address)
diff --git a/app/main.py b/app/main.py
@@ -13,14 +13,13 @@
 from fastapi import FastAPI, HTTPException, Query, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
 from starlette.middleware.base import BaseHTTPMiddleware
-from slowapi import Limiter
 from slowapi.errors import RateLimitExceeded
-from slowapi.util import get_remote_address
 from starlette.responses import JSONResponse
 
 from app import __version__, config as _config
 from app.auth import AuthMiddleware, is_trusted_request
 from app.config import settings
+from app.limiter import limiter
 from app.data_loader import (
     get_data_loaded_at,
     get_data_stale,
@@ -42,8 +41,6 @@
 )
 logger = logging.getLogger(__name__)
 
-limiter = Limiter(key_func=get_remote_address)
-
 # Access logger — separate from app logger.
 # Propagates to the root logger so pytest caplog can capture records.
 # When access_log_file is set, also writes to a dedicated rotating file.

diff --git a/app/settings.json b/app/settings.json
@@ -23,5 +23,7 @@
   "approximate_min_confidence": 0.1,
   "rate_limit": "120/minute",
   "rate_limit_headers": true,
+  "workers": 1,
+  "rate_limit_storage_uri": null,
   "cache_max_age": 3600
 }