diff --git a/CHANGELOG.md b/CHANGELOG.md index b1ac0b6..73e41cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/). +## [Unreleased] + +### Added + +- **Multi-worker deployment** (#68): set `PC2NUTS_WORKERS` to launch N uvicorn worker processes. Multi-worker mode requires `PC2NUTS_RATE_LIMIT_STORAGE_URI` (e.g. a Redis URL) so the published per-IP rate limit stays accurate across workers; the service refuses to start otherwise. Transient backend unavailability is tolerated via slowapi's `in_memory_fallback_enabled` — falls back to per-process in-memory rate limiting and re-probes with exponential backoff, with one WARNING log per outage and one INFO log on recovery. + ## [0.17.1] - 2026-04-29 ### Fixed diff --git a/Dockerfile b/Dockerfile index 188bcd9..83c1cc6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,4 +21,4 @@ USER appuser HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1 -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers ${PC2NUTS_WORKERS:-1}"] diff --git a/README.md b/README.md index 73e772d..597f7c1 100644 --- a/README.md +++ b/README.md @@ -326,6 +326,32 @@ All settings are overridable via environment variables prefixed with `PC2NUTS_`: | `PC2NUTS_ACCESS_LOG_MAX_MB` | `10` | Maximum size of each access log file in MB before rotation. | | `PC2NUTS_ACCESS_LOG_BACKUP_COUNT` | `5` | Number of rotated access log files to keep (e.g. 5 x 10 MB = 50 MB max disk usage). | +### Multi-worker deployment + +By default the service runs a single uvicorn worker process. Throughput is +CPU-bound at ~30 RPS per worker (see [docs/performance.md](docs/performance.md)). 
+For higher RPS, set `PC2NUTS_WORKERS` to the number of worker processes you +want — the rough rule of thumb is one worker per CPU core, capped by the +available memory (~150-200 MB resident set per worker). + +| Env var | Default | Effect | +|---|---|---| +| `PC2NUTS_WORKERS` | `1` | Number of uvicorn worker processes. | +| `PC2NUTS_RATE_LIMIT_STORAGE_URI` | (unset) | When unset, slowapi uses per-process in-memory storage (default). When set (e.g. `redis://host:6379/0`), counters are shared across workers so the published `rate_limit` cap stays accurate. | + +When `PC2NUTS_WORKERS > 1`, `PC2NUTS_RATE_LIMIT_STORAGE_URI` MUST be set +to a reachable shared backend; the service refuses to start otherwise. +This guards against the per-IP rate limit silently loosening to +`PC2NUTS_WORKERS × rate_limit` per IP under multi-worker. + +**Degraded mode.** If the configured storage backend becomes unreachable +at runtime, slowapi (`in_memory_fallback_enabled=True`) falls back to +per-process in-memory rate limiting and re-probes the primary storage +with exponential backoff. During the outage window the effective per-IP +cap is `PC2NUTS_WORKERS × rate_limit`. Recovery is automatic; one +WARNING log line is emitted at the start of the outage and one INFO line +on recovery. + ## Authentication & rate-limit bypass The service applies a per-IP rate limit (`120/minute` by default) to `/lookup` and `/pattern`. Trusted callers — operator-issued, manually distributed — can bypass this limit by presenting an `Authorization: Bearer ` header. `/health` stays anonymous. 
diff --git a/app/config.py b/app/config.py index dbe1fb2..86ba828 100644 --- a/app/config.py +++ b/app/config.py @@ -2,7 +2,7 @@ import re from pathlib import Path -from pydantic import Field +from pydantic import Field, model_validator from pydantic_settings import BaseSettings _settings_path = Path(__file__).parent / "settings.json" @@ -24,6 +24,8 @@ class Settings(BaseSettings): token_refresh_seconds: int = Field(default=60, ge=1) rate_limit: str = _defaults.get("rate_limit", "120/minute") rate_limit_headers: bool = _defaults.get("rate_limit_headers", True) + workers: int = Field(default=_defaults.get("workers", 1), ge=1) + rate_limit_storage_uri: str | None = _defaults.get("rate_limit_storage_uri", None) cache_max_age: int = _defaults.get("cache_max_age", 3600) startup_timeout: int = 300 docs_enabled: bool = True @@ -37,6 +39,16 @@ class Settings(BaseSettings): model_config = {"env_prefix": "PC2NUTS_"} + @model_validator(mode="after") + def _check_workers_have_shared_storage(self) -> "Settings": + if self.workers > 1 and not self.rate_limit_storage_uri: + raise ValueError( + "PC2NUTS_WORKERS > 1 requires PC2NUTS_RATE_LIMIT_STORAGE_URI to be set " + "(e.g. 'redis://host:6379/0'). Without shared storage the per-IP rate " + "limit would silently loosen by a factor of WORKERS." + ) + return self + @property def extra_source_urls(self) -> list[str]: """Parse PC2NUTS_EXTRA_SOURCES comma-separated list into URL list.""" diff --git a/app/limiter.py b/app/limiter.py new file mode 100644 index 0000000..969c5c5 --- /dev/null +++ b/app/limiter.py @@ -0,0 +1,28 @@ +"""Module-level slowapi Limiter, wired according to settings. + +When PC2NUTS_RATE_LIMIT_STORAGE_URI is unset, the Limiter falls back to +slowapi's in-process MemoryStorage default — byte-for-byte the same as +the pre-#68 inline construction. + +When the URI is set (e.g. 
'redis://host:6379/0'), the Limiter routes +counters through the configured backend, with in_memory_fallback_enabled +giving us per-process MemoryStorage during transient backend outages. +slowapi handles the fail-degraded behaviour internally with exponential- +backoff re-probing — see app/main.py:_rate_limit_handler for the 429 +response, and the spec at docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md +for the design rationale. +""" + +from slowapi import Limiter +from slowapi.util import get_remote_address + +from app.config import settings + +if settings.rate_limit_storage_uri: + limiter = Limiter( + key_func=get_remote_address, + storage_uri=settings.rate_limit_storage_uri, + in_memory_fallback_enabled=True, + ) +else: + limiter = Limiter(key_func=get_remote_address) diff --git a/app/main.py b/app/main.py index 4034202..5b4004e 100644 --- a/app/main.py +++ b/app/main.py @@ -13,14 +13,13 @@ from fastapi import FastAPI, HTTPException, Query, Request, Response from fastapi.middleware.cors import CORSMiddleware from starlette.middleware.base import BaseHTTPMiddleware -from slowapi import Limiter from slowapi.errors import RateLimitExceeded -from slowapi.util import get_remote_address from starlette.responses import JSONResponse from app import __version__, config as _config from app.auth import AuthMiddleware, is_trusted_request from app.config import settings +from app.limiter import limiter from app.data_loader import ( get_data_loaded_at, get_data_stale, @@ -42,8 +41,6 @@ ) logger = logging.getLogger(__name__) -limiter = Limiter(key_func=get_remote_address) - # Access logger — separate from app logger. # Propagates to the root logger so pytest caplog can capture records. # When access_log_file is set, also writes to a dedicated rotating file. 
diff --git a/app/settings.json b/app/settings.json index 21b7b3b..773de1d 100644 --- a/app/settings.json +++ b/app/settings.json @@ -23,5 +23,7 @@ "approximate_min_confidence": 0.1, "rate_limit": "120/minute", "rate_limit_headers": true, + "workers": 1, + "rate_limit_storage_uri": null, "cache_max_age": 3600 } diff --git a/docs/superpowers/plans/2026-05-01-multi-worker-uvicorn.md b/docs/superpowers/plans/2026-05-01-multi-worker-uvicorn.md new file mode 100644 index 0000000..0685648 --- /dev/null +++ b/docs/superpowers/plans/2026-05-01-multi-worker-uvicorn.md @@ -0,0 +1,795 @@ +# Multi-worker uvicorn Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** allow the service to run with N uvicorn workers behind a Redis-backed shared rate-limit backend, while leaving single-worker / in-memory deploys byte-for-byte unchanged. + +**Architecture:** two new opt-in env vars (`PC2NUTS_WORKERS`, `PC2NUTS_RATE_LIMIT_STORAGE_URI`) drive the change. The slowapi `Limiter` is moved to a small dedicated module that picks between the current default construction and a Redis-configured construction with `in_memory_fallback_enabled=True`. A Pydantic model validator on `Settings` hard-fails startup when `WORKERS > 1` without a storage URI to prevent silent cap loosening. The Dockerfile `CMD` becomes shell-form so `${PC2NUTS_WORKERS}` expands at container start. + +**Tech Stack:** Python 3.14, FastAPI 0.129, uvicorn 0.40, slowapi 0.1.9, limits 5.8.0, pydantic 2.12.5, pydantic-settings 2.13.0, redis-py (added via `limits[redis]` extra). 
+ +**Spec:** `docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md` + +**Issue:** [#68](https://github.com/bk86a/PostalCode2NUTS/issues/68) + +--- + +## File map + +| File | Action | Responsibility | +|---|---|---| +| `app/config.py` | Modify | Add `workers` and `rate_limit_storage_uri` fields + cross-field validator | +| `app/limiter.py` | Create | Single-purpose module exposing the configured `Limiter` | +| `app/main.py` | Modify | Replace inline `limiter = Limiter(...)` (line 45) with import from `app.limiter` | +| `app/settings.json` | Modify | Add default for `workers` (`1`) and explicit null for `rate_limit_storage_uri` | +| `Dockerfile` | Modify | Switch `CMD` from exec-form to shell-form so `${PC2NUTS_WORKERS}` expands | +| `requirements.lock` | Modify | Add `redis` runtime dep (via `limits[redis]` extra) | +| `tests/test_limiter.py` | Create | Unit tests for `Limiter` storage-mode selection | +| `tests/test_config.py` | Modify or create | Unit tests for the new `Settings` validator | +| `README.md` | Modify | Document `PC2NUTS_WORKERS`, `PC2NUTS_RATE_LIMIT_STORAGE_URI`, degraded-mode behaviour | +| `CHANGELOG.md` | Modify | Unreleased section entry | + +--- + +## Pre-flight + +Before starting, verify the worktree is on the `feat/multi-worker-uvicorn` branch and the spec is committed: + +```bash +git rev-parse --abbrev-ref HEAD +# Expected: feat/multi-worker-uvicorn + +git log --oneline main..HEAD +# Expected: at least one commit referencing the spec +``` + +If the branch is wrong, stop and switch (`git switch feat/multi-worker-uvicorn`). + +Verify the test suite is green at HEAD before touching anything: + +```bash +pytest -x -q +``` + +If any tests fail at HEAD, stop and report — don't start making changes against a broken baseline. 
+ +--- + +## Task 1: Add `Settings` fields and validator + +**Files:** +- Modify: `app/config.py` +- Modify: `app/settings.json` +- Modify or create: `tests/test_config.py` + +This task introduces the two new config fields and the cross-field validator that prevents the dangerous combination (`workers > 1` with no storage URI). Tests come first. + +- [ ] **Step 1: Confirm whether `tests/test_config.py` exists** + +```bash +ls tests/test_config.py 2>&1 +``` + +If the file does not exist, create it in Step 2 with the imports in place. If it exists, append to it. + +- [ ] **Step 2: Write the failing tests** + +If creating `tests/test_config.py`, full contents: + +```python +"""Tests for app.config.Settings.""" + +import pytest +from pydantic import ValidationError + +from app.config import Settings + + +class TestWorkersValidator: + def test_workers_eq_1_without_storage_uri_succeeds(self): + """Default config must keep validating — single-worker, no storage URI.""" + s = Settings(workers=1, rate_limit_storage_uri=None) + assert s.workers == 1 + assert s.rate_limit_storage_uri is None + + def test_workers_gt_1_with_storage_uri_succeeds(self): + """Multi-worker is permitted when a storage URI is configured.""" + s = Settings(workers=4, rate_limit_storage_uri="redis://localhost:6379/0") + assert s.workers == 4 + assert s.rate_limit_storage_uri == "redis://localhost:6379/0" + + def test_workers_gt_1_without_storage_uri_fails_startup(self): + """The unsafe combination must raise — silent cap loosening is the + failure mode this validator exists to prevent.""" + with pytest.raises(ValidationError) as excinfo: + Settings(workers=2, rate_limit_storage_uri=None) + msg = str(excinfo.value) + assert "PC2NUTS_WORKERS" in msg + assert "PC2NUTS_RATE_LIMIT_STORAGE_URI" in msg + + def test_workers_gt_1_with_empty_storage_uri_fails_startup(self): + """Empty string should be treated the same as None — both mean unset.""" + with pytest.raises(ValidationError): + Settings(workers=2, 
rate_limit_storage_uri="") +``` + +If the file already exists, append the `TestWorkersValidator` class. + +- [ ] **Step 3: Run tests to verify they fail** + +```bash +pytest tests/test_config.py -v +``` + +Expected: all four tests FAIL because `workers` and `rate_limit_storage_uri` aren't fields on `Settings` yet (most likely an `AttributeError` or unexpected-kwarg error from Pydantic). + +- [ ] **Step 4: Add the fields and validator to `app/config.py`** + +In `app/config.py`, change the imports at the top: + +```python +from pydantic import Field, model_validator +from pydantic_settings import BaseSettings +``` + +Inside the `Settings` class body, add the two new fields next to the existing `rate_limit` and `rate_limit_headers` fields (around line 25-26). The `_defaults.get(...)` pattern matches the existing fields: + +```python + workers: int = Field(default=_defaults.get("workers", 1), ge=1) + rate_limit_storage_uri: str | None = _defaults.get("rate_limit_storage_uri", None) +``` + +Then add the validator method inside the class (place it after the existing `model_config = {...}` line and before the `@property` block): + +```python + @model_validator(mode="after") + def _check_workers_have_shared_storage(self) -> "Settings": + if self.workers > 1 and not self.rate_limit_storage_uri: + raise ValueError( + "PC2NUTS_WORKERS > 1 requires PC2NUTS_RATE_LIMIT_STORAGE_URI to be set " + "(e.g. 'redis://host:6379/0'). Without shared storage the per-IP rate " + "limit would silently loosen by a factor of WORKERS." + ) + return self +``` + +- [ ] **Step 5: Add defaults to `app/settings.json`** + +Edit `app/settings.json`. Inside the JSON object, add two keys after `"rate_limit_headers"`: + +```json + "rate_limit": "120/minute", + "rate_limit_headers": true, + "workers": 1, + "rate_limit_storage_uri": null, + "cache_max_age": 3600 +``` + +The trailing comma after `"rate_limit_headers": true,` already exists. 
The trailing comma after `"rate_limit_storage_uri": null,` is new — verify it parses by reading the file back and asserting validity: + +```bash +python3 -c "import json; json.load(open('app/settings.json'))" && echo "valid JSON" +``` + +Expected: `valid JSON` + +- [ ] **Step 6: Run tests to verify they pass** + +```bash +pytest tests/test_config.py -v +``` + +Expected: all four tests PASS. + +- [ ] **Step 7: Run the full test suite to verify no regressions** + +```bash +pytest -x -q +``` + +Expected: green. Note: `Settings()` is instantiated at module import time in `app/config.py:88`. The default config (`workers=1`, `rate_limit_storage_uri=None`) must continue to validate, which Step 6 confirmed. + +- [ ] **Step 8: Commit** + +```bash +git add app/config.py app/settings.json tests/test_config.py +git commit -m "feat(config): add workers and rate_limit_storage_uri settings (#68) + +New Pydantic model validator hard-fails startup if PC2NUTS_WORKERS > 1 +without PC2NUTS_RATE_LIMIT_STORAGE_URI configured, so the per-IP rate +limit can never silently loosen under multi-worker. + +Defaults preserve current behaviour: workers=1, storage URI unset." +``` + +--- + +## Task 2: Add `redis` runtime dependency + +**Files:** +- Modify: `requirements.lock` + +The `limits[redis]` extra pulls in the `redis-py` client at the version `limits 5.8.0` expects. We add it before the limiter tests because constructing a `Limiter(storage_uri="redis://…")` eagerly imports the `redis` module — Task 3 below would fail without it. We confirmed with `limits/storage/redis.py:96` that `RedisStorage.__init__` resolves `self.dependency = self.dependencies["redis"].module` at construction time; if the module isn't importable, slowapi raises `ConfigurationError` immediately. Construction does NOT open a TCP connection (`register_script` is local-only), so the test does not require a running Redis. 
+ +- [ ] **Step 1: Inspect the current `requirements.lock` format** + +```bash +head -5 requirements.lock +grep -E "^(slowapi|limits|pydantic)" requirements.lock +``` + +Confirm the file is a flat list of `package==version` pins (one per line). The relevant context lines are `slowapi==0.1.9` and `limits==5.8.0`. + +- [ ] **Step 2: Determine the redis-py version `limits[redis]` requires** + +```bash +python3 -c " +import importlib.metadata as md +reqs = md.requires('limits') or [] +print('\n'.join(r for r in reqs if 'redis' in r.lower())) +" +pip index versions redis 2>&1 | head -5 +``` + +Expected: a line like `redis (>=3,<6) ; extra == 'redis'` or similar. Note the version constraint and pick the highest currently-released version inside that constraint. + +- [ ] **Step 3: Verify the chosen version installs cleanly** + +```bash +pip install --dry-run "redis==" 2>&1 | tail -5 +``` + +Expected: no version-conflict errors against the rest of `requirements.lock`. + +- [ ] **Step 4: Add the pin to `requirements.lock`** + +Edit `requirements.lock`. Insert the pin in alphabetical order (the file is already sorted; find where `r` belongs — typically between `pyyaml` and `requests` or similar). Add the single line: + +``` +redis== +``` + +- [ ] **Step 5: Verify the requirements file as a whole still resolves** + +```bash +pip install --dry-run -r requirements.lock 2>&1 | tail -10 +``` + +Expected: no version-conflict errors. + +- [ ] **Step 6: Install the dep so subsequent tasks' tests can run** + +```bash +pip install -q "redis==" +python3 -c "import redis; print('redis', redis.__version__)" +``` + +Expected: prints the installed redis version cleanly. (If the environment is PEP 668 / externally-managed and pip refuses, install into the project venv or use `--break-system-packages` per the local environment's policy. CI installs via `pip install -r requirements.lock` so the published artefact and CI both behave correctly.) 
+ +- [ ] **Step 7: Run the existing test suite to verify no regressions** + +```bash +pytest -x -q +``` + +Expected: green. Adding the dep shouldn't change behaviour because no test imports `redis` directly yet. + +- [ ] **Step 8: Commit** + +```bash +git add requirements.lock +git commit -m "deps: add redis runtime dependency for multi-worker rate limiting (#68) + +Pulls in redis-py at the version limits 5.8.0 expects, used only when +PC2NUTS_RATE_LIMIT_STORAGE_URI is set. Single-host deployers who never +configure shared storage pay the install-size cost but no runtime cost +(redis is imported eagerly by limits.storage.RedisStorage at Limiter +construction, but only when the storage URI is configured)." +``` + +--- + +## Task 3: Extract `Limiter` into `app/limiter.py` + +**Files:** +- Create: `app/limiter.py` +- Modify: `app/main.py` +- Create: `tests/test_limiter.py` + +This task pulls the inline `Limiter(key_func=get_remote_address)` line at `app/main.py:45` into a dedicated module that selects between the default and Redis-configured construction. The default branch is byte-for-byte the current code. + +- [ ] **Step 1: Write the failing tests** + +Create `tests/test_limiter.py`: + +```python +"""Tests for app.limiter — verify the Limiter is wired with the right storage +mode based on settings.rate_limit_storage_uri.""" + +import importlib + +import pytest + + +def _reload_limiter(): + """Reload app.config and app.limiter so the module-level Limiter picks up + the current env. Returns the freshly-imported app.limiter module.""" + import app.config + import app.limiter + importlib.reload(app.config) + importlib.reload(app.limiter) + return app.limiter + + +class TestLimiterStorageSelection: + def test_limiter_default_uses_memory_storage(self, monkeypatch): + """When no storage URI is set, the Limiter is constructed with the + slowapi default (in-process memory) and fallback is disabled. 
This is + the byte-for-byte current behaviour.""" + monkeypatch.delenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", raising=False) + monkeypatch.delenv("PC2NUTS_WORKERS", raising=False) + + mod = _reload_limiter() + + # slowapi stores the configured URI on _storage_uri; default is None. + assert mod.limiter._storage_uri is None + # in_memory_fallback_enabled is only meaningful when a URI is set. + assert mod.limiter._in_memory_fallback_enabled is False + + def test_limiter_with_redis_uri_enables_fallback(self, monkeypatch): + """When a storage URI is set, the Limiter is constructed with that URI + AND in_memory_fallback_enabled=True. No network call should happen at + construction time — slowapi builds the storage object lazily.""" + monkeypatch.setenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", "redis://localhost:6379/0") + monkeypatch.setenv("PC2NUTS_WORKERS", "2") + + mod = _reload_limiter() + + assert mod.limiter._storage_uri == "redis://localhost:6379/0" + assert mod.limiter._in_memory_fallback_enabled is True + + @pytest.fixture(autouse=True) + def _restore_default_after_each_test(self, monkeypatch): + """After each test, force a reload back to defaults so other tests + in the suite see the unmodified module.""" + yield + monkeypatch.delenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", raising=False) + monkeypatch.delenv("PC2NUTS_WORKERS", raising=False) + _reload_limiter() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +pytest tests/test_limiter.py -v +``` + +Expected: all tests FAIL with `ModuleNotFoundError: No module named 'app.limiter'`. + +- [ ] **Step 3: Create `app/limiter.py`** + +Full contents: + +```python +"""Module-level slowapi Limiter, wired according to settings. + +When PC2NUTS_RATE_LIMIT_STORAGE_URI is unset, the Limiter falls back to +slowapi's in-process MemoryStorage default — byte-for-byte the same as +the pre-#68 inline construction. + +When the URI is set (e.g. 
'redis://host:6379/0'), the Limiter routes +counters through the configured backend, with in_memory_fallback_enabled +giving us per-process MemoryStorage during transient backend outages. +slowapi handles the fail-degraded behaviour internally with exponential- +backoff re-probing — see app/main.py:_rate_limit_handler for the 429 +response, and the spec at docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md +for the design rationale. +""" + +from slowapi import Limiter +from slowapi.util import get_remote_address + +from app.config import settings + +if settings.rate_limit_storage_uri: + limiter = Limiter( + key_func=get_remote_address, + storage_uri=settings.rate_limit_storage_uri, + in_memory_fallback_enabled=True, + ) +else: + limiter = Limiter(key_func=get_remote_address) +``` + +- [ ] **Step 4: Update `app/main.py` to import the limiter** + +In `app/main.py`, locate the existing slowapi imports and the inline Limiter construction: + +Current state (lines 16-18 and line 45): +```python +from slowapi import Limiter +from slowapi.errors import RateLimitExceeded +from slowapi.util import get_remote_address +... +limiter = Limiter(key_func=get_remote_address) +``` + +Change to: + +```python +from slowapi.errors import RateLimitExceeded +... +from app.limiter import limiter +``` + +Concretely: +- Remove `from slowapi import Limiter` (line 16). The `Limiter` symbol is no longer used in `main.py`. +- Remove `from slowapi.util import get_remote_address` (line 18). Same reason. +- Keep `from slowapi.errors import RateLimitExceeded` (line 17) — still used by `_rate_limit_handler`. +- Replace the standalone `limiter = Limiter(key_func=get_remote_address)` line (line 45) with: + ```python + from app.limiter import limiter + ``` + Place this with the other `from app...` imports in the import block (around line 22-23, alongside `from app.auth import ...` and `from app.config import settings`). 
+ +After the changes, the slowapi-related imports in `app/main.py` should be just: + +```python +from slowapi.errors import RateLimitExceeded +``` + +And `limiter` should be sourced via: + +```python +from app.limiter import limiter +``` + +The decorators `@limiter.limit(...)` and the `app.state.limiter = limiter` line stay exactly as they are. + +- [ ] **Step 5: Run the new tests to verify they pass** + +```bash +pytest tests/test_limiter.py -v +``` + +Expected: both tests PASS. + +- [ ] **Step 6: Run the full test suite to verify no regressions** + +```bash +pytest -x -q +``` + +Expected: green. Particular attention to `tests/test_api.py::TestRateLimitTokens::test_valid_token_bypasses_rate_limit` (around test_api.py:226) — this exercises the `@limiter.limit(...)` decorator path and must still pass exactly as before. + +If any test fails, the most likely cause is a circular import (`app.limiter` imports `app.config.settings`, which is fine; `app.main` imports `app.limiter`, which is also fine; but `app.config` must NOT import `app.limiter`). Verify by reading the import block of `app/config.py` — it should only import from `pydantic`, `pydantic_settings`, and stdlib. + +- [ ] **Step 7: Commit** + +```bash +git add app/limiter.py app/main.py tests/test_limiter.py +git commit -m "feat(limiter): extract slowapi Limiter into dedicated module (#68) + +When PC2NUTS_RATE_LIMIT_STORAGE_URI is set, construct the Limiter with +that storage URI and in_memory_fallback_enabled=True so transient +backend outages fall back to per-process MemoryStorage. When unset, +construction is byte-for-byte the previous inline call. + +slowapi's built-in fallback handles outage detection, once-per-outage +WARNING logging, and exponential-backoff recovery probes." +``` + +--- + +## Task 4: Wire `PC2NUTS_WORKERS` into the Dockerfile + +**Files:** +- Modify: `Dockerfile` + +Single-line change to the `CMD`. 
The shell-form `CMD ["sh", "-c", "..."]` lets `${PC2NUTS_WORKERS:-1}` expand at container start using the env-var-with-default shell idiom. Default `:-1` preserves single-worker behaviour when the env var is unset. + +- [ ] **Step 1: Read the current Dockerfile to confirm the CMD line** + +```bash +grep -n "CMD" Dockerfile +``` + +Expected (current): `24:CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]` + +- [ ] **Step 2: Replace the `CMD` line** + +Edit `Dockerfile`, line 24. Replace: + +``` +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +with: + +``` +CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers ${PC2NUTS_WORKERS:-1}"] +``` + +The `exec` keeps `uvicorn` as the foreground process (replaces `sh` instead of leaving it as parent), so SIGTERM from the container runtime reaches uvicorn directly. Critical for graceful shutdown. + +- [ ] **Step 3: Sanity-check the Dockerfile builds** + +```bash +docker build -t pc2nuts:test-multi-worker . 2>&1 | tail -10 +``` + +Expected: `Successfully tagged pc2nuts:test-multi-worker` or equivalent (depending on Docker version). If the build fails for unrelated reasons (e.g. base image version drift), record the error and stop — do NOT modify other parts of the Dockerfile to make the build pass. + +If `docker` is not available in the agent environment, skip this step and rely on CI (the existing `docker` job in `.github/workflows/ci.yml` will exercise the build on PR). + +- [ ] **Step 4: Smoke-test the container with default (single-worker)** + +If Step 3 succeeded: + +```bash +docker run --rm -d --name pc2nuts-test -p 8000:8000 pc2nuts:test-multi-worker +sleep 8 +curl -fsS http://localhost:8000/health | head -c 300 +docker stop pc2nuts-test +``` + +Expected: `/health` returns 200 with the usual JSON body. The `--workers 1` is implicit (no env var → default). 
+ +If the curl returns non-200 or hangs, capture logs: + +```bash +docker logs pc2nuts-test 2>&1 | tail -40 +``` + +The most likely failure modes are: +- `${PC2NUTS_WORKERS:-1}` not expanding (would be visible in logs as `--workers ${PC2NUTS_WORKERS:-1}` literal). If so, double-check that the `CMD` is shell-form (`sh -c`). +- Existing data-load failures unrelated to this change (TERCET download). Not our problem here. + +- [ ] **Step 5: Smoke-test the container with `PC2NUTS_WORKERS=2` and no storage URI (must fail)** + +```bash +docker run --rm -d --name pc2nuts-test-bad -e PC2NUTS_WORKERS=2 -p 8001:8000 pc2nuts:test-multi-worker +sleep 5 +docker logs pc2nuts-test-bad 2>&1 | tail -20 +docker stop pc2nuts-test-bad 2>/dev/null || true +docker rm pc2nuts-test-bad 2>/dev/null || true +``` + +Expected: container exits early; logs contain the validator's error message naming `PC2NUTS_WORKERS` and `PC2NUTS_RATE_LIMIT_STORAGE_URI`. This confirms the startup guard fires inside the container, not just in unit tests. + +If Docker is not available, skip — CI will exercise the build. + +- [ ] **Step 6: Commit** + +```bash +git add Dockerfile +git commit -m "feat(docker): support PC2NUTS_WORKERS env var (#68) + +Switches CMD from exec-form to shell-form with 'exec uvicorn …' so +\${PC2NUTS_WORKERS:-1} expands at container start while uvicorn remains +the foreground PID-1 process for proper SIGTERM handling. + +Default of 1 preserves current single-worker behaviour. Multi-worker +mode also requires PC2NUTS_RATE_LIMIT_STORAGE_URI; the Settings +validator (added in feat(config)) refuses to start otherwise." +``` + +--- + +## Task 5: Document the new env vars in README + +**Files:** +- Modify: `README.md` + +Add a "Multi-worker deployment" subsection near the existing rate-limit / configuration documentation. Reference the slowapi fail-degraded behaviour so operators know what to expect during a Redis outage. 
+ +- [ ] **Step 1: Locate the right insertion point** + +```bash +grep -n "rate_limit\|rate limit\|RATE_LIMIT" README.md | head -20 +grep -n "## Configuration\|### Configuration\|## Environment" README.md | head -10 +``` + +Look for the existing rate-limit / env-var documentation block. The new content goes immediately after that block. + +- [ ] **Step 2: Read the surrounding context** + +Read the 30 lines around the rate-limit doc to match the existing style (table format, env-var naming convention, voice). Note whether the README uses backticks for env vars, what tone is used (terse vs prose), etc. + +- [ ] **Step 3: Insert the new section** + +Add the following block immediately after the existing rate-limit documentation, matching the README's existing formatting. If the README uses a table for env vars, add two rows; if it uses a definitions list, add two items. The content (adapt to the existing style): + +```markdown +### Multi-worker deployment + +By default the service runs a single uvicorn worker process. Throughput is +CPU-bound at ~30 RPS per worker (see [docs/performance.md](docs/performance.md)). +For higher RPS, set `PC2NUTS_WORKERS` to the number of worker processes you +want — the rough rule of thumb is one worker per CPU core, capped by the +available memory (~150-200 MB resident set per worker). + +| Env var | Default | Effect | +|---|---|---| +| `PC2NUTS_WORKERS` | `1` | Number of uvicorn worker processes. | +| `PC2NUTS_RATE_LIMIT_STORAGE_URI` | (unset) | When unset, slowapi uses per-process in-memory storage (default). When set (e.g. `redis://host:6379/0`), counters are shared across workers so the published `rate_limit` cap stays accurate. | + +When `PC2NUTS_WORKERS > 1`, `PC2NUTS_RATE_LIMIT_STORAGE_URI` MUST be set +to a reachable shared backend; the service refuses to start otherwise. +This guards against the per-IP rate limit silently loosening to +`PC2NUTS_WORKERS × rate_limit` per IP under multi-worker. 
+ +**Degraded mode.** If the configured storage backend becomes unreachable +at runtime, slowapi (`in_memory_fallback_enabled=True`) falls back to +per-process in-memory rate limiting and re-probes the primary storage +with exponential backoff. During the outage window the effective per-IP +cap is `PC2NUTS_WORKERS × rate_limit`. Recovery is automatic; one +WARNING log line is emitted at the start of the outage and one INFO line +on recovery. +``` + +- [ ] **Step 4: Verify the README still renders sensibly** + +```bash +# Syntax-check by piping through a markdown parser if one is available +python3 -c " +with open('README.md') as f: + content = f.read() +# Crude check: equal counts of backticks (no unmatched code spans) +import re +inline = len(re.findall(r'(?&1 | tail -10 +``` + +Expected: no new findings on `app/limiter.py`. If bandit complains about something legitimately security-sensitive, fix it; if it's a false positive on the slowapi/redis URI handling, document the suppression with a comment naming the rule. + +- [ ] **Step 5: Eyeball the diff against `main`** + +```bash +git diff main...HEAD --stat +git diff main...HEAD docs/superpowers/specs/ docs/superpowers/plans/ +``` + +Expected: the file list matches the file map at the top of this plan. The spec and plan diffs should be readable and consistent. The implementation files (`app/config.py`, `app/limiter.py`, `app/main.py`, `Dockerfile`, `requirements.lock`, `app/settings.json`, two test files, README, CHANGELOG) should all be present. + +- [ ] **Step 6: Push the branch and open a PR** + +```bash +git push -u origin feat/multi-worker-uvicorn +``` + +Then open the PR with `gh pr create`, mapping each acceptance criterion from issue #68 to the relevant component. Body template: + +```markdown +## Summary + +Implements [#68](https://github.com/bk86a/PostalCode2NUTS/issues/68): multi-worker uvicorn behind a shared rate-limit backend. 
+ +- New env vars: `PC2NUTS_WORKERS` (default `1`), `PC2NUTS_RATE_LIMIT_STORAGE_URI` (default unset). +- Startup hard-fails if `WORKERS > 1` without a storage URI configured. +- slowapi `in_memory_fallback_enabled=True` handles transient backend outages with per-process MemoryStorage fallback and exponential-backoff re-probing. +- Default behaviour (single worker, in-memory) is byte-for-byte unchanged. + +Spec: `docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md` +Plan: `docs/superpowers/plans/2026-05-01-multi-worker-uvicorn.md` + +## Acceptance criteria from #68 + +- [x] `Dockerfile` updated to launch N workers via `PC2NUTS_WORKERS` +- [x] Rate-limit behaviour with N workers documented in `README.md` +- [ ] Memory usage measured against the container's allocated memory — operational follow-up post-deploy +- [ ] Performance baseline re-run; `docs/performance.md` updated — operational follow-up post-deploy + +## Test plan + +- [ ] Existing rate-limit tests still pass (single-worker default branch is byte-for-byte unchanged) +- [ ] New `tests/test_config.py::TestWorkersValidator` proves the validator fires +- [ ] New `tests/test_limiter.py::TestLimiterStorageSelection` proves the storage URI is honoured +- [ ] Manual: deploy with `PC2NUTS_WORKERS=2` + a real Redis, confirm the cap holds across workers (operational) +- [ ] Manual: kill Redis with traffic flowing, confirm WARNING logs once and INFO logs on recovery (operational) +``` + +--- + +## Self-review + +Before marking the plan complete, the implementer (or dispatching agent) should run through this checklist: + +- **Spec coverage.** Every section of the spec maps to a task: + - §3 Configuration surface → Task 1 + - §4.1 Settings additions → Task 1 + - §4.2 `app/limiter.py` → Task 3 + - §4.3 slowapi fail-degraded behaviour → Task 3 (the `in_memory_fallback_enabled=True` flag) + - §4.4 Dockerfile → Task 4 + - §4.5 requirements.lock → Task 2 + - §4.6 `app/main.py` → Task 3 + - §6 Error handling — 
covered by slowapi internals; test scope per §7
+  - §7 Testing → Tasks 1 and 3 each include their own tests
+  - §8 Documentation → Tasks 5 and 6
+  - §9 Acceptance-criteria mapping → Task 7 PR body
+- **Placeholder scan.** Every "implement" step contains the actual code. The only `<version>` placeholders are in Task 2, where the version genuinely depends on the live `pip` resolution; the steps tell the implementer how to derive the concrete value.
+- **Type/name consistency.** `Settings.workers` (int, default 1), `Settings.rate_limit_storage_uri` (str|None, default None), `app.limiter.limiter` (slowapi.Limiter), `PC2NUTS_WORKERS` and `PC2NUTS_RATE_LIMIT_STORAGE_URI` env-var names — all match across tasks.
diff --git a/docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md b/docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md
new file mode 100644
index 0000000..74c3ceb
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md
@@ -0,0 +1,252 @@
+# Multi-worker uvicorn with Redis-backed rate limiting — design
+
+**Status:** approved (brainstorming complete)
+**Date:** 2026-05-01
+**Issue:** [#68](https://github.com/bk86a/PostalCode2NUTS/issues/68)
+**Scope:** allow the service to run with N uvicorn workers behind a shared rate-limit backend, preserving the strict per-IP cap that single-worker deployments rely on. Single-host PyPI/Docker deployers must remain unaffected.
+
+---
+
+## 1. Goals and non-goals
+
+### Goals
+
+- Allow `uvicorn` to run with `N > 1` workers via a single env var (`PC2NUTS_WORKERS`).
+- Keep the per-IP rate-limit cap strict at `settings.rate_limit` (currently `120/minute`) across all workers, by routing slowapi storage through a shared backend (Redis) when configured.
+- Keep the trusted-token bypass (`exempt_when=is_trusted_request`) working unchanged.
+- Preserve the current single-worker, in-memory deploy path byte-for-byte when the new env vars are unset.
+- Tolerate transient unavailability of the shared backend without taking the API down. +- Hard-fail at startup if the operator asks for `N > 1` without configuring shared storage, so the cap can never silently loosen. + +### Non-goals + +- Provisioning the Redis instance itself — operational concern, documented in `README.md` as a prerequisite for the multi-worker mode. +- Re-running the performance baseline and updating `docs/performance.md` — separate post-deploy task once the multi-worker container is in production. +- Edge-layer rate limiting (bunny.net Shield + Edge Rules) — investigated during brainstorming, not on the table without a documented header-conditional bypass mechanism. Left as a future option. +- Switching from `uvicorn --workers` to `gunicorn` — not needed for this workload (no streaming, no long-poll, requests <100 ms), and adds a process-manager dependency. +- Recycling worker processes after N requests, graceful timeouts on individual requests, or other lifecycle features. +- Implementation work — this document is the design only. + +--- + +## 2. Background + +### Why this is the next perf win + +The performance baseline at commit `526f289` (`docs/performance.md`) measured sustained throughput at ~30 RPS on a single uvicorn worker. The bottleneck is per-request server-side CPU work (~30 ms in `/lookup` logic + Pydantic serialisation; the actual dict lookup is statistically free). Each additional worker should add roughly +30 RPS of headroom, up to the container CPU count. This is the single largest available perf win that requires no application-logic change. + +### Why slowapi needs a shared backend + +slowapi's default storage is in-memory (`MemoryStorage` from the `limits` library), per-process. With `N` workers, each worker holds its own counters, so the per-IP cap effectively becomes `N × settings.rate_limit`. 
The project's published cap is `120/minute` per anonymous IP, with trusted-token holders exempt — that contract must hold under multi-worker. + +### Three rejected alternatives (recorded for the next time this is revisited) + +- **Accept the loosening (option (c) in brainstorming).** Document that the published cap is per-process; aggregate is N× higher. Rejected: violates the published contract. +- **Edge-layer rate limiting via bunny.net (option (b)).** bunny.net Shield offers global per-IP rate limiting and Edge Rules support header-based logic, but combining the two — i.e. "rate-limit by IP, except when an Authorization header is present" — is not a documented configuration, and trusted-token holders would be throttled at the edge. Rejected without a support-ticket confirmation; left as a future option if bunny.net adds the capability. +- **Divide the cap by N at the worker level (`120/N` per worker).** Rejected: with HTTP keepalive (default for almost all clients), all of a client's rapid requests pin to the same worker, so they would hit `120/N` and be throttled even though aggregate budget allows 120. False-positive rate limits. + +--- + +## 3. Configuration surface + +Two new env vars, both with defaults preserving current behavior. + +| Env var | Type | Default | Effect | +|---|---|---|---| +| `PC2NUTS_WORKERS` | `int ≥ 1` | `1` | Number of uvicorn worker processes. | +| `PC2NUTS_RATE_LIMIT_STORAGE_URI` | `str \| None` | `None` (unset) | When unset, slowapi uses in-memory storage (current behavior). When set (e.g. `redis://host:6379/0`), slowapi routes through the fail-degraded shared backend wrapper. | + +Both surface in `app/config.py` (`Settings` model, `settings.json` defaults, env-var override per existing pattern). Naming follows the existing `PC2NUTS_*` convention. + +### Startup validation + +If `PC2NUTS_WORKERS > 1` and `PC2NUTS_RATE_LIMIT_STORAGE_URI` is unset/empty, the app refuses to start with a clear error message naming both env vars. 
Implemented as a Pydantic model validator on `Settings`. `Settings()` is instantiated at module-import time in `app/config.py`, which runs once in the uvicorn supervisor process before the workers are forked, so the failure is pre-fork and pre-bind: the supervisor exits non-zero with the validator's message and no workers are started. (Each worker also re-imports the module after fork, but the supervisor has already failed by then in the misconfigured case, so worker-side behavior is moot.) + +--- + +## 4. Components + +### 4.1 `app/config.py` — settings additions + +Two new fields, default-preserving, plus a model validator. Defaults can also be set in `app/settings.json` for self-hosters who want to bake worker counts into the image. + +```python +class Settings(BaseSettings): + # ... existing fields ... + workers: int = _defaults.get("workers", 1) + rate_limit_storage_uri: str | None = _defaults.get("rate_limit_storage_uri", None) + + @model_validator(mode="after") + def _check_workers_have_shared_storage(self) -> "Settings": + if self.workers > 1 and not self.rate_limit_storage_uri: + raise ValueError( + "PC2NUTS_WORKERS > 1 requires PC2NUTS_RATE_LIMIT_STORAGE_URI to be set " + "(e.g. 'redis://host:6379/0'). Without shared storage the per-IP rate limit " + "would silently loosen by a factor of WORKERS." + ) + return self +``` + +The exact field name and Pydantic version semantics are existing-codebase concerns to confirm during implementation; the design intent is "model-level validator that fails fast on the unsafe combination." + +### 4.2 `app/limiter.py` — new module + +Single responsibility: own the slowapi `Limiter` instance and the storage-mode choice. Today this is inline in `app/main.py`; pulling it out keeps `main.py` slimmer and isolates the (small) configuration logic. 
+ +Public surface: + +```python +# app/limiter.py +from slowapi import Limiter +from slowapi.util import get_remote_address +from app.config import settings + +if settings.rate_limit_storage_uri: + limiter = Limiter( + key_func=get_remote_address, + storage_uri=settings.rate_limit_storage_uri, + in_memory_fallback_enabled=True, + ) +else: + limiter = Limiter(key_func=get_remote_address) +``` + +That's the entire module. Two branches; default branch is byte-for-byte identical to the current `app/main.py:45` line. + +`app/main.py` changes from `limiter = Limiter(...)` to `from app.limiter import limiter`. The `app.state.limiter = limiter` and `@limiter.limit(...)` decorators stay exactly as they are. + +### 4.3 Fail-degraded behavior — slowapi built-in + +slowapi 0.1.9 already implements the fail-degraded design we want via the `in_memory_fallback_enabled` parameter. Behavior, taken directly from `slowapi/extension.py`: + +- **Trigger:** when any storage call raises, slowapi sets an internal `_storage_dead = True` flag, logs once at WARNING (`"Rate limit storage unreachable - falling back to in-memory storage"`), and routes subsequent rate-limit checks to a separate per-process `MemoryStorage`-backed `MovingWindowRateLimiter`. +- **Recovery:** uses exponential backoff. The `__should_check_backend()` helper waits `2^N` seconds between consecutive re-probes (where `N` is the consecutive check count, capped at `MAX_BACKEND_CHECKS`). When a probe succeeds (`self._storage.check()`), `_storage_dead` is cleared and slowapi logs at INFO (`"Rate limit storage recovered"`). +- **Logging policy:** the `if not self._storage_dead` guard around the WARNING means it fires once per outage transition, exactly matching our spec intent. +- **Concurrency:** each worker has its own `_storage_dead` flag and its own fallback `MemoryStorage`. 
Per-worker outage tracking is correct (a worker that can't reach Redis is independently degraded), and the fallback is per-worker, so during an outage the effective cap loosens to `N × settings.rate_limit` per IP — same as our designed §3 behavior.
+
+This means we write **zero custom storage code**. The plan only needs to wire `in_memory_fallback_enabled=True` into the `Limiter` construction and verify the wiring at the unit-test level.
+
+The exponential-backoff recovery cadence (rather than a fixed 30 s window) is strictly better for the operator: gentler on a recovering Redis under load, faster recovery for short blips.
+
+### 4.4 `Dockerfile`
+
+Single change: switch the `CMD` from exec form to shell form so `${PC2NUTS_WORKERS}` expands at container start.
+
+```dockerfile
+CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers ${PC2NUTS_WORKERS:-1}"]
+```
+
+The default `:-1` keeps single-worker behavior when the env var is unset. The shell form is needed only for the env-var expansion; the `exec` makes the shell replace itself with uvicorn, so uvicorn ends up as PID 1 and receives SIGTERM directly (a bare shell-form CMD would leave `sh` as PID 1, and `sh` does not forward signals to its child).
+
+### 4.5 `requirements.lock`
+
+Add a `redis` runtime dependency. Easiest path is `slowapi[redis]` or `limits[redis]` (whichever extra is exposed by the version slowapi pins); fallback is a direct `redis` pin matching the major-version range that `limits` expects (concrete numbers chosen at implementation time — see §10).
+
+The dependency is loaded eagerly at import time by `app/limiter.py` only when `rate_limit_storage_uri` is set; single-host deployers who never set the env var still pay the install-size cost but no runtime cost. Acceptable trade-off.
+
+### 4.6 `app/main.py`
+
+- Remove the inline `limiter = Limiter(key_func=get_remote_address)` line.
+- Add `from app.limiter import limiter` near the existing slowapi imports. +- Everything else (`app.state.limiter = limiter`, the `RateLimitExceeded` handler, the `@limiter.limit(...)` decorators on `/lookup` and `/lookup/{country}/{postal}`) stays exactly as it is. + +--- + +## 5. Data flow + +### Single-worker deploy (default, unchanged) + +``` +Request → uvicorn (1 worker) → FastAPI → @limiter.limit → MemoryStorage (in-process dict) → handler +``` + +Identical to today. Trusted-token requests skip the limiter via `exempt_when`. + +### Multi-worker deploy (new) + +``` +Request → uvicorn (N workers, OS distributes) → FastAPI → @limiter.limit + → slowapi Limiter (in_memory_fallback_enabled=True) + _storage_dead=False: → Redis (shared across workers) → handler + _storage_dead=True: → per-worker MemoryStorage fallback → handler + (cap effectively loosened to N× during outage window; + slowapi re-probes Redis with exponential backoff) +``` + +Trusted-token requests still skip the limiter entirely; the storage call is not made for them. + +--- + +## 6. Error handling + +The fail-degraded path lives entirely inside slowapi (`in_memory_fallback_enabled=True`). 
Behavior matrix, derived from `slowapi/extension.py`: + +| Condition | slowapi behavior | Cap during this state | +|---|---|---| +| Redis healthy | All counters in Redis, shared across workers | Strict `settings.rate_limit` per IP | +| Redis raises (any exception) | `_storage_dead = True`; logs WARNING once; routes subsequent checks to per-worker `MemoryStorage` | `N × settings.rate_limit` per IP per worker | +| Redis recovery probe (every `2^N` seconds, exponential backoff) succeeds | `_storage_dead = False`; logs INFO once; back to Redis | Strict on next request | +| Redis still down at next probe | Probe fails silently; backoff continues; no new log | Continues N× per worker | + +slowapi catches `Exception` broadly around its storage interactions, so connection errors, timeouts, and redis-py-specific errors are all handled. Programming errors in our own code paths still bubble up. + +The existing `_rate_limit_handler` for `RateLimitExceeded` is unchanged. Trusted-token requests still bypass entirely (the `exempt_when` check fires before the storage call). + +### Documented degraded mode + +`README.md` adds a short note in the rate-limit section: "When `PC2NUTS_RATE_LIMIT_STORAGE_URI` is set and the configured backend is unreachable, the service falls back to per-process in-memory rate limiting (slowapi `in_memory_fallback_enabled`). During the outage window, the effective per-IP cap is `PC2NUTS_WORKERS × rate_limit`. slowapi re-probes the primary storage with exponential backoff and resumes shared rate limiting on recovery." + +--- + +## 7. Testing + +### Unchanged (passes as-is) + +- All existing rate-limit tests run against the default in-memory `Limiter` and continue to pass byte-for-byte. The `Limiter` construction path for `rate_limit_storage_uri == None` is exactly the current code. 
+ +### New tests + +Unit: + +- **`test_limiter_default_uses_memory_storage`** — when `rate_limit_storage_uri` is unset, `app.limiter.limiter._storage_uri` is the slowapi default (`None` or `"memory://"`) and `_in_memory_fallback_enabled` is `False`. Confirms the no-config path is byte-for-byte the current behavior. +- **`test_limiter_with_redis_uri_enables_fallback`** — when `rate_limit_storage_uri="redis://localhost:6379/0"` is configured (via `monkeypatch.setenv`), `app.limiter.limiter._storage_uri` is the configured URI and `_in_memory_fallback_enabled` is `True`. Does not contact Redis (Limiter constructor builds the storage object lazily-enough that no connection is opened until first request, but to be safe the test asserts on the constructor-level attributes only). + +Startup validator: + +- **`test_workers_gt_1_without_storage_uri_fails_startup`** — instantiate `Settings(workers=2, rate_limit_storage_uri=None)` and assert `ValidationError`. Asserts the validator fires. +- **`test_workers_gt_1_with_storage_uri_succeeds`** — `Settings(workers=2, rate_limit_storage_uri="redis://localhost:6379/0")` constructs cleanly without contacting Redis. +- **`test_workers_eq_1_without_storage_uri_succeeds`** — defaults still validate (regression guard). + +Integration (out of scope for the implementation PR — left as a separate task if CI gains a Redis service container): + +- A future integration test could spin up an ephemeral Redis, run two workers via `uvicorn --workers 2`, fire `>120` requests in a minute from a single client, assert the cap is observed across workers. The slowapi fail-degraded path is library code (already tested upstream), so the implementation PR does not need to re-test it; we only verify that our wiring activates the right slowapi configuration. + +--- + +## 8. 
Documentation
+
+- **`README.md`** — add a "Multi-worker deployment" subsection under the existing Configuration section: documents `PC2NUTS_WORKERS`, `PC2NUTS_RATE_LIMIT_STORAGE_URI`, the fail-degraded behavior during Redis outages, and the startup-validation guard. Cross-links to issue #68 in the changelog entry.
+- **`CHANGELOG.md`** — entry under Unreleased: "Added multi-worker deployment via `PC2NUTS_WORKERS` env var. Multi-worker mode requires `PC2NUTS_RATE_LIMIT_STORAGE_URI` (e.g. a Redis URL) so the per-IP rate limit stays strict across workers; transient backend unavailability is tolerated via per-worker in-memory fallback with exponential-backoff re-probing."
+- **`docs/performance.md`** — no changes in this PR. The re-baseline run (acceptance criterion in #68) is post-deploy operational work.
+
+---
+
+## 9. Acceptance criteria mapping (from issue #68)
+
+| Issue criterion | Where addressed |
+|---|---|
+| `Dockerfile` updated to launch N workers (configurable via env var) | §4.4 |
+| Memory usage measured against the container's allocated memory; document the headroom | Out of scope for this PR — operational follow-up. README documents the per-worker memory ballpark from the issue body. |
+| Rate-limit behaviour with N workers documented in `README.md` | §8 — option (a) Redis chosen, behavior documented including degraded mode |
+| Performance baseline re-run; `docs/performance.md` updated | Out of scope for this PR — separate operational task post-deploy |
+
+The two "out of scope" items are operational/empirical work that can only happen against the deployed multi-worker container. They are tracked as a follow-up task in the issue rather than blocking the implementation PR.
+
+---
+
+## 10. Open questions deferred to implementation
+
+- **Redis client library version pin.** `redis` (redis-py) — exact version range driven by what `limits 5.8.0` requires. The `limits[redis]` extra is the cleanest path; falls back to a direct pin if needed.
Confirmed at implementation time. +- **Pydantic validator semantics.** Confirmed: project is on Pydantic v2 (`pydantic==2.12.5`, `pydantic-settings==2.13.0` per `requirements.lock`), so `@model_validator(mode="after")` is the target. + +These are mechanical questions that don't change the architecture; they're called out so the implementation plan addresses them rather than stumbling on them. diff --git a/requirements.lock b/requirements.lock index 9bbcf9d..1bbf077 100644 --- a/requirements.lock +++ b/requirements.lock @@ -17,6 +17,7 @@ pydantic==2.12.5 pydantic-settings==2.13.0 python-dotenv==1.2.2 PyYAML==6.0.1 +redis==7.4.0 slowapi==0.1.9 starlette==0.52.1 typing_extensions==4.15.0 diff --git a/requirements.txt b/requirements.txt index 1a76291..cb1dd63 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ httpx>=0.28.1,<1 pydantic>=2.13.3,<3 pydantic-settings>=2.14.0,<3 slowapi>=0.1.9,<1 +limits[redis]>=2.3 python-dotenv>=1.2.2,<2 diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..73802fb --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,34 @@ +"""Tests for app.config.Settings.""" + +import pytest +from pydantic import ValidationError + +from app.config import Settings + + +class TestWorkersValidator: + def test_workers_eq_1_without_storage_uri_succeeds(self): + """Default config must keep validating — single-worker, no storage URI.""" + s = Settings(workers=1, rate_limit_storage_uri=None) + assert s.workers == 1 + assert s.rate_limit_storage_uri is None + + def test_workers_gt_1_with_storage_uri_succeeds(self): + """Multi-worker is permitted when a storage URI is configured.""" + s = Settings(workers=4, rate_limit_storage_uri="redis://localhost:6379/0") + assert s.workers == 4 + assert s.rate_limit_storage_uri == "redis://localhost:6379/0" + + def test_workers_gt_1_without_storage_uri_fails_startup(self): + """The unsafe combination must raise — silent cap loosening is the + failure mode this validator 
exists to prevent.""" + with pytest.raises(ValidationError) as excinfo: + Settings(workers=2, rate_limit_storage_uri=None) + msg = str(excinfo.value) + assert "PC2NUTS_WORKERS" in msg + assert "PC2NUTS_RATE_LIMIT_STORAGE_URI" in msg + + def test_workers_gt_1_with_empty_storage_uri_fails_startup(self): + """Empty string should be treated the same as None — both mean unset.""" + with pytest.raises(ValidationError): + Settings(workers=2, rate_limit_storage_uri="") diff --git a/tests/test_limiter.py b/tests/test_limiter.py new file mode 100644 index 0000000..0bf9617 --- /dev/null +++ b/tests/test_limiter.py @@ -0,0 +1,54 @@ +"""Tests for app.limiter — verify the Limiter is wired with the right storage +mode based on settings.rate_limit_storage_uri.""" + +import importlib + +import pytest + + +def _reload_limiter(): + """Reload app.config and app.limiter so the module-level Limiter picks up + the current env. Returns the freshly-imported app.limiter module.""" + import app.config + import app.limiter + + importlib.reload(app.config) + importlib.reload(app.limiter) + return app.limiter + + +class TestLimiterStorageSelection: + def test_limiter_default_uses_memory_storage(self, monkeypatch): + """When no storage URI is set, the Limiter is constructed with the + slowapi default (in-process memory) and fallback is disabled. This is + the byte-for-byte current behaviour.""" + monkeypatch.delenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", raising=False) + monkeypatch.delenv("PC2NUTS_WORKERS", raising=False) + + mod = _reload_limiter() + + # slowapi stores the configured URI on _storage_uri; default is None. + assert mod.limiter._storage_uri is None + # in_memory_fallback_enabled is only meaningful when a URI is set. + assert mod.limiter._in_memory_fallback_enabled is False + + def test_limiter_with_redis_uri_enables_fallback(self, monkeypatch): + """When a storage URI is set, the Limiter is constructed with that URI + AND in_memory_fallback_enabled=True. 
No network call should happen at + construction time — slowapi builds the storage object lazily.""" + monkeypatch.setenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", "redis://localhost:6379/0") + monkeypatch.setenv("PC2NUTS_WORKERS", "2") + + mod = _reload_limiter() + + assert mod.limiter._storage_uri == "redis://localhost:6379/0" + assert mod.limiter._in_memory_fallback_enabled is True + + @pytest.fixture(autouse=True) + def _restore_default_after_each_test(self, monkeypatch): + """After each test, force a reload back to defaults so other tests + in the suite see the unmodified module.""" + yield + monkeypatch.delenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", raising=False) + monkeypatch.delenv("PC2NUTS_WORKERS", raising=False) + _reload_limiter()