diff --git a/CHANGELOG.md b/CHANGELOG.md index b1ac0b6..73e41cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/). +## [Unreleased] + +### Added + +- **Multi-worker deployment** (#68): set `PC2NUTS_WORKERS` to launch N uvicorn worker processes. Multi-worker mode requires `PC2NUTS_RATE_LIMIT_STORAGE_URI` (e.g. a Redis URL) so the published per-IP rate limit stays accurate across workers; the service refuses to start otherwise. Transient backend unavailability is tolerated via slowapi's `in_memory_fallback_enabled` — falls back to per-process in-memory rate limiting and re-probes with exponential backoff, with one WARNING log per outage and one INFO log on recovery. + ## [0.17.1] - 2026-04-29 ### Fixed diff --git a/Dockerfile b/Dockerfile index 188bcd9..83c1cc6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,4 +21,4 @@ USER appuser HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1 -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers ${PC2NUTS_WORKERS:-1}"] diff --git a/README.md b/README.md index 73e772d..597f7c1 100644 --- a/README.md +++ b/README.md @@ -326,6 +326,32 @@ All settings are overridable via environment variables prefixed with `PC2NUTS_`: | `PC2NUTS_ACCESS_LOG_MAX_MB` | `10` | Maximum size of each access log file in MB before rotation. | | `PC2NUTS_ACCESS_LOG_BACKUP_COUNT` | `5` | Number of rotated access log files to keep (e.g. 5 x 10 MB = 50 MB max disk usage). | +### Multi-worker deployment + +By default the service runs a single uvicorn worker process. Throughput is +CPU-bound at ~30 RPS per worker (see [docs/performance.md](docs/performance.md)). 
+For higher RPS, set `PC2NUTS_WORKERS` to the number of worker processes you +want — the rough rule of thumb is one worker per CPU core, capped by the +available memory (~150-200 MB resident set per worker). + +| Env var | Default | Effect | +|---|---|---| +| `PC2NUTS_WORKERS` | `1` | Number of uvicorn worker processes. | +| `PC2NUTS_RATE_LIMIT_STORAGE_URI` | (unset) | When unset, slowapi uses per-process in-memory storage (default). When set (e.g. `redis://host:6379/0`), counters are shared across workers so the published `rate_limit` cap stays accurate. | + +When `PC2NUTS_WORKERS > 1`, `PC2NUTS_RATE_LIMIT_STORAGE_URI` MUST be set +to a reachable shared backend; the service refuses to start otherwise. +This guards against the per-IP rate limit silently loosening to +`PC2NUTS_WORKERS × rate_limit` per IP under multi-worker. + +**Degraded mode.** If the configured storage backend becomes unreachable +at runtime, slowapi (`in_memory_fallback_enabled=True`) falls back to +per-process in-memory rate limiting and re-probes the primary storage +with exponential backoff. During the outage window the effective per-IP +cap is `PC2NUTS_WORKERS × rate_limit`. Recovery is automatic; one +WARNING log line is emitted at the start of the outage and one INFO line +on recovery. + ## Authentication & rate-limit bypass The service applies a per-IP rate limit (`120/minute` by default) to `/lookup` and `/pattern`. Trusted callers — operator-issued, manually distributed — can bypass this limit by presenting an `Authorization: Bearer ` header. `/health` stays anonymous. 
diff --git a/app/config.py b/app/config.py index dbe1fb2..86ba828 100644 --- a/app/config.py +++ b/app/config.py @@ -2,7 +2,7 @@ import re from pathlib import Path -from pydantic import Field +from pydantic import Field, model_validator from pydantic_settings import BaseSettings _settings_path = Path(__file__).parent / "settings.json" @@ -24,6 +24,8 @@ class Settings(BaseSettings): token_refresh_seconds: int = Field(default=60, ge=1) rate_limit: str = _defaults.get("rate_limit", "120/minute") rate_limit_headers: bool = _defaults.get("rate_limit_headers", True) + workers: int = Field(default=_defaults.get("workers", 1), ge=1) + rate_limit_storage_uri: str | None = _defaults.get("rate_limit_storage_uri", None) cache_max_age: int = _defaults.get("cache_max_age", 3600) startup_timeout: int = 300 docs_enabled: bool = True @@ -37,6 +39,16 @@ class Settings(BaseSettings): model_config = {"env_prefix": "PC2NUTS_"} + @model_validator(mode="after") + def _check_workers_have_shared_storage(self) -> "Settings": + if self.workers > 1 and not self.rate_limit_storage_uri: + raise ValueError( + "PC2NUTS_WORKERS > 1 requires PC2NUTS_RATE_LIMIT_STORAGE_URI to be set " + "(e.g. 'redis://host:6379/0'). Without shared storage the per-IP rate " + "limit would silently loosen by a factor of WORKERS." + ) + return self + @property def extra_source_urls(self) -> list[str]: """Parse PC2NUTS_EXTRA_SOURCES comma-separated list into URL list.""" diff --git a/app/limiter.py b/app/limiter.py new file mode 100644 index 0000000..969c5c5 --- /dev/null +++ b/app/limiter.py @@ -0,0 +1,28 @@ +"""Module-level slowapi Limiter, wired according to settings. + +When PC2NUTS_RATE_LIMIT_STORAGE_URI is unset, the Limiter falls back to +slowapi's in-process MemoryStorage default — byte-for-byte the same as +the pre-#68 inline construction. + +When the URI is set (e.g. 
'redis://host:6379/0'), the Limiter routes +counters through the configured backend, with in_memory_fallback_enabled +giving us per-process MemoryStorage during transient backend outages. +slowapi handles the fail-degraded behaviour internally with exponential- +backoff re-probing — see app/main.py:_rate_limit_handler for the 429 +response, and the spec at docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md +for the design rationale. +""" + +from slowapi import Limiter +from slowapi.util import get_remote_address + +from app.config import settings + +if settings.rate_limit_storage_uri: + limiter = Limiter( + key_func=get_remote_address, + storage_uri=settings.rate_limit_storage_uri, + in_memory_fallback_enabled=True, + ) +else: + limiter = Limiter(key_func=get_remote_address) diff --git a/app/main.py b/app/main.py index 4034202..5b4004e 100644 --- a/app/main.py +++ b/app/main.py @@ -13,14 +13,13 @@ from fastapi import FastAPI, HTTPException, Query, Request, Response from fastapi.middleware.cors import CORSMiddleware from starlette.middleware.base import BaseHTTPMiddleware -from slowapi import Limiter from slowapi.errors import RateLimitExceeded -from slowapi.util import get_remote_address from starlette.responses import JSONResponse from app import __version__, config as _config from app.auth import AuthMiddleware, is_trusted_request from app.config import settings +from app.limiter import limiter from app.data_loader import ( get_data_loaded_at, get_data_stale, @@ -42,8 +41,6 @@ ) logger = logging.getLogger(__name__) -limiter = Limiter(key_func=get_remote_address) - # Access logger — separate from app logger. # Propagates to the root logger so pytest caplog can capture records. # When access_log_file is set, also writes to a dedicated rotating file. 
diff --git a/app/settings.json b/app/settings.json index 21b7b3b..773de1d 100644 --- a/app/settings.json +++ b/app/settings.json @@ -23,5 +23,7 @@ "approximate_min_confidence": 0.1, "rate_limit": "120/minute", "rate_limit_headers": true, + "workers": 1, + "rate_limit_storage_uri": null, "cache_max_age": 3600 } diff --git a/docs/superpowers/plans/2026-05-01-multi-worker-uvicorn.md b/docs/superpowers/plans/2026-05-01-multi-worker-uvicorn.md new file mode 100644 index 0000000..0685648 --- /dev/null +++ b/docs/superpowers/plans/2026-05-01-multi-worker-uvicorn.md @@ -0,0 +1,795 @@ +# Multi-worker uvicorn Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** allow the service to run with N uvicorn workers behind a Redis-backed shared rate-limit backend, while leaving single-worker / in-memory deploys byte-for-byte unchanged. + +**Architecture:** two new opt-in env vars (`PC2NUTS_WORKERS`, `PC2NUTS_RATE_LIMIT_STORAGE_URI`) drive the change. The slowapi `Limiter` is moved to a small dedicated module that picks between the current default construction and a Redis-configured construction with `in_memory_fallback_enabled=True`. A Pydantic model validator on `Settings` hard-fails startup when `WORKERS > 1` without a storage URI to prevent silent cap loosening. The Dockerfile `CMD` becomes shell-form so `${PC2NUTS_WORKERS}` expands at container start. + +**Tech Stack:** Python 3.14, FastAPI 0.129, uvicorn 0.40, slowapi 0.1.9, limits 5.8.0, pydantic 2.12.5, pydantic-settings 2.13.0, redis-py (added via `limits[redis]` extra). 
+ +**Spec:** `docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md` + +**Issue:** [#68](https://github.com/bk86a/PostalCode2NUTS/issues/68) + +--- + +## File map + +| File | Action | Responsibility | +|---|---|---| +| `app/config.py` | Modify | Add `workers` and `rate_limit_storage_uri` fields + cross-field validator | +| `app/limiter.py` | Create | Single-purpose module exposing the configured `Limiter` | +| `app/main.py` | Modify | Replace inline `limiter = Limiter(...)` (line 45) with import from `app.limiter` | +| `app/settings.json` | Modify | Add default for `workers` (`1`) and explicit null for `rate_limit_storage_uri` | +| `Dockerfile` | Modify | Switch `CMD` from exec-form to shell-form so `${PC2NUTS_WORKERS}` expands | +| `requirements.lock` | Modify | Add `redis` runtime dep (via `limits[redis]` extra) | +| `tests/test_limiter.py` | Create | Unit tests for `Limiter` storage-mode selection | +| `tests/test_config.py` | Modify or create | Unit tests for the new `Settings` validator | +| `README.md` | Modify | Document `PC2NUTS_WORKERS`, `PC2NUTS_RATE_LIMIT_STORAGE_URI`, degraded-mode behaviour | +| `CHANGELOG.md` | Modify | Unreleased section entry | + +--- + +## Pre-flight + +Before starting, verify the worktree is on the `feat/multi-worker-uvicorn` branch and the spec is committed: + +```bash +git rev-parse --abbrev-ref HEAD +# Expected: feat/multi-worker-uvicorn + +git log --oneline main..HEAD +# Expected: at least one commit referencing the spec +``` + +If the branch is wrong, stop and switch (`git switch feat/multi-worker-uvicorn`). + +Verify the test suite is green at HEAD before touching anything: + +```bash +pytest -x -q +``` + +If any tests fail at HEAD, stop and report — don't start making changes against a broken baseline. 
+ +--- + +## Task 1: Add `Settings` fields and validator + +**Files:** +- Modify: `app/config.py` +- Modify: `app/settings.json` +- Modify or create: `tests/test_config.py` + +This task introduces the two new config fields and the cross-field validator that prevents the dangerous combination (`workers > 1` with no storage URI). Tests come first. + +- [ ] **Step 1: Confirm whether `tests/test_config.py` exists** + +```bash +ls tests/test_config.py 2>&1 +``` + +If the file does not exist, create it in Step 2 with the imports in place. If it exists, append to it. + +- [ ] **Step 2: Write the failing tests** + +If creating `tests/test_config.py`, full contents: + +```python +"""Tests for app.config.Settings.""" + +import pytest +from pydantic import ValidationError + +from app.config import Settings + + +class TestWorkersValidator: + def test_workers_eq_1_without_storage_uri_succeeds(self): + """Default config must keep validating — single-worker, no storage URI.""" + s = Settings(workers=1, rate_limit_storage_uri=None) + assert s.workers == 1 + assert s.rate_limit_storage_uri is None + + def test_workers_gt_1_with_storage_uri_succeeds(self): + """Multi-worker is permitted when a storage URI is configured.""" + s = Settings(workers=4, rate_limit_storage_uri="redis://localhost:6379/0") + assert s.workers == 4 + assert s.rate_limit_storage_uri == "redis://localhost:6379/0" + + def test_workers_gt_1_without_storage_uri_fails_startup(self): + """The unsafe combination must raise — silent cap loosening is the + failure mode this validator exists to prevent.""" + with pytest.raises(ValidationError) as excinfo: + Settings(workers=2, rate_limit_storage_uri=None) + msg = str(excinfo.value) + assert "PC2NUTS_WORKERS" in msg + assert "PC2NUTS_RATE_LIMIT_STORAGE_URI" in msg + + def test_workers_gt_1_with_empty_storage_uri_fails_startup(self): + """Empty string should be treated the same as None — both mean unset.""" + with pytest.raises(ValidationError): + Settings(workers=2, 
rate_limit_storage_uri="") +``` + +If the file already exists, append the `TestWorkersValidator` class. + +- [ ] **Step 3: Run tests to verify they fail** + +```bash +pytest tests/test_config.py -v +``` + +Expected: all four tests FAIL because `workers` and `rate_limit_storage_uri` aren't fields on `Settings` yet (most likely an `AttributeError` or unexpected-kwarg error from Pydantic). + +- [ ] **Step 4: Add the fields and validator to `app/config.py`** + +In `app/config.py`, change the imports at the top: + +```python +from pydantic import Field, model_validator +from pydantic_settings import BaseSettings +``` + +Inside the `Settings` class body, add the two new fields next to the existing `rate_limit` and `rate_limit_headers` fields (around line 25-26). The `_defaults.get(...)` pattern matches the existing fields: + +```python + workers: int = Field(default=_defaults.get("workers", 1), ge=1) + rate_limit_storage_uri: str | None = _defaults.get("rate_limit_storage_uri", None) +``` + +Then add the validator method inside the class (place it after the existing `model_config = {...}` line and before the `@property` block): + +```python + @model_validator(mode="after") + def _check_workers_have_shared_storage(self) -> "Settings": + if self.workers > 1 and not self.rate_limit_storage_uri: + raise ValueError( + "PC2NUTS_WORKERS > 1 requires PC2NUTS_RATE_LIMIT_STORAGE_URI to be set " + "(e.g. 'redis://host:6379/0'). Without shared storage the per-IP rate " + "limit would silently loosen by a factor of WORKERS." + ) + return self +``` + +- [ ] **Step 5: Add defaults to `app/settings.json`** + +Edit `app/settings.json`. Inside the JSON object, add two keys after `"rate_limit_headers"`: + +```json + "rate_limit": "120/minute", + "rate_limit_headers": true, + "workers": 1, + "rate_limit_storage_uri": null, + "cache_max_age": 3600 +``` + +The trailing comma after `"rate_limit_headers": true,` already exists. 
The trailing comma after `"rate_limit_storage_uri": null,` is new — verify it parses by reading the file back and asserting validity: + +```bash +python3 -c "import json; json.load(open('app/settings.json'))" && echo "valid JSON" +``` + +Expected: `valid JSON` + +- [ ] **Step 6: Run tests to verify they pass** + +```bash +pytest tests/test_config.py -v +``` + +Expected: all four tests PASS. + +- [ ] **Step 7: Run the full test suite to verify no regressions** + +```bash +pytest -x -q +``` + +Expected: green. Note: `Settings()` is instantiated at module import time in `app/config.py:88`. The default config (`workers=1`, `rate_limit_storage_uri=None`) must continue to validate, which Step 6 confirmed. + +- [ ] **Step 8: Commit** + +```bash +git add app/config.py app/settings.json tests/test_config.py +git commit -m "feat(config): add workers and rate_limit_storage_uri settings (#68) + +New Pydantic model validator hard-fails startup if PC2NUTS_WORKERS > 1 +without PC2NUTS_RATE_LIMIT_STORAGE_URI configured, so the per-IP rate +limit can never silently loosen under multi-worker. + +Defaults preserve current behaviour: workers=1, storage URI unset." +``` + +--- + +## Task 2: Add `redis` runtime dependency + +**Files:** +- Modify: `requirements.lock` + +The `limits[redis]` extra pulls in the `redis-py` client at the version `limits 5.8.0` expects. We add it before the limiter tests because constructing a `Limiter(storage_uri="redis://…")` eagerly imports the `redis` module — Task 3 below would fail without it. We confirmed with `limits/storage/redis.py:96` that `RedisStorage.__init__` resolves `self.dependency = self.dependencies["redis"].module` at construction time; if the module isn't importable, slowapi raises `ConfigurationError` immediately. Construction does NOT open a TCP connection (`register_script` is local-only), so the test does not require a running Redis. 
+ +- [ ] **Step 1: Inspect the current `requirements.lock` format** + +```bash +head -5 requirements.lock +grep -E "^(slowapi|limits|pydantic)" requirements.lock +``` + +Confirm the file is a flat list of `package==version` pins (one per line). The relevant context lines are `slowapi==0.1.9` and `limits==5.8.0`. + +- [ ] **Step 2: Determine the redis-py version `limits[redis]` requires** + +```bash +python3 -c " +import importlib.metadata as md +reqs = md.requires('limits') or [] +print('\n'.join(r for r in reqs if 'redis' in r.lower())) +" +pip index versions redis 2>&1 | head -5 +``` + +Expected: a line like `redis (>=3,<6) ; extra == 'redis'` or similar. Note the version constraint and pick the highest currently-released version inside that constraint. + +- [ ] **Step 3: Verify the chosen version installs cleanly** + +```bash +pip install --dry-run "redis==" 2>&1 | tail -5 +``` + +Expected: no version-conflict errors against the rest of `requirements.lock`. + +- [ ] **Step 4: Add the pin to `requirements.lock`** + +Edit `requirements.lock`. Insert the pin in alphabetical order (the file is already sorted; find where `r` belongs — typically between `pyyaml` and `requests` or similar). Add the single line: + +``` +redis== +``` + +- [ ] **Step 5: Verify the requirements file as a whole still resolves** + +```bash +pip install --dry-run -r requirements.lock 2>&1 | tail -10 +``` + +Expected: no version-conflict errors. + +- [ ] **Step 6: Install the dep so subsequent tasks' tests can run** + +```bash +pip install -q "redis==" +python3 -c "import redis; print('redis', redis.__version__)" +``` + +Expected: prints the installed redis version cleanly. (If the environment is PEP 668 / externally-managed and pip refuses, install into the project venv or use `--break-system-packages` per the local environment's policy. CI installs via `pip install -r requirements.lock` so the published artefact and CI both behave correctly.) 
+ +- [ ] **Step 7: Run the existing test suite to verify no regressions** + +```bash +pytest -x -q +``` + +Expected: green. Adding the dep shouldn't change behaviour because no test imports `redis` directly yet. + +- [ ] **Step 8: Commit** + +```bash +git add requirements.lock +git commit -m "deps: add redis runtime dependency for multi-worker rate limiting (#68) + +Pulls in redis-py at the version limits 5.8.0 expects, used only when +PC2NUTS_RATE_LIMIT_STORAGE_URI is set. Single-host deployers who never +configure shared storage pay the install-size cost but no runtime cost +(redis is imported eagerly by limits.storage.RedisStorage at Limiter +construction, but only when the storage URI is configured)." +``` + +--- + +## Task 3: Extract `Limiter` into `app/limiter.py` + +**Files:** +- Create: `app/limiter.py` +- Modify: `app/main.py` +- Create: `tests/test_limiter.py` + +This task pulls the inline `Limiter(key_func=get_remote_address)` line at `app/main.py:45` into a dedicated module that selects between the default and Redis-configured construction. The default branch is byte-for-byte the current code. + +- [ ] **Step 1: Write the failing tests** + +Create `tests/test_limiter.py`: + +```python +"""Tests for app.limiter — verify the Limiter is wired with the right storage +mode based on settings.rate_limit_storage_uri.""" + +import importlib + +import pytest + + +def _reload_limiter(): + """Reload app.config and app.limiter so the module-level Limiter picks up + the current env. Returns the freshly-imported app.limiter module.""" + import app.config + import app.limiter + importlib.reload(app.config) + importlib.reload(app.limiter) + return app.limiter + + +class TestLimiterStorageSelection: + def test_limiter_default_uses_memory_storage(self, monkeypatch): + """When no storage URI is set, the Limiter is constructed with the + slowapi default (in-process memory) and fallback is disabled. 
This is + the byte-for-byte current behaviour.""" + monkeypatch.delenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", raising=False) + monkeypatch.delenv("PC2NUTS_WORKERS", raising=False) + + mod = _reload_limiter() + + # slowapi stores the configured URI on _storage_uri; default is None. + assert mod.limiter._storage_uri is None + # in_memory_fallback_enabled is only meaningful when a URI is set. + assert mod.limiter._in_memory_fallback_enabled is False + + def test_limiter_with_redis_uri_enables_fallback(self, monkeypatch): + """When a storage URI is set, the Limiter is constructed with that URI + AND in_memory_fallback_enabled=True. No network call should happen at + construction time — slowapi builds the storage object lazily.""" + monkeypatch.setenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", "redis://localhost:6379/0") + monkeypatch.setenv("PC2NUTS_WORKERS", "2") + + mod = _reload_limiter() + + assert mod.limiter._storage_uri == "redis://localhost:6379/0" + assert mod.limiter._in_memory_fallback_enabled is True + + @pytest.fixture(autouse=True) + def _restore_default_after_each_test(self, monkeypatch): + """After each test, force a reload back to defaults so other tests + in the suite see the unmodified module.""" + yield + monkeypatch.delenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", raising=False) + monkeypatch.delenv("PC2NUTS_WORKERS", raising=False) + _reload_limiter() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +pytest tests/test_limiter.py -v +``` + +Expected: all tests FAIL with `ModuleNotFoundError: No module named 'app.limiter'`. + +- [ ] **Step 3: Create `app/limiter.py`** + +Full contents: + +```python +"""Module-level slowapi Limiter, wired according to settings. + +When PC2NUTS_RATE_LIMIT_STORAGE_URI is unset, the Limiter falls back to +slowapi's in-process MemoryStorage default — byte-for-byte the same as +the pre-#68 inline construction. + +When the URI is set (e.g. 
'redis://host:6379/0'), the Limiter routes +counters through the configured backend, with in_memory_fallback_enabled +giving us per-process MemoryStorage during transient backend outages. +slowapi handles the fail-degraded behaviour internally with exponential- +backoff re-probing — see app/main.py:_rate_limit_handler for the 429 +response, and the spec at docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md +for the design rationale. +""" + +from slowapi import Limiter +from slowapi.util import get_remote_address + +from app.config import settings + +if settings.rate_limit_storage_uri: + limiter = Limiter( + key_func=get_remote_address, + storage_uri=settings.rate_limit_storage_uri, + in_memory_fallback_enabled=True, + ) +else: + limiter = Limiter(key_func=get_remote_address) +``` + +- [ ] **Step 4: Update `app/main.py` to import the limiter** + +In `app/main.py`, locate the existing slowapi imports and the inline Limiter construction: + +Current state (lines 16-18 and line 45): +```python +from slowapi import Limiter +from slowapi.errors import RateLimitExceeded +from slowapi.util import get_remote_address +... +limiter = Limiter(key_func=get_remote_address) +``` + +Change to: + +```python +from slowapi.errors import RateLimitExceeded +... +from app.limiter import limiter +``` + +Concretely: +- Remove `from slowapi import Limiter` (line 16). The `Limiter` symbol is no longer used in `main.py`. +- Remove `from slowapi.util import get_remote_address` (line 18). Same reason. +- Keep `from slowapi.errors import RateLimitExceeded` (line 17) — still used by `_rate_limit_handler`. +- Replace the standalone `limiter = Limiter(key_func=get_remote_address)` line (line 45) with: + ```python + from app.limiter import limiter + ``` + Place this with the other `from app...` imports in the import block (around line 22-23, alongside `from app.auth import ...` and `from app.config import settings`). 
+ +After the changes, the slowapi-related imports in `app/main.py` should be just: + +```python +from slowapi.errors import RateLimitExceeded +``` + +And `limiter` should be sourced via: + +```python +from app.limiter import limiter +``` + +The decorators `@limiter.limit(...)` and the `app.state.limiter = limiter` line stay exactly as they are. + +- [ ] **Step 5: Run the new tests to verify they pass** + +```bash +pytest tests/test_limiter.py -v +``` + +Expected: both tests PASS. + +- [ ] **Step 6: Run the full test suite to verify no regressions** + +```bash +pytest -x -q +``` + +Expected: green. Particular attention to `tests/test_api.py::TestRateLimitTokens::test_valid_token_bypasses_rate_limit` (around test_api.py:226) — this exercises the `@limiter.limit(...)` decorator path and must still pass exactly as before. + +If any test fails, the most likely cause is a circular import (`app.limiter` imports `app.config.settings`, which is fine; `app.main` imports `app.limiter`, which is also fine; but `app.config` must NOT import `app.limiter`). Verify by reading the import block of `app/config.py` — it should only import from `pydantic`, `pydantic_settings`, and stdlib. + +- [ ] **Step 7: Commit** + +```bash +git add app/limiter.py app/main.py tests/test_limiter.py +git commit -m "feat(limiter): extract slowapi Limiter into dedicated module (#68) + +When PC2NUTS_RATE_LIMIT_STORAGE_URI is set, construct the Limiter with +that storage URI and in_memory_fallback_enabled=True so transient +backend outages fall back to per-process MemoryStorage. When unset, +construction is byte-for-byte the previous inline call. + +slowapi's built-in fallback handles outage detection, once-per-outage +WARNING logging, and exponential-backoff recovery probes." +``` + +--- + +## Task 4: Wire `PC2NUTS_WORKERS` into the Dockerfile + +**Files:** +- Modify: `Dockerfile` + +Single-line change to the `CMD`. 
The shell-form `CMD ["sh", "-c", "..."]` lets `${PC2NUTS_WORKERS:-1}` expand at container start using the env-var-with-default shell idiom. Default `:-1` preserves single-worker behaviour when the env var is unset. + +- [ ] **Step 1: Read the current Dockerfile to confirm the CMD line** + +```bash +grep -n "CMD" Dockerfile +``` + +Expected (current): `24:CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]` + +- [ ] **Step 2: Replace the `CMD` line** + +Edit `Dockerfile`, line 24. Replace: + +``` +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +with: + +``` +CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers ${PC2NUTS_WORKERS:-1}"] +``` + +The `exec` keeps `uvicorn` as the foreground process (replaces `sh` instead of leaving it as parent), so SIGTERM from the container runtime reaches uvicorn directly. Critical for graceful shutdown. + +- [ ] **Step 3: Sanity-check the Dockerfile builds** + +```bash +docker build -t pc2nuts:test-multi-worker . 2>&1 | tail -10 +``` + +Expected: `Successfully tagged pc2nuts:test-multi-worker` or equivalent (depending on Docker version). If the build fails for unrelated reasons (e.g. base image version drift), record the error and stop — do NOT modify other parts of the Dockerfile to make the build pass. + +If `docker` is not available in the agent environment, skip this step and rely on CI (the existing `docker` job in `.github/workflows/ci.yml` will exercise the build on PR). + +- [ ] **Step 4: Smoke-test the container with default (single-worker)** + +If Step 3 succeeded: + +```bash +docker run --rm -d --name pc2nuts-test -p 8000:8000 pc2nuts:test-multi-worker +sleep 8 +curl -fsS http://localhost:8000/health | head -c 300 +docker stop pc2nuts-test +``` + +Expected: `/health` returns 200 with the usual JSON body. The `--workers 1` is implicit (no env var → default). 
+ +If the curl returns non-200 or hangs, capture logs: + +```bash +docker logs pc2nuts-test 2>&1 | tail -40 +``` + +The most likely failure modes are: +- `${PC2NUTS_WORKERS:-1}` not expanding (would be visible in logs as `--workers ${PC2NUTS_WORKERS:-1}` literal). If so, double-check that the `CMD` is shell-form (`sh -c`). +- Existing data-load failures unrelated to this change (TERCET download). Not our problem here. + +- [ ] **Step 5: Smoke-test the container with `PC2NUTS_WORKERS=2` and no storage URI (must fail)** + +```bash +docker run --rm -d --name pc2nuts-test-bad -e PC2NUTS_WORKERS=2 -p 8001:8000 pc2nuts:test-multi-worker +sleep 5 +docker logs pc2nuts-test-bad 2>&1 | tail -20 +docker stop pc2nuts-test-bad 2>/dev/null || true +docker rm pc2nuts-test-bad 2>/dev/null || true +``` + +Expected: container exits early; logs contain the validator's error message naming `PC2NUTS_WORKERS` and `PC2NUTS_RATE_LIMIT_STORAGE_URI`. This confirms the startup guard fires inside the container, not just in unit tests. + +If Docker is not available, skip — CI will exercise the build. + +- [ ] **Step 6: Commit** + +```bash +git add Dockerfile +git commit -m "feat(docker): support PC2NUTS_WORKERS env var (#68) + +Switches CMD from exec-form to shell-form with 'exec uvicorn …' so +\${PC2NUTS_WORKERS:-1} expands at container start while uvicorn remains +the foreground PID-1 process for proper SIGTERM handling. + +Default of 1 preserves current single-worker behaviour. Multi-worker +mode also requires PC2NUTS_RATE_LIMIT_STORAGE_URI; the Settings +validator (added in feat(config)) refuses to start otherwise." +``` + +--- + +## Task 5: Document the new env vars in README + +**Files:** +- Modify: `README.md` + +Add a "Multi-worker deployment" subsection near the existing rate-limit / configuration documentation. Reference the slowapi fail-degraded behaviour so operators know what to expect during a Redis outage. 
+ +- [ ] **Step 1: Locate the right insertion point** + +```bash +grep -n "rate_limit\|rate limit\|RATE_LIMIT" README.md | head -20 +grep -n "## Configuration\|### Configuration\|## Environment" README.md | head -10 +``` + +Look for the existing rate-limit / env-var documentation block. The new content goes immediately after that block. + +- [ ] **Step 2: Read the surrounding context** + +Read the 30 lines around the rate-limit doc to match the existing style (table format, env-var naming convention, voice). Note whether the README uses backticks for env vars, what tone is used (terse vs prose), etc. + +- [ ] **Step 3: Insert the new section** + +Add the following block immediately after the existing rate-limit documentation, matching the README's existing formatting. If the README uses a table for env vars, add two rows; if it uses a definitions list, add two items. The content (adapt to the existing style): + +```markdown +### Multi-worker deployment + +By default the service runs a single uvicorn worker process. Throughput is +CPU-bound at ~30 RPS per worker (see [docs/performance.md](docs/performance.md)). +For higher RPS, set `PC2NUTS_WORKERS` to the number of worker processes you +want — the rough rule of thumb is one worker per CPU core, capped by the +available memory (~150-200 MB resident set per worker). + +| Env var | Default | Effect | +|---|---|---| +| `PC2NUTS_WORKERS` | `1` | Number of uvicorn worker processes. | +| `PC2NUTS_RATE_LIMIT_STORAGE_URI` | (unset) | When unset, slowapi uses per-process in-memory storage (default). When set (e.g. `redis://host:6379/0`), counters are shared across workers so the published `rate_limit` cap stays accurate. | + +When `PC2NUTS_WORKERS > 1`, `PC2NUTS_RATE_LIMIT_STORAGE_URI` MUST be set +to a reachable shared backend; the service refuses to start otherwise. +This guards against the per-IP rate limit silently loosening to +`PC2NUTS_WORKERS × rate_limit` per IP under multi-worker. 
+ +**Degraded mode.** If the configured storage backend becomes unreachable +at runtime, slowapi (`in_memory_fallback_enabled=True`) falls back to +per-process in-memory rate limiting and re-probes the primary storage +with exponential backoff. During the outage window the effective per-IP +cap is `PC2NUTS_WORKERS × rate_limit`. Recovery is automatic; one +WARNING log line is emitted at the start of the outage and one INFO line +on recovery. +``` + +- [ ] **Step 4: Verify the README still renders sensibly** + +```bash +# Syntax-check by piping through a markdown parser if one is available +python3 -c " +with open('README.md') as f: + content = f.read() +# Crude check: equal counts of backticks (no unmatched code spans) +import re +inline = len(re.findall(r'(?&1 | tail -10 +``` + +Expected: no new findings on `app/limiter.py`. If bandit complains about something legitimately security-sensitive, fix it; if it's a false positive on the slowapi/redis URI handling, document the suppression with a comment naming the rule. + +- [ ] **Step 5: Eyeball the diff against `main`** + +```bash +git diff main...HEAD --stat +git diff main...HEAD docs/superpowers/specs/ docs/superpowers/plans/ +``` + +Expected: the file list matches the file map at the top of this plan. The spec and plan diffs should be readable and consistent. The implementation files (`app/config.py`, `app/limiter.py`, `app/main.py`, `Dockerfile`, `requirements.lock`, `app/settings.json`, two test files, README, CHANGELOG) should all be present. + +- [ ] **Step 6: Push the branch and open a PR** + +```bash +git push -u origin feat/multi-worker-uvicorn +``` + +Then open the PR with `gh pr create`, mapping each acceptance criterion from issue #68 to the relevant component. Body template: + +```markdown +## Summary + +Implements [#68](https://github.com/bk86a/PostalCode2NUTS/issues/68): multi-worker uvicorn behind a shared rate-limit backend. 
+ +- New env vars: `PC2NUTS_WORKERS` (default `1`), `PC2NUTS_RATE_LIMIT_STORAGE_URI` (default unset). +- Startup hard-fails if `WORKERS > 1` without a storage URI configured. +- slowapi `in_memory_fallback_enabled=True` handles transient backend outages with per-process MemoryStorage fallback and exponential-backoff re-probing. +- Default behaviour (single worker, in-memory) is byte-for-byte unchanged. + +Spec: `docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md` +Plan: `docs/superpowers/plans/2026-05-01-multi-worker-uvicorn.md` + +## Acceptance criteria from #68 + +- [x] `Dockerfile` updated to launch N workers via `PC2NUTS_WORKERS` +- [x] Rate-limit behaviour with N workers documented in `README.md` +- [ ] Memory usage measured against the container's allocated memory — operational follow-up post-deploy +- [ ] Performance baseline re-run; `docs/performance.md` updated — operational follow-up post-deploy + +## Test plan + +- [ ] Existing rate-limit tests still pass (single-worker default branch is byte-for-byte unchanged) +- [ ] New `tests/test_config.py::TestWorkersValidator` proves the validator fires +- [ ] New `tests/test_limiter.py::TestLimiterStorageSelection` proves the storage URI is honoured +- [ ] Manual: deploy with `PC2NUTS_WORKERS=2` + a real Redis, confirm the cap holds across workers (operational) +- [ ] Manual: kill Redis with traffic flowing, confirm WARNING logs once and INFO logs on recovery (operational) +``` + +--- + +## Self-review + +Before marking the plan complete, the implementer (or dispatching agent) should run through this checklist: + +- **Spec coverage.** Every section of the spec maps to a task: + - §3 Configuration surface → Task 1 + - §4.1 Settings additions → Task 1 + - §4.2 `app/limiter.py` → Task 3 + - §4.3 slowapi fail-degraded behaviour → Task 3 (the `in_memory_fallback_enabled=True` flag) + - §4.4 Dockerfile → Task 4 + - §4.5 requirements.lock → Task 2 + - §4.6 `app/main.py` → Task 3 + - §6 Error handling — 
covered by slowapi internals; test scope per §7
+  - §7 Testing → Tasks 1 and 3 each include their own tests
+  - §8 Documentation → Tasks 5 and 6
+  - §9 Acceptance-criteria mapping → Task 7 PR body
+- **Placeholder scan.** Every "implement" step contains the actual code. The only `<version>` placeholders are in Task 2, where the version genuinely depends on the live `pip` resolution; the steps tell the implementer how to derive the concrete value.
+- **Type/name consistency.** `Settings.workers` (int, default 1), `Settings.rate_limit_storage_uri` (str|None, default None), `app.limiter.limiter` (slowapi.Limiter), `PC2NUTS_WORKERS` and `PC2NUTS_RATE_LIMIT_STORAGE_URI` env-var names — all match across tasks.
diff --git a/docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md b/docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md
new file mode 100644
index 0000000..74c3ceb
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-01-multi-worker-uvicorn-design.md
@@ -0,0 +1,252 @@
+# Multi-worker uvicorn with Redis-backed rate limiting — design
+
+**Status:** approved (brainstorming complete)
+**Date:** 2026-05-01
+**Issue:** [#68](https://github.com/bk86a/PostalCode2NUTS/issues/68)
+**Scope:** allow the service to run with N uvicorn workers behind a shared rate-limit backend, preserving the strict per-IP cap that single-worker deployments rely on. Single-host PyPI/Docker deployers must remain unaffected.
+
+---
+
+## 1. Goals and non-goals
+
+### Goals
+
+- Allow `uvicorn` to run with `N > 1` workers via a single env var (`PC2NUTS_WORKERS`).
+- Keep the per-IP rate-limit cap strict at `settings.rate_limit` (currently `120/minute`) across all workers, by routing slowapi storage through a shared backend (Redis) when configured.
+- Keep the trusted-token bypass (`exempt_when=is_trusted_request`) working unchanged.
+- Preserve the current single-worker, in-memory deploy path byte-for-byte when the new env vars are unset.
+- Tolerate transient unavailability of the shared backend without taking the API down. +- Hard-fail at startup if the operator asks for `N > 1` without configuring shared storage, so the cap can never silently loosen. + +### Non-goals + +- Provisioning the Redis instance itself — operational concern, documented in `README.md` as a prerequisite for the multi-worker mode. +- Re-running the performance baseline and updating `docs/performance.md` — separate post-deploy task once the multi-worker container is in production. +- Edge-layer rate limiting (bunny.net Shield + Edge Rules) — investigated during brainstorming, not on the table without a documented header-conditional bypass mechanism. Left as a future option. +- Switching from `uvicorn --workers` to `gunicorn` — not needed for this workload (no streaming, no long-poll, requests <100 ms), and adds a process-manager dependency. +- Recycling worker processes after N requests, graceful timeouts on individual requests, or other lifecycle features. +- Implementation work — this document is the design only. + +--- + +## 2. Background + +### Why this is the next perf win + +The performance baseline at commit `526f289` (`docs/performance.md`) measured sustained throughput at ~30 RPS on a single uvicorn worker. The bottleneck is per-request server-side CPU work (~30 ms in `/lookup` logic + Pydantic serialisation; the actual dict lookup is statistically free). Each additional worker should add roughly +30 RPS of headroom, up to the container CPU count. This is the single largest available perf win that requires no application-logic change. + +### Why slowapi needs a shared backend + +slowapi's default storage is in-memory (`MemoryStorage` from the `limits` library), per-process. With `N` workers, each worker holds its own counters, so the per-IP cap effectively becomes `N × settings.rate_limit`. 
The project's published cap is `120/minute` per anonymous IP, with trusted-token holders exempt — that contract must hold under multi-worker. + +### Three rejected alternatives (recorded for the next time this is revisited) + +- **Accept the loosening (option (c) in brainstorming).** Document that the published cap is per-process; aggregate is N× higher. Rejected: violates the published contract. +- **Edge-layer rate limiting via bunny.net (option (b)).** bunny.net Shield offers global per-IP rate limiting and Edge Rules support header-based logic, but combining the two — i.e. "rate-limit by IP, except when an Authorization header is present" — is not a documented configuration, and trusted-token holders would be throttled at the edge. Rejected without a support-ticket confirmation; left as a future option if bunny.net adds the capability. +- **Divide the cap by N at the worker level (`120/N` per worker).** Rejected: with HTTP keepalive (default for almost all clients), all of a client's rapid requests pin to the same worker, so they would hit `120/N` and be throttled even though aggregate budget allows 120. False-positive rate limits. + +--- + +## 3. Configuration surface + +Two new env vars, both with defaults preserving current behavior. + +| Env var | Type | Default | Effect | +|---|---|---|---| +| `PC2NUTS_WORKERS` | `int ≥ 1` | `1` | Number of uvicorn worker processes. | +| `PC2NUTS_RATE_LIMIT_STORAGE_URI` | `str \| None` | `None` (unset) | When unset, slowapi uses in-memory storage (current behavior). When set (e.g. `redis://host:6379/0`), slowapi routes through the fail-degraded shared backend wrapper. | + +Both surface in `app/config.py` (`Settings` model, `settings.json` defaults, env-var override per existing pattern). Naming follows the existing `PC2NUTS_*` convention. + +### Startup validation + +If `PC2NUTS_WORKERS > 1` and `PC2NUTS_RATE_LIMIT_STORAGE_URI` is unset/empty, the app refuses to start with a clear error message naming both env vars. 
Implemented as a Pydantic model validator on `Settings`. `Settings()` is instantiated at module-import time in `app/config.py`, which runs once in the uvicorn supervisor process before the workers are forked, so the failure is pre-fork and pre-bind: the supervisor exits non-zero with the validator's message and no workers are started. (Each worker also re-imports the module after fork, but the supervisor has already failed by then in the misconfigured case, so worker-side behavior is moot.) + +--- + +## 4. Components + +### 4.1 `app/config.py` — settings additions + +Two new fields, default-preserving, plus a model validator. Defaults can also be set in `app/settings.json` for self-hosters who want to bake worker counts into the image. + +```python +class Settings(BaseSettings): + # ... existing fields ... + workers: int = _defaults.get("workers", 1) + rate_limit_storage_uri: str | None = _defaults.get("rate_limit_storage_uri", None) + + @model_validator(mode="after") + def _check_workers_have_shared_storage(self) -> "Settings": + if self.workers > 1 and not self.rate_limit_storage_uri: + raise ValueError( + "PC2NUTS_WORKERS > 1 requires PC2NUTS_RATE_LIMIT_STORAGE_URI to be set " + "(e.g. 'redis://host:6379/0'). Without shared storage the per-IP rate limit " + "would silently loosen by a factor of WORKERS." + ) + return self +``` + +The exact field name and Pydantic version semantics are existing-codebase concerns to confirm during implementation; the design intent is "model-level validator that fails fast on the unsafe combination." + +### 4.2 `app/limiter.py` — new module + +Single responsibility: own the slowapi `Limiter` instance and the storage-mode choice. Today this is inline in `app/main.py`; pulling it out keeps `main.py` slimmer and isolates the (small) configuration logic. 
+ +Public surface: + +```python +# app/limiter.py +from slowapi import Limiter +from slowapi.util import get_remote_address +from app.config import settings + +if settings.rate_limit_storage_uri: + limiter = Limiter( + key_func=get_remote_address, + storage_uri=settings.rate_limit_storage_uri, + in_memory_fallback_enabled=True, + ) +else: + limiter = Limiter(key_func=get_remote_address) +``` + +That's the entire module. Two branches; default branch is byte-for-byte identical to the current `app/main.py:45` line. + +`app/main.py` changes from `limiter = Limiter(...)` to `from app.limiter import limiter`. The `app.state.limiter = limiter` and `@limiter.limit(...)` decorators stay exactly as they are. + +### 4.3 Fail-degraded behavior — slowapi built-in + +slowapi 0.1.9 already implements the fail-degraded design we want via the `in_memory_fallback_enabled` parameter. Behavior, taken directly from `slowapi/extension.py`: + +- **Trigger:** when any storage call raises, slowapi sets an internal `_storage_dead = True` flag, logs once at WARNING (`"Rate limit storage unreachable - falling back to in-memory storage"`), and routes subsequent rate-limit checks to a separate per-process `MemoryStorage`-backed `MovingWindowRateLimiter`. +- **Recovery:** uses exponential backoff. The `__should_check_backend()` helper waits `2^N` seconds between consecutive re-probes (where `N` is the consecutive check count, capped at `MAX_BACKEND_CHECKS`). When a probe succeeds (`self._storage.check()`), `_storage_dead` is cleared and slowapi logs at INFO (`"Rate limit storage recovered"`). +- **Logging policy:** the `if not self._storage_dead` guard around the WARNING means it fires once per outage transition, exactly matching our spec intent. +- **Concurrency:** each worker has its own `_storage_dead` flag and its own fallback `MemoryStorage`. 
Per-worker outage tracking is correct (a worker that can't reach Redis is independently degraded), and the fallback is per-worker, so during an outage the effective cap loosens to `N × settings.rate_limit` per IP — same as our designed §3 behavior.
+
+This means we write **zero custom storage code**. The plan only needs to wire `in_memory_fallback_enabled=True` into the `Limiter` construction and verify the wiring at the unit-test level.
+
+The exponential-backoff recovery cadence (rather than a fixed 30 s window) is strictly better for the operator: gentler on a recovering Redis under load, faster recovery for short blips.
+
+### 4.4 `Dockerfile`
+
+Single change: switch the `CMD` from exec form to shell form so `${PC2NUTS_WORKERS}` expands at container start.
+
+```dockerfile
+CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers ${PC2NUTS_WORKERS:-1}"]
+```
+
+The default `:-1` keeps single-worker behavior when the env var is unset. The shell form is needed only for the env-var expansion; the `exec` makes the shell replace itself with uvicorn, so uvicorn ends up as PID 1 and receives SIGTERM directly (a bare shell-form CMD would leave `sh` as PID 1, and `sh` does not forward signals to its child).
+
+### 4.5 `requirements.lock`
+
+Add a `redis` runtime dependency. Easiest path is `slowapi[redis]` or `limits[redis]` (whichever extra is exposed by the version slowapi pins); fallback is a direct `redis` pin matching the major-version range that `limits` expects (concrete numbers chosen at implementation time — see §10).
+
+The dependency is loaded eagerly at import time by `app/limiter.py` only when `rate_limit_storage_uri` is set; single-host deployers who never set the env var still pay the install-size cost but no runtime cost. Acceptable trade-off.
+
+### 4.6 `app/main.py`
+
+- Remove the inline `limiter = Limiter(key_func=get_remote_address)` line.
+- Add `from app.limiter import limiter` near the existing slowapi imports. +- Everything else (`app.state.limiter = limiter`, the `RateLimitExceeded` handler, the `@limiter.limit(...)` decorators on `/lookup` and `/lookup/{country}/{postal}`) stays exactly as it is. + +--- + +## 5. Data flow + +### Single-worker deploy (default, unchanged) + +``` +Request → uvicorn (1 worker) → FastAPI → @limiter.limit → MemoryStorage (in-process dict) → handler +``` + +Identical to today. Trusted-token requests skip the limiter via `exempt_when`. + +### Multi-worker deploy (new) + +``` +Request → uvicorn (N workers, OS distributes) → FastAPI → @limiter.limit + → slowapi Limiter (in_memory_fallback_enabled=True) + _storage_dead=False: → Redis (shared across workers) → handler + _storage_dead=True: → per-worker MemoryStorage fallback → handler + (cap effectively loosened to N× during outage window; + slowapi re-probes Redis with exponential backoff) +``` + +Trusted-token requests still skip the limiter entirely; the storage call is not made for them. + +--- + +## 6. Error handling + +The fail-degraded path lives entirely inside slowapi (`in_memory_fallback_enabled=True`). 
Behavior matrix, derived from `slowapi/extension.py`: + +| Condition | slowapi behavior | Cap during this state | +|---|---|---| +| Redis healthy | All counters in Redis, shared across workers | Strict `settings.rate_limit` per IP | +| Redis raises (any exception) | `_storage_dead = True`; logs WARNING once; routes subsequent checks to per-worker `MemoryStorage` | `N × settings.rate_limit` per IP per worker | +| Redis recovery probe (every `2^N` seconds, exponential backoff) succeeds | `_storage_dead = False`; logs INFO once; back to Redis | Strict on next request | +| Redis still down at next probe | Probe fails silently; backoff continues; no new log | Continues N× per worker | + +slowapi catches `Exception` broadly around its storage interactions, so connection errors, timeouts, and redis-py-specific errors are all handled. Programming errors in our own code paths still bubble up. + +The existing `_rate_limit_handler` for `RateLimitExceeded` is unchanged. Trusted-token requests still bypass entirely (the `exempt_when` check fires before the storage call). + +### Documented degraded mode + +`README.md` adds a short note in the rate-limit section: "When `PC2NUTS_RATE_LIMIT_STORAGE_URI` is set and the configured backend is unreachable, the service falls back to per-process in-memory rate limiting (slowapi `in_memory_fallback_enabled`). During the outage window, the effective per-IP cap is `PC2NUTS_WORKERS × rate_limit`. slowapi re-probes the primary storage with exponential backoff and resumes shared rate limiting on recovery." + +--- + +## 7. Testing + +### Unchanged (passes as-is) + +- All existing rate-limit tests run against the default in-memory `Limiter` and continue to pass byte-for-byte. The `Limiter` construction path for `rate_limit_storage_uri == None` is exactly the current code. 
+ +### New tests + +Unit: + +- **`test_limiter_default_uses_memory_storage`** — when `rate_limit_storage_uri` is unset, `app.limiter.limiter._storage_uri` is the slowapi default (`None` or `"memory://"`) and `_in_memory_fallback_enabled` is `False`. Confirms the no-config path is byte-for-byte the current behavior. +- **`test_limiter_with_redis_uri_enables_fallback`** — when `rate_limit_storage_uri="redis://localhost:6379/0"` is configured (via `monkeypatch.setenv`), `app.limiter.limiter._storage_uri` is the configured URI and `_in_memory_fallback_enabled` is `True`. Does not contact Redis (Limiter constructor builds the storage object lazily-enough that no connection is opened until first request, but to be safe the test asserts on the constructor-level attributes only). + +Startup validator: + +- **`test_workers_gt_1_without_storage_uri_fails_startup`** — instantiate `Settings(workers=2, rate_limit_storage_uri=None)` and assert `ValidationError`. Asserts the validator fires. +- **`test_workers_gt_1_with_storage_uri_succeeds`** — `Settings(workers=2, rate_limit_storage_uri="redis://localhost:6379/0")` constructs cleanly without contacting Redis. +- **`test_workers_eq_1_without_storage_uri_succeeds`** — defaults still validate (regression guard). + +Integration (out of scope for the implementation PR — left as a separate task if CI gains a Redis service container): + +- A future integration test could spin up an ephemeral Redis, run two workers via `uvicorn --workers 2`, fire `>120` requests in a minute from a single client, assert the cap is observed across workers. The slowapi fail-degraded path is library code (already tested upstream), so the implementation PR does not need to re-test it; we only verify that our wiring activates the right slowapi configuration. + +--- + +## 8. 
Documentation
+
+- **`README.md`** — add a "Multi-worker deployment" subsection under the existing Configuration section: documents `PC2NUTS_WORKERS`, `PC2NUTS_RATE_LIMIT_STORAGE_URI`, the fail-degraded behavior during Redis outages, and the startup-validation guard. Cross-links to issue #68 in the changelog entry.
+- **`CHANGELOG.md`** — entry under Unreleased: "Added multi-worker deployment via `PC2NUTS_WORKERS` env var. Multi-worker mode requires `PC2NUTS_RATE_LIMIT_STORAGE_URI` (e.g. a Redis URL) so the per-IP rate limit stays strict across workers; transient backend unavailability is tolerated via per-worker in-memory fallback with exponential-backoff re-probing."
+- **`docs/performance.md`** — no changes in this PR. The re-baseline run (acceptance criterion in #68) is post-deploy operational work.
+
+---
+
+## 9. Acceptance criteria mapping (from issue #68)
+
+| Issue criterion | Where addressed |
+|---|---|
+| `Dockerfile` updated to launch N workers (configurable via env var) | §4.4 |
+| Memory usage measured against the container's allocated memory; document the headroom | Out of scope for this PR — operational follow-up. README documents the per-worker memory ballpark from the issue body. |
+| Rate-limit behaviour with N workers documented in `README.md` | §8 — option (a) Redis chosen, behavior documented including degraded mode |
+| Performance baseline re-run; `docs/performance.md` updated | Out of scope for this PR — separate operational task post-deploy |
+
+The two "out of scope" items are operational/empirical work that can only happen against the deployed multi-worker container. They are tracked as a follow-up task in the issue rather than blocking the implementation PR.
+
+---
+
+## 10. Open questions deferred to implementation
+
+- **Redis client library version pin.** `redis` (redis-py) — exact version range driven by what `limits 5.8.0` requires. The `limits[redis]` extra is the cleanest path; falls back to a direct pin if needed.
Confirmed at implementation time. +- **Pydantic validator semantics.** Confirmed: project is on Pydantic v2 (`pydantic==2.12.5`, `pydantic-settings==2.13.0` per `requirements.lock`), so `@model_validator(mode="after")` is the target. + +These are mechanical questions that don't change the architecture; they're called out so the implementation plan addresses them rather than stumbling on them. diff --git a/requirements.lock b/requirements.lock index 9bbcf9d..1bbf077 100644 --- a/requirements.lock +++ b/requirements.lock @@ -17,6 +17,7 @@ pydantic==2.12.5 pydantic-settings==2.13.0 python-dotenv==1.2.2 PyYAML==6.0.1 +redis==7.4.0 slowapi==0.1.9 starlette==0.52.1 typing_extensions==4.15.0 diff --git a/requirements.txt b/requirements.txt index 1a76291..cb1dd63 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ httpx>=0.28.1,<1 pydantic>=2.13.3,<3 pydantic-settings>=2.14.0,<3 slowapi>=0.1.9,<1 +limits[redis]>=2.3 python-dotenv>=1.2.2,<2 diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..73802fb --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,34 @@ +"""Tests for app.config.Settings.""" + +import pytest +from pydantic import ValidationError + +from app.config import Settings + + +class TestWorkersValidator: + def test_workers_eq_1_without_storage_uri_succeeds(self): + """Default config must keep validating — single-worker, no storage URI.""" + s = Settings(workers=1, rate_limit_storage_uri=None) + assert s.workers == 1 + assert s.rate_limit_storage_uri is None + + def test_workers_gt_1_with_storage_uri_succeeds(self): + """Multi-worker is permitted when a storage URI is configured.""" + s = Settings(workers=4, rate_limit_storage_uri="redis://localhost:6379/0") + assert s.workers == 4 + assert s.rate_limit_storage_uri == "redis://localhost:6379/0" + + def test_workers_gt_1_without_storage_uri_fails_startup(self): + """The unsafe combination must raise — silent cap loosening is the + failure mode this validator 
exists to prevent.""" + with pytest.raises(ValidationError) as excinfo: + Settings(workers=2, rate_limit_storage_uri=None) + msg = str(excinfo.value) + assert "PC2NUTS_WORKERS" in msg + assert "PC2NUTS_RATE_LIMIT_STORAGE_URI" in msg + + def test_workers_gt_1_with_empty_storage_uri_fails_startup(self): + """Empty string should be treated the same as None — both mean unset.""" + with pytest.raises(ValidationError): + Settings(workers=2, rate_limit_storage_uri="") diff --git a/tests/test_limiter.py b/tests/test_limiter.py new file mode 100644 index 0000000..0bf9617 --- /dev/null +++ b/tests/test_limiter.py @@ -0,0 +1,54 @@ +"""Tests for app.limiter — verify the Limiter is wired with the right storage +mode based on settings.rate_limit_storage_uri.""" + +import importlib + +import pytest + + +def _reload_limiter(): + """Reload app.config and app.limiter so the module-level Limiter picks up + the current env. Returns the freshly-imported app.limiter module.""" + import app.config + import app.limiter + + importlib.reload(app.config) + importlib.reload(app.limiter) + return app.limiter + + +class TestLimiterStorageSelection: + def test_limiter_default_uses_memory_storage(self, monkeypatch): + """When no storage URI is set, the Limiter is constructed with the + slowapi default (in-process memory) and fallback is disabled. This is + the byte-for-byte current behaviour.""" + monkeypatch.delenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", raising=False) + monkeypatch.delenv("PC2NUTS_WORKERS", raising=False) + + mod = _reload_limiter() + + # slowapi stores the configured URI on _storage_uri; default is None. + assert mod.limiter._storage_uri is None + # in_memory_fallback_enabled is only meaningful when a URI is set. + assert mod.limiter._in_memory_fallback_enabled is False + + def test_limiter_with_redis_uri_enables_fallback(self, monkeypatch): + """When a storage URI is set, the Limiter is constructed with that URI + AND in_memory_fallback_enabled=True. 
No network call should happen at + construction time — slowapi builds the storage object lazily.""" + monkeypatch.setenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", "redis://localhost:6379/0") + monkeypatch.setenv("PC2NUTS_WORKERS", "2") + + mod = _reload_limiter() + + assert mod.limiter._storage_uri == "redis://localhost:6379/0" + assert mod.limiter._in_memory_fallback_enabled is True + + @pytest.fixture(autouse=True) + def _restore_default_after_each_test(self, monkeypatch): + """After each test, force a reload back to defaults so other tests + in the suite see the unmodified module.""" + yield + monkeypatch.delenv("PC2NUTS_RATE_LIMIT_STORAGE_URI", raising=False) + monkeypatch.delenv("PC2NUTS_WORKERS", raising=False) + _reload_limiter()