diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml deleted file mode 100644 index 5346d409..00000000 --- a/.github/workflows/nightly.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: recotem / nightly - -# Runs the full test suite including @pytest.mark.slow tests (e.g. MovieLens100K e2e). -# Scheduled daily at 06:00 UTC against the main branch. -# Also triggerable manually via workflow_dispatch for ad-hoc full runs. -# -# Guard: only runs on the canonical upstream repository. -# Forks inherit the workflow file but the `if` below prevents billing them. -on: - schedule: - - cron: "0 6 * * *" - workflow_dispatch: - -permissions: - contents: read - -jobs: - slow-tests: - name: pytest (full suite including slow) - # Prevent forks from running scheduled jobs against their own repo - # and consuming Actions minutes unintentionally. - if: github.repository == 'codelibs/recotem' - runs-on: ubuntu-24.04 - strategy: - fail-fast: false - matrix: - python-version: ["3.12"] - steps: - - uses: actions/checkout@v6 - with: - ref: main - - - uses: astral-sh/setup-uv@v8.1.0 - with: - enable-cache: true - python-version: ${{ matrix.python-version }} - - - name: Install dependencies (all extras) - run: uv sync --frozen --dev --extra bigquery --extra s3 --extra gcs --extra metrics --extra postgres --extra mysql --extra sqlite --extra ga4 - - - name: Run full test suite (including slow and fuzz) - run: | - uv run pytest tests/unit tests/integration tests/fuzz \ - --override-ini='addopts=' \ - --tb=short \ - -q \ - --cov=src/recotem \ - --cov-report=term-missing \ - --cov-report=xml:coverage-nightly.xml - - - name: Upload coverage - uses: actions/upload-artifact@v7 - if: always() - with: - name: coverage-nightly-${{ matrix.python-version }} - path: coverage-nightly.xml - - slow-e2e: - name: e2e slow (MovieLens100K) - if: github.repository == 'codelibs/recotem' - runs-on: ubuntu-24.04 - needs: slow-tests - steps: - - uses: actions/checkout@v6 - with: - ref: main - - - 
uses: astral-sh/setup-uv@v8.1.0 - with: - enable-cache: true - - - name: Install dependencies - run: uv sync --frozen --dev --extra bigquery --extra s3 --extra gcs --extra metrics --extra postgres --extra mysql --extra sqlite --extra ga4 - - - name: Install recotem - run: uv pip install --no-deps . - - - name: Run e2e script (including slow paths) - run: | - chmod +x tests/e2e/run.sh - uv run bash tests/e2e/run.sh 2>&1 | tee /tmp/recotem-e2e-nightly.log - exit "${PIPESTATUS[0]}" - env: - RECOTEM_LOG_FORMAT: json - # Test signing key — not a production value. - RECOTEM_SIGNING_KEYS: "test-key:0000000000000000000000000000000000000000000000000000000000000000" - RECOTEM_SLOW: "1" - - - name: Upload e2e logs - uses: actions/upload-artifact@v7 - if: always() - with: - name: e2e-nightly-logs - path: /tmp/recotem-e2e-nightly.log diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2d66ed09..ac9d0715 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -145,10 +145,17 @@ jobs: echo "Scanning log files in $LOG_DIR ..." - # Pattern 1: raw sha256 hex digest (signing key or hash leaked) - if grep -rEq 'sha256:[0-9a-f]{64}' "$LOG_DIR"; then + # Pattern 1: raw sha256 hex digest (signing key or hash leaked). + # Excludes the public X-Recotem-Model-Version header and the + # corresponding JSON "model_version" response field, which carry + # the artifact content hash by design and are not secrets. + pat1_hits=$(grep -rEn 'sha256:[0-9a-f]{64}' "$LOG_DIR" \ + | grep -v '"model_version"[[:space:]]*:[[:space:]]*"sha256:' \ + | grep -vi 'x-recotem-model-version:[[:space:]]*sha256:' \ + || true) + if [ -n "$pat1_hits" ]; then echo "FAIL: Found sha256: in log output." 
- grep -rEn 'sha256:[0-9a-f]{64}' "$LOG_DIR" | head -5 + echo "$pat1_hits" | head -5 FAIL=1 fi diff --git a/.gitignore b/.gitignore index c5b4f071..cc14e8a7 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,8 @@ az:/ # Tools .serena/ + +# Local working docs (plans, specs) — not for the public repo +docs/plans/ +docs/specs/ + diff --git a/CLAUDE.md b/CLAUDE.md index 0b5cb706..1e60ade2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,7 +11,7 @@ single Python package (`pip install recotem`) plus a single Docker image. ├──────────────────────────────────────────────────────────────────┤ │ CLI (Typer) │ │ ├─ recotem train batch: fetch→train→sign │ -│ ├─ recotem serve --recipes FastAPI /predict │ +│ ├─ recotem serve --recipes FastAPI /v1/recipes/:* │ │ ├─ recotem inspect read header (no payload) │ │ ├─ recotem validate schema + connectivity │ │ ├─ recotem schema emit JSON Schema for IDEs │ @@ -52,12 +52,12 @@ src/recotem/ tests/ ├── unit/ per-module tests (recipe, artifact, training, ...) -├── integration/ in-process train + serve + predict +├── integration/ in-process train + serve + recommend ├── fuzz/ hypothesis byte mutations on artifact / recipe loaders -└── e2e/ bash script: train → serve → curl /predict +└── e2e/ bash script: train → serve → curl /v1/recipes/{name}:recommend docs/ -├── getting-started.md Docker / pip walkthrough → train → /predict +├── getting-started.md Docker / pip walkthrough → train → /v1/recipes/{name}:recommend ├── recipe-reference.md every recipe field, type, default, validation ├── data-sources/ bigquery.md, csv.md, ga4.md, sql.md ├── deployment/ docker.md, k8s.md, cron.md @@ -90,16 +90,16 @@ uv run recotem train examples/tutorial-purchase-log/recipe.yaml # Serve from a directory of recipes uv run recotem serve --recipes ./recipes/ --port 8080 -# Predict -curl -X POST http://localhost:8080/predict/news_articles \ +# Recommend +curl -X POST http://localhost:8080/v1/recipes/news_articles:recommend \ -H "X-API-Key: " \ -H "Content-Type: 
application/json" \ - -d '{"user_id":"u1","cutoff":10}' + -d '{"user_id":"u1","limit":10}' ``` ## Recipe model -A recipe is the single source of truth: 1 YAML = 1 model = 1 `/predict/{name}`. +A recipe is the single source of truth: 1 YAML = 1 model = 1 `/v1/recipes/{name}:recommend` (plus the related/batch verbs). See `docs/recipe-reference.md` for the full schema. Highlights: - `source.type` is a discriminator (`csv` | `parquet` | `bigquery` | `sql` | `ga4` | plugins). @@ -146,9 +146,11 @@ Binary container `magic | version | reserved | kid | hmac | header_json | payloa `uv run ruff format src tests`). Line-length 88. Selected rules in `pyproject.toml`. - pytest 8 + hypothesis 6. `@pytest.mark.slow` deselected by default. -- `from __future__ import annotations` is used everywhere except where it - breaks FastAPI dependency introspection (e.g. `routes.py` uses - `kid: str = Depends(_require_auth)` instead of `Annotated[...]`). +- `from __future__ import annotations` is used everywhere, including the + serving router. FastAPI dependency arguments are written as + `kid: str = Depends(_require_auth)` (not `Annotated[...]`) in + `serving/routes.py` so that `Depends` is resolved as a runtime + default rather than a stringified annotation. - structlog logger per module; the redaction processor in `recotem.log_redaction` is first in the chain and strips API keys, signing keys, and cloud creds. Lives at the top level so `train`-only invocations do @@ -208,7 +210,7 @@ uv run ruff format --check src tests | `RECOTEM_MAX_PAYLOAD_BYTES` | 512 MiB | Per-payload cap (post-HMAC-verify) for serve-side deserialization. Clamped [1 MiB, 16 GiB]. Smaller than `RECOTEM_MAX_ARTIFACT_BYTES` to bound deserialization memory expansion. | | `RECOTEM_ARTIFACT_ROOT` | (empty) | If set, local `output.path` must lie under it. | | `RECOTEM_RECIPE_*` | — | Allow-listed for `${...}` recipe expansion. 
| -| `RECOTEM_METADATA_FIELD_DENY` | (empty) | Comma-separated columns stripped from `/predict` responses. | +| `RECOTEM_METADATA_FIELD_DENY` | (empty) | Comma-separated columns stripped from `/v1/recipes/{name}:recommend` and `:recommend-related` responses. | | `RECOTEM_METRICS_ENABLED` | (empty) | Opt-in Prometheus `/metrics` endpoint. Truthy values: `1`, `true`, `yes`, `on`. Requires `recotem[metrics]` extra. | | `RECOTEM_LOCK_DIR` | (empty) | Override directory for per-recipe training lock files. Local outputs always lock at `<output_path>.lock`; remote outputs (`s3://`, `gs://`, ...) need a host-local path and fall back to `<tempdir>/recotem-locks/`. `flock` is host-local — across hosts use scheduler-level mutex (`concurrencyPolicy: Forbid`). | | `RECOTEM_BQ_REQUIRE_STORAGE_API` | (empty) | When truthy (`1`/`true`/`yes`/`on`), the BigQuery source raises `DataSourceError` instead of falling back to the REST path when the Storage Read API fails. Requires the service account to hold `bigquery.readSessions.create`. | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 50699f6e..abfe0acc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,7 +8,7 @@ Recotem is a single Python package with two execution modes: - `recotem train recipe.yaml` — fetch → train → write a signed artifact. - `recotem serve --recipes <dir>` — FastAPI server that watches the dir and - serves `/predict/{name}` for every loaded recipe. + serves `/v1/recipes/{name}:*` for every loaded recipe. The two modes communicate only via the signed artifact file format, so the trainer and the server can run on completely separate hosts. diff --git a/README.md b/README.md index 48eacb1c..e2b7abe2 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,9 @@ Recipe-driven recommender training and serving, built on [irspack](https://github.com/tohtsky/irspack). 
One YAML recipe describes where the data lives, how to train, and where to write the result — `recotem train` produces a signed binary artifact, `recotem serve` -mounts it as a `/predict/{name}` HTTP endpoint and hot-swaps when a new -artifact appears. No database, no message broker, no admin UI. +mounts it under `/v1/recipes/{name}:recommend` (plus `:recommend-related` +and batch verbs) and hot-swaps when a new artifact appears. No database, +no message broker, no admin UI. ## Why Recotem @@ -32,7 +33,7 @@ moving parts to a recipe file and a binary artifact: ## Features -- Recipe-driven: 1 YAML = 1 model = 1 `/predict/{name}` endpoint +- Recipe-driven: 1 YAML = 1 model = 1 `/v1/recipes/{name}:recommend` endpoint (with related/batch verbs) - Hyperparameter search across irspack algorithms via Optuna - Pluggable data sources (built-in: CSV / Parquet / BigQuery / SQL / GA4; extend via Python entry points) - HMAC-signed artifacts with multi-key rotation and a deterministic @@ -84,21 +85,28 @@ recotem train examples/quickstart/recipe.yaml recotem serve --recipes examples/quickstart/ & # Wait for the server to become ready before sending traffic. -until curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health | grep -q "200"; do sleep 1; done +until curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/v1/health | grep -q "200"; do sleep 1; done -# 3. Predict -curl -X POST http://localhost:8080/predict/top_picks \ +# 3. Recommend +# 3a. Recommend for a known user +curl -X POST http://localhost:8080/v1/recipes/top_picks:recommend \ -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ -H "Content-Type: application/json" \ - -d '{"user_id": "u01", "cutoff": 5}' + -d '{"user_id": "u01", "limit": 5}' + +# 3b. 
Recommend items related to a seed item +curl -X POST http://localhost:8080/v1/recipes/top_picks:recommend-related \ + -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ + -H "Content-Type: application/json" \ + -d '{"seed_items": ["i00"], "limit": 5}' ``` ```json { - "items": [{"item_id": "i00", "score": 0.91}], - "model": {"recipe": "top_picks", "trained_at": "...", - "best_class": "TopPopRecommender", "kid": "dev"}, - "request_id": "..." + "request_id": "req_01HZX...", + "recipe": "top_picks", + "model_version": "sha256:abc...", + "items": [{"item_id": "i00", "score": 0.91}] } ``` @@ -112,8 +120,8 @@ for the source of truth and | Variable | Required by | Purpose | |---|---|---| | `RECOTEM_SIGNING_KEYS` | `train` and `serve` | HMAC sign / verify artifact files (server keeps plaintext; needed for both sides) | -| `RECOTEM_API_KEYS` | `serve` | Authenticate `/predict` callers (server keeps **hash** only) | -| `X-API-Key: <plaintext>` | HTTP clients | Sent by clients on every `/predict` call; server re-hashes and compares | +| `RECOTEM_API_KEYS` | `serve` | Authenticate `/v1/recipes/*` callers (server keeps **hash** only) | +| `X-API-Key: <plaintext>` | HTTP clients | Sent by clients on every `/v1/recipes/*` call; server re-hashes and compares | Both variables accept multiple comma-separated entries (`kid:value,kid2:value,…`) to enable zero-downtime key rotation — that is why they are pluralised. @@ -129,7 +137,7 @@ to enable zero-downtime key rotation — that is why they are pluralised. │ (batch job) (HMAC-signed) (FastAPI, │ │ hot-swap) │ │ │ -│ any scheduler local FS, S3, POST /predict/{name}│ +│ any scheduler local FS, S3, POST /v1/recipes/{name} │ │ (cron / k8s / …) GCS, fsspec X-API-Key auth │ │ │ └────────────────────────────────────────────────────────────────────────┘ diff --git a/compose.yaml b/compose.yaml index c9fc7d80..e02b9506 100644 --- a/compose.yaml +++ b/compose.yaml @@ -20,10 +20,10 @@ # 3. Serve + curl # $ RECOTEM_SIGNING_KEYS="..." RECOTEM_API_KEYS="..." 
\ # docker compose up -d serve -# $ curl -sX POST http://localhost:8080/predict/purchase_log \ +# $ curl -sX POST http://localhost:8080/v1/recipes/purchase_log:recommend \ # -H "X-API-Key: <api plaintext>" \ # -H "Content-Type: application/json" \ -# -d '{"user_id": "1", "cutoff": 5}' +# -d '{"user_id": "1", "limit": 5}' x-recotem-image: &recotem-image image: ghcr.io/codelibs/recotem:latest diff --git a/docs/README.md b/docs/README.md index c43aa002..b78b8a64 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,7 +2,7 @@ ## Getting started -- [Getting started](getting-started.md) — install (Docker or pip), train from a public CSV, curl `/predict` +- [Getting started](getting-started.md) — install (Docker or pip), train from a public CSV, curl `/v1/recipes/{name}:recommend` ## Reference diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 00000000..ed188718 --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,175 @@ +# recotem v1 API Reference + +Authoritative reference for the v1 HTTP surface mounted under `/v1`. + +## Authentication + +All endpoints except `/v1/health` require the `X-API-Key` header. See +`docs/security.md` for key rotation procedures. + +## Endpoints + +### `POST /v1/recipes/{name}:recommend` +Single-user recommendation. + +**Path parameters:** `name` matches `^[A-Za-z0-9_-]{1,64}$` (same as the +recipe-name constraint enforced by the recipe loader). + +**Request body:** + +| field | type | required | default | notes | +|---|---|---|---|---| +| `user_id` | string | yes | – | 1-256 chars | +| `limit` | int | no | 10 | 1..1000 | +| `exclude_items` | string[] \| null | no | null | ≤1000 items | + +**Response body:** see `RecommendResponse` in `src/recotem/serving/schemas.py`. + +**Status codes:** 200, 401, 404 (`UNKNOWN_USER` | `RECIPE_NOT_FOUND`), 422 (`VALIDATION_ERROR`), 503 (`RECIPE_UNAVAILABLE`). + +### `POST /v1/recipes/{name}:recommend-related` +Seed-item → items. 
+ +**Request body:** + +| field | type | required | default | notes | +|---|---|---|---|---| +| `seed_items` | string[] | yes | – | 1-100 items | +| `limit` | int | no | 10 | 1..1000 | +| `exclude_items` | string[] \| null | no | null | | + +**Status codes:** 200, 401, 404 (`UNKNOWN_SEED_ITEMS` | `NO_CANDIDATES` | `RECIPE_NOT_FOUND`), 422 (`VALIDATION_ERROR`), 503 (`RECIPE_UNAVAILABLE`). + +`UNKNOWN_SEED_ITEMS` means none of the supplied `seed_items` were known +to the model id-map (typically a client-side data issue). +`NO_CANDIDATES` means at least one seed was known but the ranker did not +produce any survivors after its internal filtering — typically a data +distribution issue rather than a client mistake. + +### `POST /v1/recipes/{name}:batch-recommend` +Multi-user batch. Body: `{ "requests": RecommendRequest[], "include_metadata": bool }` (1..256). +Response: `BatchRecommendResponse`. Per-element `status` ∈ {ok, error}. +HTTP 200 on partial failure; HTTP 503 only when the recipe itself is +unavailable. + +`include_metadata` (default `false`): when `true`, each `ok` result +includes per-item metadata fields (same join as the single-recommend +endpoint). Default `false` preserves the performance-first default for +bulk callers. + +The aggregate `sum(requests[].limit)` must not exceed **5000**. When a +sub-request would push the running aggregate over the cap, that element +surfaces as `status=error, code=VALIDATION_ERROR` and processing of +subsequent elements continues — earlier elements are unaffected. The +list size cap (1..256) is enforced at the schema level (whole-request +422 if violated); per-element schema failures are surfaced per-element +so a single bad entry never 422s the whole batch. + +**Status codes:** 200, 401, 404 (`RECIPE_NOT_FOUND`), 422 (`VALIDATION_ERROR` — only for whole-request shape, e.g. missing `requests` key, list too large), 503 (`RECIPE_UNAVAILABLE`). 
+ +> **Note:** batch endpoints return `{item_id, score}` only by default +> (`include_metadata=false`). Set `include_metadata: true` to include +> per-item metadata fields (same join as single-recommend endpoints). +> Be aware that metadata enrichment increases response size; for bulk callers +> that do not need metadata the default `false` is recommended. + +### `POST /v1/recipes/{name}:batch-recommend-related` +Multi-seed batch. Body: `{ "requests": RecommendRelatedRequest[], "include_metadata": bool }` (1..256). +Same aggregate-limit, per-element validation rules, and `include_metadata` +semantics as `:batch-recommend`. + +**Status codes:** 200, 401, 404 (`RECIPE_NOT_FOUND`), 422 (`VALIDATION_ERROR` — only for whole-request shape), 503 (`RECIPE_UNAVAILABLE`). + +### `GET /v1/recipes` +Authenticated. Returns `RecipesListResponse` with one entry per loaded +recipe. + +### `GET /v1/recipes/{name}` +Authenticated. Returns `RecipeDetailResponse` or 404 (`RECIPE_NOT_FOUND`). + +**Status codes:** 200, 401, 404 (`RECIPE_NOT_FOUND`), 503 (`RECIPE_UNAVAILABLE`). + +### `GET /v1/health` +Unauthenticated. Returns `{status, total, loaded}`. Body status is +`"ok"` when every registered recipe is loaded, `"degraded"` otherwise. +The HTTP response code mirrors body status: **200 OK** when ok, **503 +Service Unavailable** when degraded — so K8s readiness probes pointing +at this endpoint mark the pod NotReady whenever any recipe is +unloaded. + +### `GET /v1/health/details` +Authenticated. Returns `{status, recipes: {name: health}}`. Same 200 +/ 503 status-code rule as `/v1/health`. + +### `GET /v1/metrics` +Prometheus exposition. Excluded from OpenAPI. Requires +`RECOTEM_METRICS_ENABLED` to be truthy at startup. + +**Requires `X-API-Key`** — configure your Prometheus scraper with an +`authorization` block or `http_headers` accordingly. + +## Headers + +- `X-Request-ID` — accepted (regex `^[A-Za-z0-9_-]{1,128}$`) or generated; + always echoed in the response. 
When missing or invalid the server + substitutes a 12-char hex string. Handlers read the validated value + from `request.state.request_id`, so the body field and response header + always agree. +- `X-Recotem-Model-Version` — present on every successful recommend + response; mirrors `model_version` in the body. +- `X-Recotem-Items-Degraded` — present on `:recommend` and + `:recommend-related` responses only when one or more items could not be + fully serialized with metadata. The value is the total count of items + that fell back to bare `{item_id, score}` (fallback) or were omitted + entirely (dropped) due to metadata serialization failures. Absent when + all items serialize cleanly. **Not sent** on `:batch-recommend` or + `:batch-recommend-related` endpoints. + +## Error body shape + +All v1 error responses share a flat envelope at the top of the body: + +```json +{"detail": "<human-readable message>", "code": "<MACHINE_CODE>"} +``` + +There is no nested `{"detail": {"detail": ..., "code": ...}}` form — +clients parse `body["detail"]` and `body["code"]` directly. + +**422 validation errors** add a per-field breakdown from FastAPI / +Pydantic and include the request ID so the body is correlatable with the +`X-Request-ID` response header: + +```json +{ + "request_id": "<id matching X-Request-ID>", + "detail": "Request validation failed", + "code": "VALIDATION_ERROR", + "errors": [{"loc": ["body", "limit"], "msg": "...", "type": "..."}] +} +``` + +**500 unhandled errors** flatten to: + +```json +{"detail": "internal error", "code": "INTERNAL_ERROR"} +``` + +Each endpoint above lists the status codes it can emit; the body shape +in every error case is one of the three forms above. 
+ +## Error Code Table + +| code | HTTP | when | +|---|---|---| +| `RECIPE_UNAVAILABLE` | 503 | recipe not loaded | +| `RECIPE_NOT_FOUND` | 404 | no such recipe in registry | +| `UNKNOWN_USER` | 404 | user not in idmap | +| `UNKNOWN_SEED_ITEMS` | 404 | none of seed_items known to model | +| `NO_CANDIDATES` | 404 | seeds known, but ranker produced no survivors | +| `VALIDATION_ERROR` | 422 | Pydantic schema rejected the request (also used per-element inside batch responses) | +| `MISSING_API_KEY` | 401 | `X-API-Key` header missing | +| `INVALID_API_KEY` | 401 | `X-API-Key` header present but did not match any configured digest (also covers short-key / oversize-key rejections so callers cannot fingerprint the guard) | +| `INTERNAL_ERROR` | 500 / batch | unhandled server-side exception, or unexpected recommender internal layout (`recommender_layout_unexpected`) — status=500 on single endpoints; per-element `status=error` inside batch responses | + +All v1 codes use `UPPER_SNAKE_CASE`. diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md index 68e440dd..6c71423a 100644 --- a/docs/deployment/docker.md +++ b/docs/deployment/docker.md @@ -158,7 +158,7 @@ docker run --rm \ | `RECOTEM_ENV` | no | `""` | `--insecure-no-auth` permitted when set to `development`, `dev`, or `test`; `--dev-allow-unsigned` permitted only when set to `development`. | | `RECOTEM_ARTIFACT_ROOT` | no | `""` | If set, local `output.path` must resolve under this directory (symlink-escape guard) | | `RECOTEM_LOCK_DIR` | no | `""` | Override directory for per-recipe training lock files. Needed when `output.path` is a remote URI (lock files must be host-local). Falls back to a temp dir under the system temp directory. 
| -| `RECOTEM_METADATA_FIELD_DENY` | no | `""` | Comma-separated column names stripped from `/predict` responses after the metadata join | +| `RECOTEM_METADATA_FIELD_DENY` | no | `""` | Comma-separated column names stripped from `/v1/recipes/{name}:recommend` and `:recommend-related` responses after the metadata join | | `RECOTEM_METRICS_ENABLED` | no | `""` | Set to `1`/`true`/`yes`/`on` to enable the Prometheus `/metrics` endpoint. Requires `recotem[metrics]` extra. | | `RECOTEM_STARTUP_PARALLELISM` | no | `""` (auto) | Number of parallel threads used to load artifacts at startup. Default is `min(len(recipes), 8)`. Clamped 1–32. Set to `1` for sequential loading (useful for memory-constrained environments or debugging). | diff --git a/docs/getting-started.md b/docs/getting-started.md index a6198347..22c4079c 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -67,32 +67,45 @@ docker compose logs --no-color -n 20 serve Health check: ```bash -curl http://localhost:8080/health -# {"status":"ok","recipes":{"purchase_log":{"loaded":true,...}}} +curl http://localhost:8080/v1/health +# {"status":"ok","total":1,"loaded":1} ``` -### 4. Predict +### 4. Recommend ```bash -curl -sX POST http://localhost:8080/predict/purchase_log \ +curl -sX POST http://localhost:8080/v1/recipes/purchase_log:recommend \ -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ -H "Content-Type: application/json" \ - -d '{"user_id": "1", "cutoff": 5}' | jq . + -d '{"user_id": "1", "limit": 5}' | jq . ``` Expected (the exact items / scores depend on training): ```json { + "request_id": "req_01HZX...", + "recipe": "purchase_log", + "model_version": "sha256:abc...", "items": [ {"item_id": "...", "score": 0.91}, ... - ], - "model": {"recipe": "purchase_log", "best_class": "IALSRecommender", "kid": "dev"}, - "request_id": "..." + ] } ``` +### 4b. 
Recommend related items + +`:recommend-related` returns items similar to one or more seed items — +useful for "related products" widgets or content carousels: + +```bash +curl -sX POST http://localhost:8080/v1/recipes/purchase_log:recommend-related \ + -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ + -H "Content-Type: application/json" \ + -d '{"seed_items": ["<item_id>"], "limit": 5}' | jq . +``` + ### 5. Tear down ```bash @@ -147,13 +160,13 @@ recotem train examples/tutorial-purchase-log/recipe.yaml recotem serve --recipes examples/tutorial-purchase-log/ ``` -### 4. Predict +### 4. Recommend ```bash -curl -sX POST http://127.0.0.1:8080/predict/purchase_log \ +curl -sX POST http://127.0.0.1:8080/v1/recipes/purchase_log:recommend \ -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ -H "Content-Type: application/json" \ - -d '{"user_id": "1", "cutoff": 5}' | jq . + -d '{"user_id": "1", "limit": 5}' | jq . ``` ## What just happened @@ -163,8 +176,8 @@ curl -sX POST http://127.0.0.1:8080/predict/purchase_log \ IALS and TopPop, and wrote a binary artifact signed with your signing key. - `recotem serve` watched the artifact directory, picked up the new file, HMAC-verified it against the same signing key, and registered the - `/predict/purchase_log` endpoint. -- The `/predict` request was authenticated by the API key allow-list and + `/v1/recipes/purchase_log:recommend` endpoint. +- The recommend request was authenticated by the API key allow-list and scored using the trained model. ## Train from SQLite (zero cloud, zero Docker) @@ -194,8 +207,8 @@ See `docs/data-sources/sql.md` for PostgreSQL / MySQL recipes. 
| `DataSourceError: sha256 mismatch` | Upstream rotated the file | Re-compute with `curl -sL <url> \| shasum -a 256` and update the recipe | | `DataSourceError: HTTP 404 fetching …` | URL changed | Verify the URL in a browser; restore the v1.0.0 tag | | `ArtifactError: RECOTEM_SIGNING_KEYS not set` | Step 1 not exported | Re-run the export and try again | -| `401 Unauthorized` on /predict | Wrong API key plaintext | Use the `plaintext` line from `keygen --type api`, not the `hash` | -| `503 recipe_unavailable` on /predict immediately after train | Watcher has not polled yet | Wait up to `RECOTEM_WATCH_INTERVAL` seconds (default 5; tutorial sets 10). Check `/health`. | +| `401 Unauthorized` on `:recommend` | Wrong API key plaintext | Use the `plaintext` line from `keygen --type api`, not the `hash` | +| `503 RECIPE_UNAVAILABLE` on `:recommend` immediately after train | Watcher has not polled yet | Wait up to `RECOTEM_WATCH_INTERVAL` seconds (default 5; tutorial sets 10). Check `/v1/health`. | | Path B: artifact written to wrong directory | Recipe `output.path` is CWD-relative | Run `recotem train` from the repo root (or edit `output.path` to an absolute path). | | `recotem: command not found` after pip install | `pip` installed to a venv not on `PATH` | Use `python -m recotem ...`, or activate the venv (`uv run recotem ...`). | diff --git a/docs/operations.md b/docs/operations.md index 663132e7..76f1a202 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -44,16 +44,16 @@ This multi-kid pattern enables zero-downtime rotation: RECOTEM_SIGNING_KEYS="prod-2026-q3:ddeeff..." ``` - Restart `recotem serve`. Any artifact still signed with the old kid will fail to load and will show up as `loaded: false` in `/health/details`. Retrain those recipes. + Restart `recotem serve`. Any artifact still signed with the old kid will fail to load and will show up as `loaded: false` in `/v1/health/details`. Retrain those recipes. - Confirm all recipes loaded successfully. 
Per-recipe state lives behind the authenticated `/health/details` endpoint — the public `/health` returns only `{status, total, loaded}` aggregates, not the `recipes` map: + Confirm all recipes loaded successfully. Per-recipe state lives behind the authenticated `/v1/health/details` endpoint — the public `/v1/health` returns only `{status, total, loaded}` aggregates, not the `recipes` map: ```bash # -f / --fail returns exit 22 on 4xx/5xx, which would mask a 503. # Use -w to capture the status code instead. HTTP_STATUS=$(curl -s -o /tmp/health.json -w "%{http_code}" \ -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ - http://localhost:8080/health/details) + http://localhost:8080/v1/health/details) echo "HTTP $HTTP_STATUS" jq '.recipes | to_entries[] | select(.value.loaded == false)' /tmp/health.json ``` @@ -127,7 +127,9 @@ wrong-magic file of the expected length, the parsed kid string is shown verbatim instead — useful for grepping which signing key the offending artifact was written with. -The server continues running and returns 503 for that recipe's `/predict/{name}` endpoint. +The server continues running and returns 503 (`RECIPE_UNAVAILABLE`) for +that recipe's `/v1/recipes/{name}:recommend` (and sibling verbs) +endpoints. **Recovery steps:** @@ -154,8 +156,9 @@ The server continues running and returns 503 for that recipe's `/predict/{name}` 3. **Verify.** ```bash - curl http://localhost:8080/health | jq '.recipes.my_recipe' - # {"loaded": true, "best_class": "IALSRecommender", ...} + curl -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ + http://localhost:8080/v1/health/details | jq '.recipes.my_recipe' + # {"loaded": true, ...} ``` If the artifact was written with `versioning: append_sha`, the old corrupt file is still present with its sha-suffix name. 
You can delete it after confirming the new artifact loaded: @@ -269,13 +272,27 @@ Additional events emitted by the watcher, recipe loader, and size-cap helper tha | `recipe_security_violation_skipped` | ERROR | `recipe/loader.py` lenient loader | A recipe file contains a security-category error (path traversal, disallowed scheme, embedded credentials). The recipe is skipped but the server keeps running. **Alertable** — indicates a misconfigured or potentially hostile recipe file. | | `recipe_load_error_skipped` | WARN | `recipe/loader.py` lenient loader | A recipe file failed to load for non-security reasons (schema error, YAML parse error). The recipe is skipped. | | `size_cap_probe_failed` | WARN | `_size_cap.py` | An fsspec `info()` call on an object-store path failed unexpectedly (not `FileNotFoundError` / `PermissionError`). The size cap check was skipped; the subsequent read proceeds but is unbounded by the pre-read cap. Indicates degraded-but-bounded behavior. | -| `auth_anonymous_bypass` | DEBUG | `serving/auth.py` | Every request that passes without an API key (when `RECOTEM_API_KEYS` is empty). Emitted on every request for access-log correlation. | +| `auth_anonymous_bypass` | DEBUG | `serving/auth.py` | Every request that passes without an API key (when `RECOTEM_API_KEYS` is empty). Emitted on every request for access-log correlation. The `mode` field distinguishes `"insecure_no_auth"` (explicit flag) from `"loopback_no_keys"` (no keys configured). | | `auth_anonymous_bypass_first_seen` | INFO | `serving/auth.py` | First anonymous request from a given `client_host` (per process). The LRU cache tracking first-seen IPs is bounded to 1024 entries to prevent unbounded memory growth. | | `kid_extraction_failed` | WARN | `serving/watcher.py` | An artifact's kid bytes could not be parsed from the raw bytes (too short, out-of-range length, decode error). 
The kid shown in subsequent log fields is `\x00<unparseable>` — intentionally not collidable with any real kid. | | `artifact_stat_timeout` | WARN | `serving/watcher.py` | A stat() future did not complete within the per-future timeout (`min(watch_interval, 30)` seconds). Hung object-store stats no longer block tick progress or delay SIGTERM handling. | +| `recommender_layout_unexpected` | WARN | `serving/routes.py` | `_any_seed_known` encountered an `AttributeError` on `recommender._mapper.item_id_to_index`. The request is treated as `INTERNAL_ERROR`. Increment counter: `recotem_recommender_layout_unexpected_total`. | +| `set_load_error_no_entry` | WARN | `serving/watcher.py` | The watcher tried to mark a load error on a recipe with no registry entry. Counter: `recotem_watcher_state_divergence_total`. | +| `sidecar_disappeared` | WARN | `serving/watcher.py` | A `.sha256` sidecar file was present on the previous poll but raised ENOENT on the current read — emitted once per disappearance transition. | +| `metadata_index_row_error` | WARN | `metadata/loader.py` | A per-row exception occurred during `build_metadata_index`. The row is skipped. Counted by `recotem_metadata_index_build_errors_total{recipe}`. | The `train_error` event uses `name=` (not `recipe=`) for the recipe name field and includes `kid=` when the signing kid is known, matching the `train_done` event's field names. +> **Note.** Metadata enrichment is indexed at artifact-load time. +> Use `recotem_metadata_index_build_errors_total{recipe}` for load-time +> per-row build failures and `recotem_metadata_serialization_errors_total{recipe,verb}` +> for request-time per-item serialization failures. 
When per-item +> metadata enrichment fails at request time, the item is served with +> `item_id` and `score` only (fallback) or dropped; the +> `X-Recotem-Items-Degraded` response header indicates how many items +> were degraded, and `recotem_v1_metadata_degraded_items_total{kind}` counts +> them by kind (`fallback` / `dropped`). + ## Concurrent training and persistent search storage `recotem train` acquires a per-recipe POSIX `flock` at @@ -386,7 +403,7 @@ Full list of environment variables recognised by Recotem. Variables marked `serv | `RECOTEM_ENV` | (empty) | serve | Deployment environment tag. `--insecure-no-auth` is permitted only when set to `development`, `dev`, or `test`; `--dev-allow-unsigned` only when set to `development`. When set to `production`, `prod`, or `staging`, the `/docs`, `/redoc`, and `/openapi.json` endpoints are disabled. | | `RECOTEM_DRAIN_SECONDS` | 30 | serve | SIGTERM graceful drain window (clamped [1, 300]). Set `terminationGracePeriodSeconds` ≥ this + 5 in Kubernetes. | | `RECOTEM_LOG_FORMAT` | auto | train + serve | `auto` / `json` / `console`. | -| `RECOTEM_METADATA_FIELD_DENY` | (empty) | serve | Comma-separated columns stripped from `/predict` responses after the metadata join. | +| `RECOTEM_METADATA_FIELD_DENY` | (empty) | serve | Comma-separated columns stripped from `/v1/recipes/{name}:recommend` and `:recommend-related` responses after the metadata join. | | `RECOTEM_METRICS_ENABLED` | (unset) | serve | Truthy enables the Prometheus `/metrics` endpoint. Requires `recotem[metrics]` extra. | | `RECOTEM_ARTIFACT_ROOT` | (empty) | train | Local `output.path` must lie under this directory (symlink escapes rejected). | | `RECOTEM_LOCK_DIR` | (empty) | train | Override directory for per-recipe training lock files. Needed when `output.path` is a remote URI (`s3://`, `gs://`, …); falls back to `<tempdir>/recotem-locks/`. | @@ -404,12 +421,20 @@ Recotem does not enforce SLOs internally. 
Recommended baseline targets for produ | Metric | Target | |--------|--------| -| `/predict/{name}` p99 latency | < 50 ms (pure recommender, no metadata join) | -| `/health` p99 latency | < 5 ms | +| `/v1/recipes/{name}:recommend` p99 latency | < 50 ms (pure recommender, no metadata join) | +| `/v1/recipes/{name}:recommend-related` p99 latency | < 50 ms | +| `/v1/recipes/{name}:batch-recommend` and `:batch-recommend-related` p99 latency | budget separately per verb — track via `recotem_v1_request_latency_seconds{recipe,verb}` | +| `/v1/health` p99 latency | < 5 ms | | Availability (per recipe) | Measure via `recotem_model_loaded{recipe}` Prometheus gauge | | Artifact hot-swap time | ≤ `RECOTEM_WATCH_INTERVAL` + model load time | | Train-to-serve lag | Schedule train; serve detects in ≤ `RECOTEM_WATCH_INTERVAL` seconds | +SLO budgets above describe each v1 verb individually (`recommend`, +`recommend-related`, `batch-recommend`, `batch-recommend-related`). Use +the `verb` label on `recotem_v1_requests_total` / +`recotem_v1_request_latency_seconds` to break out per-verb rates and +quantiles. + Enable Prometheus metrics: ```bash @@ -418,32 +443,40 @@ pip install "recotem[metrics]" The `/metrics` endpoint is opt-in and off by default. Set `RECOTEM_METRICS_ENABLED` to a truthy value (`1`, `true`, `yes`, `on`) to activate. -> **Network exposure.** Both `/metrics` and `/health` are unauthenticated by -> design — the same posture Prometheus and Kubernetes liveness/readiness -> probes expect. The endpoints surface recipe names, kid IDs, load-error -> strings, model-load timestamps, and predict-latency histograms. -> **Restrict them with the cluster's NetworkPolicy** (`/metrics` to the -> Prometheus namespace, `/health` to kubelet probes) rather than relying -> on the API-key middleware. The `helm/recotem` chart's NetworkPolicy -> template ships with a deny-all baseline; allow only the scrapers and -> probes you actually need. 
+> **Network exposure.** Both `/v1/metrics` and `/v1/health` are +> unauthenticated by design — the same posture Prometheus and Kubernetes +> liveness/readiness probes expect. The endpoints surface recipe names, +> kid IDs, load-error strings, model-load timestamps, and per-verb +> latency histograms. +> **Restrict them with the cluster's NetworkPolicy** (`/v1/metrics` to +> the Prometheus namespace, `/v1/health` to kubelet probes) rather than +> relying on the API-key middleware. The `helm/recotem` chart's +> NetworkPolicy template ships with a deny-all baseline; allow only the +> scrapers and probes you actually need. Available metrics: -| Metric | Type | Labels | -|--------|------|--------| -| `recotem_predict_total` | Counter | `recipe`, `status` | -| `recotem_predict_latency_seconds` | Histogram | `recipe` | -| `recotem_model_loaded` | Gauge | `recipe` | -| `recotem_artifact_load_failures_total` | Counter | `recipe` | -| `recotem_active_recipes` | Gauge | — | -| `recotem_swap_total` | Counter | `recipe`, `result` | -| `recotem_artifact_stat_failures_total` | Counter | `recipe` | -| `recotem_watcher_unhandled_errors_total` | Counter | — | -| `recotem_metadata_lookup_errors_total` | Counter | `recipe` | -| `recotem_recipe_rescan_errors_total` | Counter | `recipe` | -| `recotem_bigquery_storage_fallback_total` | Counter | `reason` | -| `recotem_recipes_dir_scan_failures_total` | Counter | `error_class` | +| Metric | Type | Labels | Purpose | +|--------|------|--------|---------| +| `recotem_v1_requests_total` | Counter | `recipe`, `verb`, `status` | v1 request volume; `status` ∈ {`ok`, `unknown_user`, `unknown_seed_items`, `no_candidates`, `recipe_not_found`, `unavailable`, `validation_error`, `error`} | +| `recotem_v1_request_latency_seconds` | Histogram | `recipe`, `verb` | per-verb end-to-end latency | +| `recotem_v1_batch_size` | Histogram | `recipe`, `verb` | observed batch fan-out (only for `batch-recommend` / `batch-recommend-related`) | +| 
`recotem_v1_batch_element_errors_total` | Counter | `recipe`, `verb`, `code` | per-element errors inside batch HTTP-200 responses; `code` ∈ {`UNKNOWN_USER`, `UNKNOWN_SEED_ITEMS`, `NO_CANDIDATES`, `VALIDATION_ERROR`, `INTERNAL_ERROR`} | +| `recotem_v1_metadata_degraded_items_total` | Counter | `recipe`, `verb`, `kind` | items served with degraded metadata; `kind` ∈ {`fallback` (item_id/score only), `dropped` (omitted entirely)} | +| `recotem_v1_validation_errors_outside_verb_total` | Counter | — | 422 errors on non-inference paths (e.g. `/v1/recipes` list with bad query) | +| `recotem_model_loaded` | Gauge | `recipe` | 1 if the recipe is currently loaded | +| `recotem_artifact_load_failures_total` | Counter | `recipe`, `reason` | artifact-load failures since process start; `reason` ∈ {`read`, `parse`, `hmac`, `header_json`, `deserialize`, `metadata`, `yaml`, `unexpected`, `dir_scan`} | +| `recotem_active_recipes` | Gauge | — | total recipes in the registry | +| `recotem_swap_total` | Counter | `recipe`, `result` | hot-swap attempts (`ok` / `error`) | +| `recotem_artifact_stat_failures_total` | Counter | `recipe` | watcher stat() failures | +| `recotem_watcher_unhandled_errors_total` | Counter | — | watcher loop crashes | +| `recotem_metadata_index_build_errors_total` | Counter | `recipe` | per-row errors during `build_metadata_index` at artifact-load time (load-time) | +| `recotem_metadata_serialization_errors_total` | Counter | `recipe`, `verb` | per-item metadata serialization failures during response building (request-time) | +| `recotem_recipe_rescan_errors_total` | Counter | `recipe` | recipe rescan failures | +| `recotem_bigquery_storage_fallback_total` | Counter | `reason` | BQ Storage Read API fell back to REST | +| `recotem_recipes_dir_scan_failures_total` | Counter | `error_class` | recipes-dir scan failures | +| `recotem_recommender_layout_unexpected_total` | Counter | `recipe` | `AttributeError` on `recommender._mapper.item_id_to_index` — indicates 
irspack API incompatibility | +| `recotem_watcher_state_divergence_total` | Counter | — | watcher tried to mark an error on a non-existent registry entry (ordering bug) | --- @@ -474,18 +507,18 @@ Available metrics: `recipe_removed` and the entry is dropped from the registry. - On any failure during reload (`artifact_load_failed`, `artifact_load_unexpected_error`), the existing entry remains served and - its `last_load_error` field is set so `/health` shows the staleness while - `/predict` continues to return the previous good model. + its `last_load_error` field is set so `/v1/health/details` shows the staleness while + `/v1/recipes/{name}:recommend` continues to return the previous good model. - On `_stat_marker` returning None (file disappeared), the existing entry keeps serving and an `artifact_disappeared` warning is logged once. ### Initial load failure When an artifact fails to load at startup the recipe is still registered as -a stub (`loaded=false`, `error=<reason>`). The server starts, `/health` -reports `degraded`, and `/predict/{name}` returns 503. This is intentional: -a partial outage is recoverable by retraining without restarting the -process. +a stub (`loaded=false`, `error=<reason>`). The server starts, `/v1/health` +reports `degraded`, and `/v1/recipes/{name}:recommend` (and sibling verbs) +return 503 (`RECIPE_UNAVAILABLE`). This is intentional: a partial outage +is recoverable by retraining without restarting the process. 
The startup-only event variants are: @@ -524,10 +557,13 @@ The high-signal metrics for production alerting: | Recipe is unloaded | `recotem_model_loaded{recipe=...} == 0` for > `RECOTEM_WATCH_INTERVAL × 3` | page on-call | | Hot-swap failures | `rate(recotem_swap_total{result="error"}[5m]) > 0` | warn | | Artifact load failures since restart | `recotem_artifact_load_failures_total{recipe=...}` increase | warn (often paired with the unloaded alert above) | +| HMAC verification failures | `rate(recotem_artifact_load_failures_total{reason="hmac"}[5m]) > 0` | page — security signal (wrong key or tampered artifact) | +| Batch per-element error rate | `sum by (recipe) (rate(recotem_v1_batch_element_errors_total[5m])) / sum by (recipe) (rate(recotem_v1_requests_total{verb=~"batch-.*"}[5m]))` | warn at sustained > 1% per recipe | | Artifact stat failures (watcher poll) | `recotem_artifact_stat_failures_total{recipe=...}` increase | warn | | Watcher unhandled errors | `recotem_watcher_unhandled_errors_total` increase | warn | -| Predict error rate | `rate(recotem_predict_total{status="error"}[5m]) / rate(recotem_predict_total[5m])` | warn at 1%, page at 10% | -| Predict latency | `histogram_quantile(0.99, recotem_predict_latency_seconds_bucket)` | per-recipe SLO | +| Recommend error rate | `sum by (recipe) (rate(recotem_v1_requests_total{status="error"}[5m])) / sum by (recipe) (rate(recotem_v1_requests_total[5m]))` | warn at 1%, page at 10% | +| Recommend latency | `histogram_quantile(0.99, sum by (le, recipe, verb) (rate(recotem_v1_request_latency_seconds_bucket[5m])))` | per-recipe, per-verb SLO | +| Batch fan-out | `histogram_quantile(0.95, sum by (le, recipe, verb) (rate(recotem_v1_batch_size_bucket[5m])))` | watch for clients approaching the 256-element cap | | Active recipes | `recotem_active_recipes` drop > 0 since last scrape | warn (recipe removed or all stub) | | BigQuery Storage API fallback | `rate(recotem_bigquery_storage_fallback_total{reason="api_error"}[5m]) > 0` | warn — grant `bigquery.readSessions.create` to restore fast path | |
Recipes-dir scan failures | `rate(recotem_recipes_dir_scan_failures_total[5m]) > 0` | warn — broken recipe YAML or artifact path; check `error_class` label for `RecipeError` (schema), `OSError` (permissions), or `sidecar_stale` (artifact read failed after sidecar change) | @@ -544,9 +580,8 @@ Recotem follows semver. Within a major version (`2.x`): - The artifact format version is `1`. Older readers refuse newer formats with `unsupported format version`. When the format bumps, retrain after upgrading the writer; readers can be upgraded first. -- The FQCN allow-list is frozen per release; changes appear in the - CHANGELOG. Re-train if your artifacts encode a class that has been - removed. +- The FQCN allow-list is frozen per release. Re-train if your artifacts + encode a class that has been removed. For zero-downtime upgrade of the serve fleet, deploy new pods with both the old and new signing kids configured (rotation-style), let new pods @@ -557,7 +592,8 @@ become healthy, then drain old pods (relying on `RECOTEM_DRAIN_SECONDS`). ### `recotem serve` starts but recipe is `loaded: false` ```bash -curl http://localhost:8080/health | jq '.recipes' +curl -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ + http://localhost:8080/v1/health/details | jq '.recipes' ``` ```json @@ -613,18 +649,44 @@ All Optuna trials scored 0.0. Common causes: - The split produced an empty test set (too few users or interactions). Try `split.scheme: random` or lower `split.heldout_ratio`. - The data after cleansing has too few items for the cutoff. Lower `training.cutoff`. -### 401 on `/predict` +### 401 on `/v1/recipes/{name}:recommend` - Trailing or leading whitespace in the `X-API-Key` header is treated as part of the key and will not match. Trim client-side. - Confirm the hash in `RECOTEM_API_KEYS` was produced by `recotem keygen --type api` for the plaintext you are sending. 
The wire prefix is `sha256:` but the digest is **scrypt** (`hashlib.scrypt(plaintext, salt=b"recotem.api-key.v1", n=2, r=8, p=1, dklen=32)`). A plain `sha256(plaintext)` will not match. -### 503 on `/predict/{name}` +### 503 on `/v1/recipes/{name}:recommend` (or any sibling verb) + +The recipe is unhealthy (`loaded: false`) — response body carries +`{"detail": "...", "code": "RECIPE_UNAVAILABLE"}`. See +`/v1/health/details` for the underlying error. Usually a signing +mismatch or corrupt artifact. + +### 404 on `/v1/recipes/{name}:recommend` + +Response body carries `{"detail": "...", "code": "UNKNOWN_USER"}` — the +`user_id` was not present in training data. This is expected for new +users; handle it in your application layer (fall back to popularity-based +recommendations, for example). + +### 404 on `/v1/recipes/{name}:recommend-related` + +Response body carries `{"detail": "...", "code": "UNKNOWN_SEED_ITEMS"}` — +none of the supplied `seed_items` are known to the trained model. + +### 422 on any `/v1/recipes/{name}:*` verb -The recipe is unhealthy (`loaded: false`). See `/health` for the error. Usually a signing mismatch or corrupt artifact. +Request validation failed before the handler executed. The body is +`{"detail": "Request validation failed", "code": "VALIDATION_ERROR", +"errors": [...]}` and the request is counted as `status="validation_error"` +in `recotem_v1_requests_total`. -### 404 on `/predict/{name}` +### Partial failure in `/v1/recipes/{name}:batch-recommend` / `:batch-recommend-related` -The `user_id` in the request was not present in training data. This is expected for new users. Handle it in your application layer (fall back to popularity-based recommendations, for example). +Batch endpoints accept up to 256 requests per call and return per-element +`status` so a single bad input does not fail the whole batch. The HTTP +response is **200** when *any* element succeeded (failed elements carry +`status: "error"` with a `code` field). 
HTTP **503** is reserved for the +case where the recipe itself is unavailable (no element can be served). ### Watcher does not pick up new artifact diff --git a/docs/recipe-reference.md b/docs/recipe-reference.md index e4bfca50..9a2c9311 100644 --- a/docs/recipe-reference.md +++ b/docs/recipe-reference.md @@ -1,12 +1,12 @@ # Recipe Reference -A recipe is a YAML file that defines what data to fetch, how to train, and where to write the artifact. One recipe produces one model and one `/predict/{name}` endpoint. +A recipe is a YAML file that defines what data to fetch, how to train, and where to write the artifact. One recipe produces one model and one set of `/v1/recipes/{name}:*` endpoints. ## Top-level fields | Field | Type | Required | Description | |-------|------|----------|-------------| -| `name` | string | yes | Endpoint name. Pattern: `^[A-Za-z0-9_-]{1,64}$`. Becomes `/predict/{name}`. | +| `name` | string | yes | Endpoint name. Pattern: `^[A-Za-z0-9_-]{1,64}$`. Becomes `/v1/recipes/{name}:*`. | | `source` | object | yes | Data source config. `type` field is the discriminator (`csv`, `parquet`, `bigquery`, `sql`, `ga4`, or any plugin). Validated in two stages: the rest of the recipe is parsed first, then the source dict is dispatched to the plugin's `Config` class. As a result, errors in `source.*` surface *after* errors elsewhere in the recipe; an unknown `source.type` raises a `DataSourceError` listing all registered type names. | | `schema` | object | yes | Column mapping. | | `cleansing` | object | no | Data quality gates. 
| diff --git a/docs/security.md b/docs/security.md index 1d025518..3bd97bda 100644 --- a/docs/security.md +++ b/docs/security.md @@ -12,8 +12,8 @@ │ recotem serve │ │ binds to RECOTEM_HOST:RECOTEM_PORT │ API clients │ │ - (authenticated) ─────►│ POST /predict/{name} X-API-Key header │ - │ GET /health │ + (authenticated) ─────►│ POST /v1/recipes/{name}:* X-API-Key │ + │ GET /v1/health │ └──────────────┬────────────────────────────┘ │ reads (signed) ┌──────────────▼────────────────────────────┐ @@ -183,7 +183,7 @@ The four layered controls: The FQCN allow-list in `SafeUnpickler.find_class` is a secondary layer that operates independently of HMAC. Its purpose is to bound the blast radius if HMAC is ever bypassed (e.g. a signing-key compromise that has not yet been rotated, or a future HMAC vulnerability). It does **not** guarantee safety by itself: a sufficiently broad allow-list still exposes whatever API surface the permitted libraries expose. -The allow-list is frozen per irspack 0.4.x. If irspack adds or renames recommender classes, the list and the CHANGELOG entry are updated together. +The allow-list is frozen per irspack 0.4.x. If irspack adds or renames recommender classes, the list is updated with the corresponding Recotem release. The FQCN allow-list permits only these classes. Any other class outside both this list and the module-prefix allow-list triggers `ArtifactError` before construction: @@ -220,7 +220,7 @@ builtins.frozenset collections.OrderedDict ``` -This list is frozen per Recotem release. Changes ship with a CHANGELOG entry. +This list is frozen per Recotem release. In addition to the FQCN list, classes whose defining module sits under one of the following narrow prefixes are permitted via the prefix @@ -471,15 +471,18 @@ Both `auth_missing_header` and `auth_invalid_key` log `path=<request.url.path>` When `RECOTEM_API_KEYS` is empty, `auth_anonymous_bypass` fires on **every** request (DEBUG) so access-log correlation is possible. 
`auth_anonymous_bypass_first_seen` fires once per unique `client_host` (INFO) for a first-seen audit trail. The LRU cache tracking first-seen client IPs is bounded to 1024 entries to prevent unbounded memory growth under high IP churn (e.g. rotating CI IPs or attacker scanning). -## Predict response: information leakage +## Inference response: information leakage -`POST /predict/{name}` returns: +`POST /v1/recipes/{name}:recommend` (and its siblings `:recommend-related`, +`:batch-recommend`, `:batch-recommend-related`) returns: -- 503 (`recipe_unavailable`) — recipe stub or stale entry; visible without auth context only at `/health`. -- 404 (`user_not_found`) — `user_id` was not in training data. This response distinguishes "known user, no recommendations" from "unknown user". If user-existence is sensitive in your application, mask 404 responses at your reverse proxy and return a generic empty-recommendation body. +- 503 (`RECIPE_UNAVAILABLE`) — recipe stub or stale entry; visible without auth context only at `/v1/health`. +- 404 (`RECIPE_NOT_FOUND`) — the recipe name is not registered at all. Distinct from `UNKNOWN_USER` (same status, different `code`). +- 404 (`UNKNOWN_USER`) on `:recommend` — `user_id` was not in training data. This response distinguishes "known user, no recommendations" from "unknown user". If user-existence is sensitive in your application, mask 404 responses at your reverse proxy and return a generic empty-recommendation body. +- 404 (`UNKNOWN_SEED_ITEMS`) on `:recommend-related` — none of the supplied `seed_items` are known to the trained model. - 200 — recommendations, optionally joined with item metadata. Field stripping is configured via `RECOTEM_METADATA_FIELD_DENY` (case-**insensitive** column names — `"Internal_ID"` in metadata is stripped if `"internal_id"` is in the deny list). Use this to keep PII columns out of API responses even when they are present in the metadata file. 
-`cutoff` is bounded at `[1, 1000]` by the request schema; oversized requests +`limit` is bounded at `[1, 1000]` by the request schema; oversized requests receive a 422 from FastAPI before reaching the recommender. ## Rate limiting and DoS @@ -487,7 +490,7 @@ receive a 422 from FastAPI before reaching the recommender. Recotem itself does not implement request-rate limiting. Operators **must** front `recotem serve` with a reverse proxy (nginx `limit_req`, Caddy `rate_limit`, ALB / Cloud Armor) and apply per-IP or per-API-key quotas on -`/predict/*`. This is not optional in production. +the `/v1/` surface. This is not optional in production. **Why the proxy layer is responsible — scrypt amplification.** Every authentication attempt (valid or not) runs a scrypt key-derivation check @@ -497,27 +500,33 @@ therefore trigger CPU-bound scrypt work on every failed authentication, at a rate bounded only by the network rather than by the application. Recotem does not implement its own rate limiter; that is the proxy's responsibility. -`/predict` is also CPU-bound for recommendation inference; sustained request -rates above the recommender's inference throughput will queue under uvicorn -and cause request latency to climb. Measure and cap at the proxy. +The v1 inference verbs (`:recommend`, `:recommend-related`, +`:batch-recommend`, `:batch-recommend-related`) are also CPU-bound for +recommendation inference; sustained request rates above the recommender's +inference throughput will queue under uvicorn and cause request latency to +climb. Measure and cap at the proxy. **Recommended nginx configuration:** ```nginx # Define a rate-limit zone keyed by IP address (adjust burst/rate as needed). -limit_req_zone $binary_remote_addr zone=recotem_predict:10m rate=20r/s; +limit_req_zone $binary_remote_addr zone=recotem_v1:10m rate=20r/s; server { # ... TLS and upstream configuration ... 
- location /predict/ { - limit_req zone=recotem_predict burst=40 nodelay; + location /v1/ { + limit_req zone=recotem_v1 burst=40 nodelay; limit_req_status 429; proxy_pass http://recotem_backend; } } ``` +Operators who want to exempt `/v1/health` or `/v1/metrics` from the limit +can carve them out with a more specific `location` block; the recommended +default is to rate-limit the entire `/v1/` surface. + For per-API-key limiting, key on the `$http_x_api_key` variable or use a WAF (AWS WAF, GCP Cloud Armor, Cloudflare) that can enforce quotas per header value. diff --git a/examples/csv-local/recipe.yaml b/examples/csv-local/recipe.yaml index d814c26a..062e7af6 100644 --- a/examples/csv-local/recipe.yaml +++ b/examples/csv-local/recipe.yaml @@ -29,7 +29,7 @@ schema: item_column: item_id time_column: timestamp # optional; remove if the CSV has no time column -# Optional `item_metadata:` block enriches /predict responses with item fields; see docs/recipe-reference.md. +# Optional `item_metadata:` block enriches `:recommend` / `:recommend-related` responses with item fields; see docs/recipe-reference.md. cleansing: drop_null_ids: true diff --git a/examples/quickstart/README.md b/examples/quickstart/README.md index 5811eef8..2726c4f7 100644 --- a/examples/quickstart/README.md +++ b/examples/quickstart/README.md @@ -30,10 +30,10 @@ recotem train examples/quickstart/recipe.yaml recotem serve --recipes examples/quickstart/ # 4. 
Predict (in another terminal) -curl -X POST http://localhost:8080/predict/top_picks \ +curl -X POST http://localhost:8080/v1/recipes/top_picks:recommend \ -H "X-API-Key: $RECOTEM_API_PLAINTEXT" \ -H "Content-Type: application/json" \ - -d '{"user_id": "u01", "cutoff": 5}' + -d '{"user_id": "u01", "limit": 10}' ``` ## What's next diff --git a/src/recotem/artifact/signing.py b/src/recotem/artifact/signing.py index baa46266..3df0712c 100644 --- a/src/recotem/artifact/signing.py +++ b/src/recotem/artifact/signing.py @@ -61,8 +61,7 @@ ("irspack.utils.id_mapping", "IDMapper"), # irspack recommenders. Pickle records the original defining # submodule, not the package re-export. The set is frozen per - # release and tracked in CHANGELOG when irspack adds / renames - # recommenders. + # release and updated when irspack adds / renames recommenders. ("irspack.recommenders.ials", "IALSRecommender"), ("irspack.recommenders.knn", "CosineKNNRecommender"), ("irspack.recommenders.toppop", "TopPopRecommender"), diff --git a/src/recotem/datasource/ga4.py b/src/recotem/datasource/ga4.py index c4d677cb..e4a2e741 100644 --- a/src/recotem/datasource/ga4.py +++ b/src/recotem/datasource/ga4.py @@ -16,6 +16,8 @@ _EVENT_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]{0,39}$") _log = structlog.get_logger(__name__) +_PAGE_SIZE = 100_000 + class GA4Config(BaseModel): type: Literal["ga4"] @@ -255,7 +257,7 @@ def fetch(self, ctx: FetchContext) -> pd.DataFrame: raise DataSourceError("google.api_core is required for GA4Source") from exc client = self._get_client() - page_size = 100_000 + page_size = _PAGE_SIZE max_pages = get_ga4_max_pages() page_frames: list[pd.DataFrame] = [] offset = 0 diff --git a/src/recotem/metadata/loader.py b/src/recotem/metadata/loader.py index 80eb2906..565b9741 100644 --- a/src/recotem/metadata/loader.py +++ b/src/recotem/metadata/loader.py @@ -18,13 +18,15 @@ ``build_metadata_index`` converts a loaded DataFrame into a ``dict[str, dict[str, Any]]`` keyed by item_id for O(1) 
per-item lookups -during ``/predict`` — NaN values are converted to ``None`` for JSON safety -and deny-listed fields are stripped once at build time. +during ``/v1/recipes/{name}:recommend`` and ``:recommend-related`` — NaN +values are converted to ``None`` for JSON safety and deny-listed fields are +stripped once at build time. """ from __future__ import annotations import math +from collections.abc import Callable from io import BytesIO from typing import Any, Literal from urllib.parse import urlparse @@ -201,8 +203,8 @@ def load_item_metadata( # ----------------------------------------------------------------------- df = df[[item_id_col, *fields]].copy() # Drop duplicate item-ids before set_index — a non-unique index turns - # df.loc[item_id] from a Series into a DataFrame slice, which silently - # zeros out the metadata join in routes._lookup_metadata. + # df.loc[item_id] from a Series into a DataFrame slice, which would + # silently corrupt the metadata index built later by build_metadata_index. dup_count = int(df[item_id_col].duplicated().sum()) if dup_count > 0: logger.warning( @@ -227,13 +229,15 @@ def load_item_metadata( def build_metadata_index( df: pd.DataFrame, deny_set: frozenset[str] | None = None, + on_row_error: Callable[[], None] | None = None, ) -> dict[str, dict[str, Any]]: """Convert a metadata DataFrame into a pre-flattened dict for O(1) lookups. This function is called once at model-load time (in the watcher's - ``_build_entry``) so that ``/predict`` can perform an O(1) dict ``.get()`` - per recommended item rather than an O(n) DataFrame index lookup followed by - row serialisation. + ``_build_entry``) so that ``/v1/recipes/{name}:recommend`` and + ``:recommend-related`` can perform an O(1) dict ``.get()`` per recommended + item rather than an O(n) DataFrame index lookup followed by row + serialisation. Parameters ---------- @@ -245,6 +249,14 @@ def build_metadata_index( per-item dict. 
Filtering is applied here once rather than on every request. Pass ``frozenset(s.lower() for s in deny_list)`` (the same normalisation used in :func:`~recotem.serving.routes.make_router`). + on_row_error: + Optional zero-argument callback invoked once per row that raises + an unexpected exception during flattening (e.g. ``AttributeError`` + from a non-unique index returning a DataFrame slice instead of a + Series, or ``TypeError`` from a non-string column name that + bypasses the ``isinstance`` guard). Callers pass this to increment + a Prometheus counter without coupling this module to the serving + layer directly. The row is skipped and processing continues. Returns ------- @@ -259,8 +271,7 @@ def build_metadata_index( safe to pass directly to ``json.dumps`` or Pydantic's ``model_construct``. - Fields whose lowercased name appears in *deny_set* are omitted. - - Non-string column names are omitted (same guard as - :func:`~recotem.serving.routes._lookup_metadata`). + - Non-string column names are omitted defensively. """ _deny: frozenset[str] = deny_set or frozenset() @@ -271,20 +282,36 @@ def build_metadata_index( index: dict[str, dict[str, Any]] = {} for item_id, row in raw.items(): - item_dict: dict[str, Any] = {} - for col, val in row.items(): - if not isinstance(col, str): - continue - if col.lower() in _deny: - continue - # Convert float NaN to None for JSON-safety. Pandas uses float - # NaN for missing values even in object-typed columns; standard - # json.dumps raises on NaN by default (or silently emits 'NaN' - # which is not valid JSON). - if isinstance(val, float) and math.isnan(val): - val = None - item_dict[col] = val - index[str(item_id)] = item_dict + try: + item_dict: dict[str, Any] = {} + for col, val in row.items(): + if not isinstance(col, str): + continue + if col.lower() in _deny: + continue + # Convert float NaN to None for JSON-safety. 
Pandas uses float + # NaN for missing values even in object-typed columns; standard + # json.dumps raises on NaN by default (or silently emits 'NaN' + # which is not valid JSON). + if isinstance(val, float) and math.isnan(val): + val = None + item_dict[col] = val + index[str(item_id)] = item_dict + except (MemoryError, RecursionError): + raise + except Exception as exc: + # Unexpected per-row error (e.g. AttributeError from a non-unique + # index, TypeError from a non-string column name). Skip the row + # and invoke the caller's counter callback so the issue is + # observable in metrics without coupling this module to serving. + if on_row_error is not None: + on_row_error() + logger.warning( + "metadata_index_row_error", + item_id=str(item_id), + exc_type=type(exc).__name__, + error=str(exc)[:200], + ) logger.debug( "metadata_index_built", diff --git a/src/recotem/recipe/models.py b/src/recotem/recipe/models.py index 20389742..0f94d0bb 100644 --- a/src/recotem/recipe/models.py +++ b/src/recotem/recipe/models.py @@ -206,7 +206,7 @@ class Recipe(BaseModel, extra="forbid"): """Top-level recipe model. Represents a single training + serving unit. One recipe → one model → - one ``/predict/{name}`` endpoint. + one ``/v1/recipes/{name}:*`` set of endpoints. """ name: Annotated[ diff --git a/src/recotem/serving/_header_utils.py b/src/recotem/serving/_header_utils.py new file mode 100644 index 00000000..6aa43fa9 --- /dev/null +++ b/src/recotem/serving/_header_utils.py @@ -0,0 +1,36 @@ +"""Helpers for extracting and normalising values from artifact header dicts.""" + +from __future__ import annotations + +from typing import Any + + +def extract_algorithms(header_dict: dict[str, Any]) -> list[str]: + """Return the algorithm list from *header_dict*. + + Prefers ``header_dict["algorithms"]``; falls back to + ``header_dict["tuning"]["tried_algorithms"]`` when the primary key is + absent or empty. Returns an empty list when neither is available. 
+ """ + algorithms = header_dict.get("algorithms") + if algorithms: + return list(algorithms) + tuning = header_dict.get("tuning") or {} + tried = tuning.get("tried_algorithms") + if tried: + return list(tried) + return [] + + +def normalize_config_digest(raw: str | None) -> str | None: + """Normalize a raw config_digest value to ``sha256:<hex>`` or ``None``. + + - Empty string or ``None`` → ``None`` + - Already prefixed with ``sha256:`` → returned as-is + - Bare hex string without prefix → ``sha256:<raw>`` added + """ + if not raw: + return None + if raw.startswith("sha256:"): + return raw + return f"sha256:{raw}" diff --git a/src/recotem/serving/_naming.py b/src/recotem/serving/_naming.py new file mode 100644 index 00000000..6f460968 --- /dev/null +++ b/src/recotem/serving/_naming.py @@ -0,0 +1,26 @@ +"""Shared naming helpers for the Recotem serving layer.""" + +from __future__ import annotations + +from collections.abc import Callable + + +def dedup_stub_name(base: str, taken: Callable[[str], bool]) -> str: + """Return a name derived from *base* that is not already taken. + + Appends ``_1``, ``_2``, … until ``taken(candidate)`` returns ``False``. + If ``taken(base)`` is already ``False`` the base name is returned as-is. + + Parameters + ---------- + base: + The preferred name (typically a YAML file stem). + taken: + A callable that returns ``True`` when *candidate* is already in use. + """ + candidate = base + suffix = 0 + while taken(candidate): + suffix += 1 + candidate = f"{base}_{suffix}" + return candidate diff --git a/src/recotem/serving/app.py b/src/recotem/serving/app.py index 34a63b0e..0d6aca71 100644 --- a/src/recotem/serving/app.py +++ b/src/recotem/serving/app.py @@ -7,7 +7,7 @@ 4. Attempts initial artifact load for each recipe. 5. Builds the ``ModelRegistry``. 6. Registers FastAPI middlewares (TrustedHost, CORS). -7. Registers routes (via ``make_router``). +7. Registers routes (via ``make_router`` mounted at ``/v1``). 8. 
Wires the app lifespan to start the ``ArtifactWatcher`` and stop it gracefully on shutdown. @@ -21,24 +21,31 @@ from __future__ import annotations import json +import re import time +import uuid from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import asynccontextmanager from pathlib import Path from typing import Any import structlog +import structlog.contextvars from fastapi import FastAPI, Request -from fastapi.exceptions import HTTPException +from fastapi.exceptions import HTTPException, RequestValidationError from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.trustedhost import TrustedHostMiddleware from fastapi.responses import JSONResponse +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.types import ASGIApp from recotem.artifact.format import ArtifactError, parse_header_from_bytes from recotem.artifact.signing import KeyRing, unpickle_payload, verify_hmac from recotem.config import ConfigError, ServeConfig from recotem.recipe.loader import load_recipes_directory_lenient from recotem.serving import metrics as _metrics +from recotem.serving._header_utils import extract_algorithms, normalize_config_digest +from recotem.serving._naming import dedup_stub_name from recotem.serving.registry import ModelEntry, ModelRegistry from recotem.serving.routes import make_router from recotem.serving.watcher import ( @@ -53,6 +60,77 @@ logger = structlog.get_logger(__name__) +# Allowed characters and length for echoing a client-supplied X-Request-ID. +# 128 chars matches common tracing-vendor IDs (e.g. W3C traceparent excluding +# hyphens, Datadog dd-trace UUIDs). +_REQUEST_ID_RE = re.compile(r"^[A-Za-z0-9_\-]{1,128}$") + +# Pattern matching v1 inference verbs on the request path. Used by the +# RequestValidationError handler to record a ``validation_error`` metric for +# the appropriate (recipe, verb) tuple when 422 is returned for a malformed +# inference body. 
The name character class must mirror the recipe-name +# regex in ``recotem.recipe.models`` so a YAML recipe whose name starts with +# ``_`` or ``-`` still produces (recipe, verb)-labelled validation_error +# metrics rather than falling through to the unlabelled path. +_V1_VERB_PATH_RE = re.compile( + r"^/v1/recipes/(?P<name>[A-Za-z0-9_-]{1,64}):" + r"(?P<verb>recommend|recommend-related|batch-recommend|batch-recommend-related)$" +) + +# Default ``detail`` strings used by the HTTPException handler when callers +# raise ``HTTPException(detail={...})`` with a dict that omits a ``detail`` +# key. Keeps the response body well-formed (every error body has a string +# ``detail`` field) even if a handler forgets to set one. +_DEFAULT_DETAIL_FOR: dict[int, str] = { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 405: "Method Not Allowed", + 422: "Unprocessable Entity", + 500: "Internal Server Error", + 503: "Service Unavailable", +} + + +# --------------------------------------------------------------------------- +# Request-ID middleware +# --------------------------------------------------------------------------- + + +class RequestIDMiddleware(BaseHTTPMiddleware): + """Attach a request-scoped ID to every response. + + - Reads ``X-Request-ID`` from the incoming request. If the value passes + the allow-list (``[A-Za-z0-9_-]``, 1–128 chars) it is echoed back + verbatim; otherwise a server-generated 12-hex-char ID is used. + - Binds ``request_id`` into structlog's context-var store so all log + records emitted during the request carry the ID automatically. + - Writes ``X-Request-ID`` onto the response regardless of status code, + including 404/503 responses raised via ``HTTPException``. + - Stores the resolved ID on ``request.state.request_id`` so handlers that + need explicit access can read it without re-parsing the header.
+ """ + + def __init__(self, app: ASGIApp) -> None: + super().__init__(app) + + async def dispatch(self, request: Request, call_next): # type: ignore[override] + raw = request.headers.get("x-request-id", "") + if _REQUEST_ID_RE.fullmatch(raw): # fullmatch, not match: "$" alone also accepts a trailing "\n" + request_id = raw + else: + request_id = uuid.uuid4().hex[:12] + + request.state.request_id = request_id + structlog.contextvars.bind_contextvars(request_id=request_id) + try: + response = await call_next(request) + response.headers["X-Request-ID"] = request_id + return response + finally: + structlog.contextvars.unbind_contextvars("request_id") + # --------------------------------------------------------------------------- # Application factory @@ -80,7 +158,13 @@ def create_app(serve_config: ServeConfig) -> FastAPI: and dev_allow_unsigned is False. """ # 1. Validate unsafe flags. - serve_config.validate_insecure_flags() + try: + serve_config.validate_insecure_flags() + except Exception: + # Emit security.posture before propagating so SIEM rules fire even + # when the flag validation rejects the configuration. + _emit_security_posture(serve_config, None) + raise # 2. Enforce host binding based on auth posture. serve_config.apply_auth_posture() @@ -88,16 +172,29 @@ def create_app(serve_config: ServeConfig) -> FastAPI: # 3. Build KeyRing (or None for dev-unsigned path). # Always emit security.posture even when key-ring construction fails so # SIEM rules that look for the posture event still fire and operators see - # the "missing" status in the log before the ConfigError propagates. + # the status in the log before the ConfigError propagates. _key_ring_build_exc: Exception | None = None key_ring: KeyRing | None = None + _key_ring_status: str | None = None try: key_ring = _build_key_ring(serve_config) except Exception as _exc: _key_ring_build_exc = _exc + # Distinguish "keys not configured" (missing) from "keys provided but + # construction failed" (construction_failed).
The former is a routine + # operator configuration omission; the latter is a programming error or + # corrupt key material that warrants a different alert. + if not serve_config.signing_keys_raw and not serve_config.dev_allow_unsigned: + _key_ring_status = "missing" + else: + _key_ring_status = "construction_failed" + logger.exception( + "signing_key_construction_failed", + error=str(_exc), + ) # 4. Emit security.posture log line (always, even on key-ring failure). - _emit_security_posture(serve_config, key_ring) + _emit_security_posture(serve_config, key_ring, key_ring_status=_key_ring_status) if _key_ring_build_exc is not None: raise _key_ring_build_exc @@ -129,11 +226,7 @@ def create_app(serve_config: ServeConfig) -> FastAPI: stem = yaml_path.stem # Guard against duplicate stems in edge cases (two files whose stems # collide after the recipe name cannot be read). - stub_name = stem - _suffix = 0 - while stub_name in _yaml_names_seen: - _suffix += 1 - stub_name = f"{stem}_{_suffix}" + stub_name = dedup_stub_name(stem, lambda n: n in _yaml_names_seen) _yaml_names_seen[stub_name] = yaml_path.name yaml_failed_stub_paths[stub_name] = yaml_path logger.warning( @@ -164,7 +257,8 @@ def create_app(serve_config: ServeConfig) -> FastAPI: # On successful load we insert a fully populated ModelEntry; on failure # we still insert a stub (loaded=False, last_load_error=<reason>) so # /health returns degraded and operators can see which recipes are not - # serving. /predict checks and returns 503 for stubs. + # serving. The v1 inference endpoints check ``entry.loaded`` and return + # 503 for stubs. # # Loads are parallelised via a ThreadPoolExecutor so startup time is # bounded by the slowest single artifact rather than the sum of all @@ -176,7 +270,7 @@ def create_app(serve_config: ServeConfig) -> FastAPI: # Register YAML-parse-failed stubs first so they appear in /health. 
for stub in yaml_failed_stubs: registry.replace(stub.name, stub) - _metrics.inc_artifact_load_failure(stub.name) + _metrics.inc_artifact_load_failure(stub.name, reason="yaml") _metrics.set_model_loaded(stub.name, False) n_recipes = len(recipes) @@ -215,25 +309,29 @@ def create_app(serve_config: ServeConfig) -> FastAPI: # _try_load_artifact never raises (it catches internally and # returns a stub), but guard defensively. try: - entry = future.result() + entry, load_reason = future.result() except Exception as exc: # pragma: no cover — defensive only - logger.warning( + logger.error( "recipe_load_future_error", name=recipe.name, error=str(exc), + exc_info=True, ) + # Failure counter is incremented once below via load_reason; counting here too would double-count. entry = _failed_entry(recipe, f"unexpected error: {exc}") + load_reason = "unexpected" registry.replace(recipe.name, entry) _metrics.set_model_loaded(recipe.name, entry.loaded) if entry.loaded: loaded_entries[recipe.name] = entry else: - _metrics.inc_artifact_load_failure(recipe.name) + _metrics.inc_artifact_load_failure(recipe.name, reason=load_reason) logger.warning( "recipe_not_loaded_at_startup", name=recipe.name, error=entry.last_load_error, + reason=load_reason, ) _wall_seconds = time.perf_counter() - _startup_t0 @@ -247,7 +345,7 @@ def create_app(serve_config: ServeConfig) -> FastAPI: max_workers=max_workers, ) - _metrics.set_active_recipes(len(loaded_entries)) + _metrics.set_active_recipes(registry.loaded_count()) # Build watcher initial states — captures mtime/sha to avoid re-load on # first tick (spec: "capture initial mtime/sha inside the watcher's own @@ -274,12 +372,20 @@ async def lifespan(app: FastAPI): # type: ignore[type-arg] if serve_config.insecure_no_auth or serve_config.dev_allow_unsigned: import asyncio + _warn_interval = 60 if (serve_config.env or "").lower() == "test" else 300 + + # Emit at most one combined banner per interval regardless of + # how many insecure flags are set (M5: prevent double-fire when + # both
--insecure-no-auth and --dev-allow-unsigned are active). + _do_insecure = serve_config.insecure_no_auth + _do_unsigned = serve_config.dev_allow_unsigned + async def _warn_loop() -> None: while True: - await asyncio.sleep(60) - if serve_config.insecure_no_auth: + await asyncio.sleep(_warn_interval) + if _do_insecure: _emit_insecure_banner(serve_config) - if serve_config.dev_allow_unsigned: + if _do_unsigned: _emit_dev_unsigned_banner(serve_config) banner_task = asyncio.create_task(_warn_loop()) @@ -331,25 +437,130 @@ async def _warn_loop() -> None: openapi_url=_openapi_url, ) - # 9. Structured exception handler for unhandled non-HTTP exceptions. + # 9a. Flat-body HTTPException handler. + # Handlers raise ``HTTPException(detail={"detail": "...", "code": "..."})`` + # so callers can attach a machine-readable code alongside the human + # message. FastAPI's default would wrap that into + # ``{"detail": {"detail": "...", "code": "..."}}`` (double-detail). We + # flatten dict-shaped details to the top level so the response body is + # a single flat object — ``{"detail": "...", "code": "..."}`` — while + # string-shaped details fall through to FastAPI's default shape. + @app.exception_handler(HTTPException) + async def _http_exception_handler( + request: Request, exc: HTTPException + ) -> JSONResponse: + headers = getattr(exc, "headers", None) + if isinstance(exc.detail, dict): + content: dict[str, Any] = dict(exc.detail) + # Defensive: if a handler raised HTTPException(detail={...}) + # without including a "detail" key, fill in a sensible default + # so the response body always has a string ``detail`` field. + content.setdefault( + "detail", _DEFAULT_DETAIL_FOR.get(exc.status_code, "Error") + ) + else: + content = {"detail": exc.detail} + return JSONResponse( + status_code=exc.status_code, + content=content, + headers=headers, + ) + + # 9b. Flat-body RequestValidationError handler. 
+ # FastAPI's default 422 body is ``{"detail": [errors...]}`` which clashes + # with our flat error shape. Wrap it in our standard ``{"detail", + # "code", "errors"}`` envelope, and record a ``validation_error`` metric + # for the matching (recipe, verb) tuple when the request path is a v1 + # inference verb. If the path does not match (e.g. /v1/recipes listing + # with bad query), the metric is skipped but the 422 body is still + # returned. + @app.exception_handler(RequestValidationError) + async def _validation_error_handler( + request: Request, exc: RequestValidationError + ) -> JSONResponse: + match = _V1_VERB_PATH_RE.match(request.url.path) + if match is not None: + _metrics.record_v1_request( + recipe=match.group("name"), + verb=match.group("verb"), + status="validation_error", + latency_seconds=0.0, + ) + else: + _metrics.inc_validation_error_outside_verb() + # Include request_id so 422 responses are correlatable with the + # X-Request-ID header set by RequestIDMiddleware. If the middleware + # was bypassed (e.g. in a stripped-down test app), fall back to "". + request_id = getattr(request.state, "request_id", "") + sanitized_errors = [ + {k: v for k, v in err.items() if k not in ("input", "ctx")} + for err in exc.errors() + ] + # Always emit a structured WARN log so non-v1-verb paths (e.g. a + # malformed query string on ``/v1/recipes``) still produce an + # operational signal — the v1 metric counter only covers paths that + # match _V1_VERB_PATH_RE. Include the sanitised errors so operators + # can grep by request_id and see which field failed without raw input. 
+ logger.warning( + "validation_failed", + path=request.url.path, + method=request.method, + request_id=request_id, + error_count=len(sanitized_errors), + matched_v1_verb=match is not None, + errors=sanitized_errors, + ) + return JSONResponse( + status_code=422, + content={ + "request_id": request_id, + "detail": "Request validation failed", + "code": "VALIDATION_ERROR", + "errors": sanitized_errors, + }, + ) + + # 9c. Structured exception handler for unhandled non-HTTP exceptions. # FastAPI's default 500 response is a plain text "Internal Server Error" # string which leaks no details. We register our own handler to ensure # the response is JSON-formatted with a stable structure that clients can - # parse, while still NOT leaking stack traces. HTTPException is - # intentionally excluded so FastAPI's own handler keeps it. + # parse, while still NOT leaking stack traces. + # Note: Starlette dispatches HTTPException to its dedicated handler first + # so we never receive HTTPException here — no isinstance guard needed. @app.exception_handler(Exception) async def _unhandled_exception_handler( request: Request, exc: Exception ) -> JSONResponse: - if isinstance(exc, HTTPException): - # Let FastAPI's built-in HTTPException handler deal with it. - raise exc + # Starlette's ServerErrorMiddleware sits OUTSIDE RequestIDMiddleware, + # so the middleware's normal X-Request-ID injection does not run for + # 500 responses produced here. Read the value our middleware already + # stashed on ``request.state`` and re-attach it so every error + # response — including 500s — carries a correlatable ID. 
+ request_id = getattr(request.state, "request_id", "") + logger.exception( + "unhandled_500", + path=str(request.url.path), + request_id=request_id, + exc_type=type(exc).__name__, + ) + headers = {"X-Request-ID": request_id} if request_id else None return JSONResponse( status_code=500, - content={"detail": "internal error", "code": "internal_error"}, + content={ + "detail": "internal error", + "code": "INTERNAL_ERROR", + "request_id": request_id, + }, + headers=headers, ) # 10. Middlewares. + # Starlette processes add_middleware calls in LIFO order: the last one added + # is the outermost wrapper (first to process the request, last to process + # the response). We want RequestIDMiddleware to be outermost so it sets + # X-Request-ID on every response regardless of what inner layers do. + # Therefore RequestIDMiddleware is added LAST (after TrustedHost and CORS). + # allowed_hosts is always non-empty after ServeConfig.from_env() because # _split_csv_env falls back to _DEFAULT_ALLOWED_HOSTS on empty/unset input. app.add_middleware( @@ -364,19 +575,27 @@ async def _unhandled_exception_handler( allow_credentials=False, allow_methods=["GET", "POST", "OPTIONS"], allow_headers=["*"], + expose_headers=[ + "X-Request-ID", + "X-Recotem-Model-Version", + "X-Recotem-Items-Degraded", + ], ) + # Added last so it is outermost: ensures X-Request-ID is on every response. + app.add_middleware(RequestIDMiddleware) + # 11. Routes. # ``--insecure-no-auth`` must short-circuit the X-API-Key check even when # ``RECOTEM_API_KEYS`` is still set in the environment, otherwise the flag # is documented but silently ineffective. 
router_api_keys = [] if serve_config.insecure_no_auth else serve_config.api_keys - router = make_router( + api_router = make_router( registry=registry, api_keys=router_api_keys, - metadata_field_deny=serve_config.metadata_field_deny, + insecure_no_auth=serve_config.insecure_no_auth, ) - app.include_router(router) + app.include_router(api_router, prefix="/v1") return app @@ -411,7 +630,12 @@ def _build_key_ring(serve_config: ServeConfig) -> KeyRing | None: # --------------------------------------------------------------------------- -def _emit_security_posture(serve_config: ServeConfig, key_ring: KeyRing | None) -> None: +def _emit_security_posture( + serve_config: ServeConfig, + key_ring: KeyRing | None, + *, + key_ring_status: str | None = None, +) -> None: """Emit the canonical security.posture log line. The ``signing_keys`` field is a list of ``{"kid", "fingerprint"}`` pairs @@ -421,25 +645,31 @@ def _emit_security_posture(serve_config: ServeConfig, key_ring: KeyRing | None) against the earlier schema. The signing_key_status field reflects: - - "configured" — keys are present and the KeyRing was built. - - "dev_allow_unsigned" — dev-unsigned mode, no keys required. - - "missing" — keys absent and dev-unsigned not set; startup - will fail after this log line. + - "configured" — keys are present and the KeyRing was built. + - "dev_allow_unsigned" — dev-unsigned mode, no keys required. + - "missing" — keys absent and dev-unsigned not set; startup + will fail after this log line. + - "construction_failed" — keys were provided but KeyRing construction + raised an exception; startup will fail. + + *key_ring_status* overrides the auto-derived value when provided (used + when the caller detects ``construction_failed`` before passing None as + *key_ring*). 
""" if key_ring is not None: kids = key_ring.kids() signing_keys = [ {"kid": kid, "fingerprint": key_ring.fingerprint(kid)} for kid in kids ] - signing_key_status = "configured" + signing_key_status = key_ring_status or "configured" elif serve_config.dev_allow_unsigned: kids = [] signing_keys = [] - signing_key_status = "dev_allow_unsigned" + signing_key_status = key_ring_status or "dev_allow_unsigned" else: kids = [] signing_keys = [] - signing_key_status = "missing" + signing_key_status = key_ring_status or "missing" logger.info( "security.posture", @@ -500,6 +730,14 @@ def _emit_dev_unsigned_banner(serve_config: ServeConfig) -> None: # --------------------------------------------------------------------------- +_URI_RE = re.compile(r"\b(s3|gs|az|abfs|abfss|https?)://\S+") + + +def _sanitize_error(reason: str) -> str: + truncated = reason[:200] + return _URI_RE.sub("<redacted-uri>", truncated) + + def _failed_entry(recipe: Any, reason: str) -> ModelEntry: """Stub ModelEntry inserted at startup when an artifact failed to load. @@ -513,7 +751,7 @@ def _failed_entry(recipe: Any, reason: str) -> ModelEntry: header={}, kid="", metadata_df=None, - last_load_error=reason, + last_load_error=_sanitize_error(reason), artifact_path=recipe.output.path, loaded=False, ) @@ -523,12 +761,13 @@ def _try_load_artifact( recipe: Any, key_ring: KeyRing | None, serve_config: ServeConfig, -) -> ModelEntry: +) -> tuple[ModelEntry, str]: """Attempt to load the artifact for *recipe* at startup. - Returns a fully-populated ModelEntry on success, or a stub entry with - ``loaded=False`` and ``last_load_error`` set on any failure. Either way - the caller registers the entry so ``/health`` reports the recipe. + Returns ``(entry, reason)``. On success *reason* is ``"ok"`` and *entry* + is fully populated. On failure *reason* is the load-failure category + and *entry* is a stub with ``loaded=False`` and ``last_load_error`` set. 
+ Either way the caller registers the entry so ``/health`` reports the recipe. """ artifact_path = recipe.output.path max_artifact_bytes = serve_config.max_artifact_bytes @@ -538,12 +777,12 @@ def _try_load_artifact( data = read_artifact_bytes(artifact_path, max_artifact_bytes) except ArtifactError as exc: logger.warning("initial_artifact_read_failed", name=recipe.name, error=str(exc)) - return _failed_entry(recipe, f"read failed: {exc}") + return _failed_entry(recipe, f"read failed: {exc}"), "read" except (MemoryError, RecursionError): raise except Exception as exc: logger.warning("initial_artifact_read_error", name=recipe.name, error=str(exc)) - return _failed_entry(recipe, f"read error: {exc}") + return _failed_entry(recipe, f"read error: {exc}"), "read" sha256 = sha256_bytes(data) @@ -556,7 +795,7 @@ def _try_load_artifact( logger.warning( "initial_artifact_parse_failed", name=recipe.name, error=str(exc) ) - return _failed_entry(recipe, f"parse failed: {exc}") + return _failed_entry(recipe, f"parse failed: {exc}"), "parse" payload_bytes = data[hdr.payload_offset :] @@ -571,13 +810,18 @@ def _try_load_artifact( hdr.hmac_digest, ) except ArtifactError as exc: - logger.warning( + # HMAC failure is a security signal (wrong key, tampered artifact); + # log at ERROR with traceback so SIEM rules filtering on level + # >= ERROR fire. Other startup failure modes stay at WARNING since + # they are operational rather than security. 
+ logger.error( "initial_artifact_hmac_failed", name=recipe.name, kid=hdr.kid, error=str(exc), + exc_info=True, ) - return _failed_entry(recipe, f"HMAC verify failed: {exc}") + return _failed_entry(recipe, f"HMAC verify failed: {exc}"), "hmac" else: logger.warning( "initial_artifact_hmac_skipped_dev", @@ -598,7 +842,10 @@ def _try_load_artifact( kid=hdr.kid, error=str(exc), ) - return _failed_entry(recipe, f"header JSON decode failed: {exc}") + return ( + _failed_entry(recipe, f"header JSON decode failed: {exc}"), + "header_json", + ) try: recommender = unpickle_payload(payload_bytes) @@ -609,7 +856,7 @@ def _try_load_artifact( kid=hdr.kid, error=str(exc), ) - return _failed_entry(recipe, f"deserialize failed: {exc}") + return _failed_entry(recipe, f"deserialize failed: {exc}"), "deserialize" metadata_df = None metadata_index = None @@ -621,7 +868,14 @@ def _try_load_artifact( deny_set: frozenset[str] = frozenset( s.lower() for s in (serve_config.metadata_field_deny or []) ) - metadata_index = build_metadata_index(metadata_df, deny_set) + _recipe_name = recipe.name + + def _on_row_error() -> None: + _metrics.inc_metadata_index_build_error(_recipe_name) + + metadata_index = build_metadata_index( + metadata_df, deny_set, on_row_error=_on_row_error + ) except (MemoryError, RecursionError): raise except Exception as exc: @@ -630,7 +884,7 @@ def _try_load_artifact( name=recipe.name, error=str(exc), ) - return _failed_entry(recipe, f"metadata load failed: {exc}") + return _failed_entry(recipe, f"metadata load failed: {exc}"), "metadata" marker = stat_marker(artifact_path) entry = ModelEntry( @@ -643,6 +897,9 @@ def _try_load_artifact( last_load_error=None, artifact_path=artifact_path, _loaded_marker=(marker, sha256), + loaded_at_unix=time.time(), + config_digest=normalize_config_digest(header_dict.get("config_digest")) or "", + algorithms=extract_algorithms(header_dict), ) logger.info( @@ -652,4 +909,4 @@ def _try_load_artifact( trained_at=header_dict.get("trained_at"), 
best_class=header_dict.get("best_class"), ) - return entry + return entry, "ok" diff --git a/src/recotem/serving/auth.py b/src/recotem/serving/auth.py index 71aeacd1..d40e82c1 100644 --- a/src/recotem/serving/auth.py +++ b/src/recotem/serving/auth.py @@ -119,7 +119,11 @@ def _hash_api_key(value: str) -> str: ).hex() -def verify_api_key(request: Request, api_keys: list[ApiKeyEntry]) -> str: +def verify_api_key( + request: Request, + api_keys: list[ApiKeyEntry], + bypass_mode: str = "loopback_no_keys", +) -> str: """Verify the ``X-API-Key`` header and return the matching ``kid``. Parameters @@ -131,6 +135,13 @@ def verify_api_key(request: Request, api_keys: list[ApiKeyEntry]) -> str: The list of configured :class:`~recotem.config.ApiKeyEntry` objects. If the list is empty, all requests are allowed and ``kid`` is set to ``"anonymous"``. + bypass_mode: + Label for the ``mode`` field on ``auth_anonymous_bypass`` / ``_first_seen`` + log events when ``api_keys`` is empty. Callers should pass + ``"insecure_no_auth"`` when the server was explicitly started with + ``--insecure-no-auth``, or ``"loopback_no_keys"`` (default) when + auth is absent because no API keys are configured (loopback-only + bind is enforced by ``apply_auth_posture`` in that case). Returns ------- @@ -161,11 +172,20 @@ def verify_api_key(request: Request, api_keys: list[ApiKeyEntry]) -> str: else: client_host = str(getattr(_client, "host", "unknown")) + # Distinguish why auth is bypassed: operator explicitly passed + # ``insecure_no_auth=True`` (intentional dev mode) versus the keys + # list being empty for another reason (e.g. loopback-only bind when + # no API keys are configured — apply_auth_posture handles the bind + # restriction). The ``bypass_mode`` kwarg is forwarded from the + # ``_require_auth`` closure in ``make_router``. + _bypass_mode = bypass_mode + # DEBUG log on EVERY anonymous bypass — correlates with access logs. 
logger.debug( "auth_anonymous_bypass", client_host=client_host, path=request.url.path, + mode=_bypass_mode, ) # INFO log only on the FIRST bypass per client_host (process-local). @@ -178,6 +198,7 @@ def verify_api_key(request: Request, api_keys: list[ApiKeyEntry]) -> str: "auth_anonymous_bypass_first_seen", client_host=client_host, path=request.url.path, + mode=_bypass_mode, ) else: # Move to tail so it stays in the LRU (recently used). @@ -197,7 +218,7 @@ def verify_api_key(request: Request, api_keys: list[ApiKeyEntry]) -> str: _hash_api_key("\x00" * _API_KEY_MAX_LEN) # constant-time equalisation raise HTTPException( status_code=401, - detail={"detail": "X-API-Key header required", "code": "missing_api_key"}, + detail={"detail": "X-API-Key header required", "code": "MISSING_API_KEY"}, ) # Reject oversized headers BEFORE invoking the scrypt KDF on the real @@ -217,7 +238,7 @@ def verify_api_key(request: Request, api_keys: list[ApiKeyEntry]) -> str: _hash_api_key("\x00" * _API_KEY_MAX_LEN) # constant-time equalisation raise HTTPException( status_code=401, - detail={"detail": "Invalid API key", "code": "invalid_api_key"}, + detail={"detail": "Invalid API key", "code": "INVALID_API_KEY"}, ) # Reject plaintexts shorter than _API_KEY_MIN_LEN. @@ -241,7 +262,7 @@ def verify_api_key(request: Request, api_keys: list[ApiKeyEntry]) -> str: _hash_api_key("\x00" * _API_KEY_MAX_LEN) # constant-time equalisation raise HTTPException( status_code=401, - detail={"detail": "Invalid API key", "code": "invalid_api_key"}, + detail={"detail": "Invalid API key", "code": "INVALID_API_KEY"}, ) # No stripping — whitespace is part of the key. 
@@ -273,5 +294,5 @@ def verify_api_key(request: Request, api_keys: list[ApiKeyEntry]) -> str: logger.warning("auth_invalid_key", path=request.url.path) raise HTTPException( status_code=401, - detail={"detail": "Invalid API key", "code": "invalid_api_key"}, + detail={"detail": "Invalid API key", "code": "INVALID_API_KEY"}, ) diff --git a/src/recotem/serving/metrics.py b/src/recotem/serving/metrics.py index 65b630f8..2868e094 100644 --- a/src/recotem/serving/metrics.py +++ b/src/recotem/serving/metrics.py @@ -15,20 +15,31 @@ Metric inventory (matches docs/operations.md): -| Name | Type | Labels | -|------------------------------------------------|------------|--------------------| -| ``recotem_predict_total`` | Counter | recipe, status | -| ``recotem_predict_latency_seconds`` | Histogram | recipe | -| ``recotem_model_loaded`` | Gauge | recipe | -| ``recotem_artifact_load_failures_total`` | Counter | recipe | -| ``recotem_active_recipes`` | Gauge | — | -| ``recotem_swap_total`` | Counter | recipe, result | -| ``recotem_artifact_stat_failures_total`` | Counter | recipe | -| ``recotem_watcher_unhandled_errors_total`` | Counter | — | -| ``recotem_metadata_lookup_errors_total`` | Counter | recipe | -| ``recotem_recipe_rescan_errors_total`` | Counter | recipe | -| ``recotem_bigquery_storage_fallback_total`` | Counter | reason | -| ``recotem_recipes_dir_scan_failures_total`` | Counter | error_class | +| Name | Type | Labels | +|----------------------------------------------------|------------|-------------------------| +| ``recotem_v1_requests_total`` | Counter | recipe, verb, status | +| ``recotem_v1_request_latency_seconds`` | Histogram | recipe, verb | +| ``recotem_v1_batch_size`` | Histogram | recipe, verb | +| ``recotem_v1_batch_element_errors_total`` | Counter | recipe, verb, code | +| ``recotem_v1_metadata_degraded_items_total`` | Counter | recipe, verb, kind | +| ``recotem_v1_validation_errors_outside_verb_total``| Counter | — | +| ``recotem_model_loaded`` | Gauge | 
recipe | +| ``recotem_artifact_load_failures_total`` | Counter | recipe, reason | +| ``recotem_active_recipes`` | Gauge | — | +| ``recotem_swap_total`` | Counter | recipe, result | +| ``recotem_artifact_stat_failures_total`` | Counter | recipe | +| ``recotem_watcher_unhandled_errors_total`` | Counter | — | +| ``recotem_metadata_index_build_errors_total`` | Counter | recipe | +| ``recotem_metadata_serialization_errors_total`` | Counter | recipe, verb | +| ``recotem_recipe_rescan_errors_total`` | Counter | recipe | +| ``recotem_bigquery_storage_fallback_total`` | Counter | reason | +| ``recotem_recipes_dir_scan_failures_total`` | Counter | error_class | +| ``recotem_recommender_layout_unexpected_total`` | Counter | recipe | +| ``recotem_watcher_state_divergence_total`` | Counter | — | + +Artifact-load reason taxonomy (``recotem_artifact_load_failures_total``): +``read``, ``parse``, ``hmac``, ``header_json``, ``deserialize``, ``metadata``, +``yaml``, ``unexpected``, ``dir_scan``, ``timeout``. """ from __future__ import annotations @@ -46,16 +57,17 @@ _PROMETHEUS_AVAILABLE = False -_PREDICT_TOTAL: Any = None -_PREDICT_LATENCY: Any = None _MODEL_LOADED: Any = None _ARTIFACT_LOAD_FAILURES: Any = None _ACTIVE_RECIPES: Any = None _SWAP_TOTAL: Any = None _ARTIFACT_STAT_FAILURES: Any = None _WATCHER_UNHANDLED_ERRORS: Any = None -_METADATA_LOOKUP_ERRORS: Any = None +_METADATA_INDEX_BUILD_ERRORS: Any = None +_METADATA_SERIALIZATION_ERRORS: Any = None _RECIPE_RESCAN_ERRORS: Any = None +_RECOMMENDER_LAYOUT_UNEXPECTED: Any = None +_WATCHER_STATE_DIVERGENCE: Any = None def metrics_enabled() -> bool: @@ -77,24 +89,16 @@ def _ensure_initialized() -> None: Called lazily on the first recorder invocation so importing this module does not register metrics in environments that disable them. 
""" - global _PREDICT_TOTAL, _PREDICT_LATENCY, _MODEL_LOADED + global _MODEL_LOADED global _ARTIFACT_LOAD_FAILURES, _ACTIVE_RECIPES, _SWAP_TOTAL global _ARTIFACT_STAT_FAILURES, _WATCHER_UNHANDLED_ERRORS - global _METADATA_LOOKUP_ERRORS, _RECIPE_RESCAN_ERRORS + global _METADATA_INDEX_BUILD_ERRORS, _METADATA_SERIALIZATION_ERRORS + global _RECIPE_RESCAN_ERRORS + global _RECOMMENDER_LAYOUT_UNEXPECTED, _WATCHER_STATE_DIVERGENCE - if not _PROMETHEUS_AVAILABLE or _PREDICT_TOTAL is not None: + if not _PROMETHEUS_AVAILABLE or _MODEL_LOADED is not None: return - _PREDICT_TOTAL = Counter( - "recotem_predict_total", - "Total /predict calls served, partitioned by status.", - ["recipe", "status"], - ) - _PREDICT_LATENCY = Histogram( - "recotem_predict_latency_seconds", - "End-to-end /predict latency in seconds.", - ["recipe"], - ) _MODEL_LOADED = Gauge( "recotem_model_loaded", "1 when the model for a recipe is loaded and serving, 0 otherwise.", @@ -102,8 +106,10 @@ def _ensure_initialized() -> None: ) _ARTIFACT_LOAD_FAILURES = Counter( "recotem_artifact_load_failures_total", - "Total artifact load failures (initial load and watcher reloads).", - ["recipe"], + "Total artifact load failures (initial load and watcher reloads). " + "reason ∈ {read, parse, hmac, header_json, deserialize, metadata, " + "yaml, unexpected, dir_scan, timeout}.", + ["recipe", "reason"], ) _ACTIVE_RECIPES = Gauge( "recotem_active_recipes", @@ -125,44 +131,38 @@ def _ensure_initialized() -> None: "Total unhandled exceptions in the watcher poll loop. 
" "A high rate here indicates a broken polling environment.", ) - _METADATA_LOOKUP_ERRORS = Counter( - "recotem_metadata_lookup_errors_total", - "Metadata lookup errors during /predict response enrichment.", + _METADATA_INDEX_BUILD_ERRORS = Counter( + "recotem_metadata_index_build_errors_total", + "Per-row errors during build_metadata_index at artifact-load time " + "(load-time; watcher and startup paths).", ["recipe"], ) + _METADATA_SERIALIZATION_ERRORS = Counter( + "recotem_metadata_serialization_errors_total", + "Per-item metadata serialization failures during request-time " + "response building (request-time; router path).", + ["recipe", "verb"], + ) _RECIPE_RESCAN_ERRORS = Counter( "recotem_recipe_rescan_errors_total", "Total recipe YAML parse/load errors during watcher directory rescan " "(transient failures that leave the existing model serving).", ["recipe"], ) - - -def record_predict(recipe: str, status: str, latency_seconds: float) -> None: - """Record a /predict call. - - Parameters - ---------- - recipe: - Recipe name (the ``{name}`` path parameter from ``/predict/{name}``). - status: - One of the following documented status labels: - - - ``"ok"`` — recommendation returned successfully - - ``"user_not_found"`` — user was not seen during training (HTTP 404) - - ``"unavailable"`` — recipe not loaded or unhealthy (HTTP 503) - - ``"error"`` — any other unexpected exception - - Using finer-grained labels avoids alert storms from routine 404s - (cold-start users) being conflated with genuine 503s or internal errors. - latency_seconds: - End-to-end wall-clock time for the request in seconds. 
- """ - _ensure_initialized() - if _PREDICT_TOTAL is None: - return - _PREDICT_TOTAL.labels(recipe=recipe, status=status).inc() - _PREDICT_LATENCY.labels(recipe=recipe).observe(latency_seconds) + _RECOMMENDER_LAYOUT_UNEXPECTED = Counter( + "recotem_recommender_layout_unexpected_total", + "Total occurrences of an unexpected recommender internal layout " + "(AttributeError when accessing _mapper.user_id_to_index or " + "_mapper.item_id_to_index). " + "A non-zero rate indicates an irspack API incompatibility.", + ["recipe"], + ) + _WATCHER_STATE_DIVERGENCE = Counter( + "recotem_watcher_state_divergence_total", + "Total times the watcher attempted to mark a load error on a recipe " + "that has no registry entry (set_load_error returned False). " + "Indicates a state ordering bug in the watcher.", + ) def set_model_loaded(recipe: str, loaded: bool) -> None: @@ -173,12 +173,39 @@ def set_model_loaded(recipe: str, loaded: bool) -> None: _MODEL_LOADED.labels(recipe=recipe).set(1 if loaded else 0) -def inc_artifact_load_failure(recipe: str) -> None: - """Increment the per-recipe artifact-load-failures counter.""" +_LOAD_FAILURE_REASONS: frozenset[str] = frozenset( + { + "read", + "parse", + "hmac", + "header_json", + "deserialize", + "metadata", + "yaml", + "unexpected", + "dir_scan", + # Stat hung in the executor thread (object-store non-responsive). + # Distinct from "read" (file could not be opened/parsed) because stat + # timeouts are an infrastructure signal rather than a data signal. + "timeout", + } +) + + +def inc_artifact_load_failure(recipe: str, reason: str = "unexpected") -> None: + """Increment the per-recipe artifact-load-failures counter. + + *reason* must be one of the values in ``_LOAD_FAILURE_REASONS`` + (``read | parse | hmac | header_json | deserialize | metadata | yaml | + unexpected | dir_scan | timeout``); any other value is silently coerced + to ``"unexpected"`` so callers cannot accidentally explode the cardinality + of the label. 
+ """ _ensure_initialized() if _ARTIFACT_LOAD_FAILURES is None: return - _ARTIFACT_LOAD_FAILURES.labels(recipe=recipe).inc() + label = reason if reason in _LOAD_FAILURE_REASONS else "unexpected" + _ARTIFACT_LOAD_FAILURES.labels(recipe=recipe, reason=label).inc() def set_active_recipes(count: int) -> None: @@ -221,18 +248,30 @@ def inc_watcher_unhandled_error() -> None: _WATCHER_UNHANDLED_ERRORS.inc() -def inc_metadata_lookup_error(recipe: str) -> None: - """Increment the per-recipe metadata-lookup-errors counter. +def inc_metadata_index_build_error(recipe: str) -> None: + """Increment the per-recipe metadata-index-build-errors counter. + + Called at artifact-load time when ``build_metadata_index`` encounters + a row that cannot be flattened — e.g. ``AttributeError`` from a + non-unique index returning a DataFrame instead of a Series, or + ``TypeError`` from a non-string column name. + """ + _ensure_initialized() + if _METADATA_INDEX_BUILD_ERRORS is None: + return + _METADATA_INDEX_BUILD_ERRORS.labels(recipe=recipe).inc() + - Called when ``_lookup_metadata`` encounters an unexpected error (not a - plain ``KeyError`` / missing-item) during ``/predict`` response enrichment - — e.g. ``AttributeError`` from a non-unique index returning a DataFrame - instead of a Series, or ``TypeError`` from a non-string column name. +def inc_metadata_serialization_error(recipe: str, verb: str) -> None: + """Increment the per-recipe/verb metadata-serialization-errors counter. + + Called at request time when ``RecommendItem.model_validate`` fails for + a single item during the metadata-join step in the router. 
""" _ensure_initialized() - if _METADATA_LOOKUP_ERRORS is None: + if _METADATA_SERIALIZATION_ERRORS is None: return - _METADATA_LOOKUP_ERRORS.labels(recipe=recipe).inc() + _METADATA_SERIALIZATION_ERRORS.labels(recipe=recipe, verb=verb).inc() def inc_recipe_rescan_error(recipe: str) -> None: @@ -248,6 +287,171 @@ def inc_recipe_rescan_error(recipe: str) -> None: _RECIPE_RESCAN_ERRORS.labels(recipe=recipe).inc() +def inc_recommender_layout_unexpected(recipe: str) -> None: + """Increment the per-recipe recommender-layout-unexpected counter. + + Called when ``_any_seed_known`` encounters an ``AttributeError`` accessing + ``recommender._mapper.item_id_to_index``, indicating an unexpected irspack + internal layout. A non-zero rate signals an API incompatibility. + """ + _ensure_initialized() + if _RECOMMENDER_LAYOUT_UNEXPECTED is None: + return + _RECOMMENDER_LAYOUT_UNEXPECTED.labels(recipe=recipe).inc() + + +def inc_watcher_state_divergence() -> None: + """Increment the watcher-state-divergence counter. + + Called when ``set_load_error_no_entry`` fires in the watcher — i.e. the + watcher tried to mark a load error on a recipe that has no registry entry. + Indicates a state ordering bug: the watcher should always insert a stub + entry before attempting a load. + """ + _ensure_initialized() + if _WATCHER_STATE_DIVERGENCE is None: + return + _WATCHER_STATE_DIVERGENCE.inc() + + +# --------------------------------------------------------------------------- +# v1 API metrics +# --------------------------------------------------------------------------- + +_V1_REQUEST_COUNTER: Any = None +_V1_REQUEST_LATENCY: Any = None +_V1_BATCH_SIZE: Any = None +_V1_BATCH_ELEMENT_ERRORS: Any = None +_V1_METADATA_DEGRADED_ITEMS: Any = None +_V1_VALIDATION_ERRORS_OUTSIDE_VERB: Any = None + + +def _ensure_v1_initialized() -> None: + """Lazily create the v1 counter/histogram families. + + Called from record_v1_request and observe_batch_size. 
Mirrors the + pattern used by _ensure_initialized() for the operational metrics. + """ + global _V1_REQUEST_COUNTER, _V1_REQUEST_LATENCY, _V1_BATCH_SIZE + global _V1_BATCH_ELEMENT_ERRORS + global _V1_METADATA_DEGRADED_ITEMS, _V1_VALIDATION_ERRORS_OUTSIDE_VERB + if _V1_REQUEST_COUNTER is not None: + return + if not metrics_enabled(): + return + + _V1_REQUEST_COUNTER = Counter( + "recotem_v1_requests_total", + "Total number of v1 API requests by recipe, verb, and status.", + ["recipe", "verb", "status"], + ) + _V1_REQUEST_LATENCY = Histogram( + "recotem_v1_request_latency_seconds", + "End-to-end latency of v1 API requests.", + ["recipe", "verb"], + ) + _V1_BATCH_SIZE = Histogram( + "recotem_v1_batch_size", + "Number of elements in a batch v1 request.", + ["recipe", "verb"], + buckets=(1, 2, 4, 8, 16, 32, 64, 128, 256), + ) + _V1_BATCH_ELEMENT_ERRORS = Counter( + "recotem_v1_batch_element_errors_total", + "Per-element errors inside batch v1 responses, partitioned by code. " + 'An outer ``recotem_v1_requests_total{status="ok"}`` increment ' + "still records the HTTP-200 response — this counter surfaces the " + "per-element failures that the outer counter would otherwise hide.", + ["recipe", "verb", "code"], + ) + _V1_METADATA_DEGRADED_ITEMS = Counter( + "recotem_v1_metadata_degraded_items_total", + "Items that could not be fully enriched with metadata during response " + "building. kind=fallback means item_id/score-only fallback was used; " + "kind=dropped means the item was omitted entirely.", + ["recipe", "verb", "kind"], + ) + _V1_VALIDATION_ERRORS_OUTSIDE_VERB = Counter( + "recotem_v1_validation_errors_outside_verb_total", + "422 validation errors on non-v1-verb paths (e.g. /v1/recipes listing " + "with bad query parameters).", + ) + + +def record_v1_request( + recipe: str, verb: str, status: str, latency_seconds: float +) -> None: + """Record a v1 API request. + + *verb* ∈ {"recommend", "recommend-related", "batch-recommend", + "batch-recommend-related"}. 
*status* ∈ {"ok", "unknown_user", + "unknown_seed_items", "no_candidates", "unavailable", + "recipe_not_found", "validation_error", "error"}. + """ + _ensure_v1_initialized() + if _V1_REQUEST_COUNTER is None: + return # metrics disabled + _V1_REQUEST_COUNTER.labels(recipe=recipe, verb=verb, status=status).inc() + _V1_REQUEST_LATENCY.labels(recipe=recipe, verb=verb).observe(latency_seconds) + + +def observe_batch_size(recipe: str, verb: str, size: int) -> None: + """Record a sample for the batch-size histogram.""" + _ensure_v1_initialized() + if _V1_BATCH_SIZE is None: + return + _V1_BATCH_SIZE.labels(recipe=recipe, verb=verb).observe(size) + + +def inc_batch_element_error(recipe: str, verb: str, code: str) -> None: + """Increment the per-element batch-error counter. + + Called once per element that produces ``status="error"`` inside a + ``:batch-recommend`` or ``:batch-recommend-related`` response so + operators can alert on per-element failures even though the outer + HTTP response is still 200. + """ + _ensure_v1_initialized() + if _V1_BATCH_ELEMENT_ERRORS is None: + return + _V1_BATCH_ELEMENT_ERRORS.labels(recipe=recipe, verb=verb, code=code).inc() + + +_DEGRADED_ITEM_KINDS: frozenset[str] = frozenset({"fallback", "dropped", "unexpected"}) + + +def inc_metadata_degraded_items( + recipe: str, verb: str, kind: str, count: int = 1 +) -> None: + """Increment the metadata-degraded-items counter. + + *kind* must be ``"fallback"`` (item served with item_id/score only) or + ``"dropped"`` (item omitted entirely because even bare-item validation + failed). Any value outside ``_DEGRADED_ITEM_KINDS`` is coerced to + ``"unexpected"`` to prevent accidental label cardinality explosion. + Called from the router's ``_build_items`` when metadata enrichment degrades + for one or more items. 
+ """ + _ensure_v1_initialized() + if _V1_METADATA_DEGRADED_ITEMS is None: + return + label = kind if kind in _DEGRADED_ITEM_KINDS else "unexpected" + _V1_METADATA_DEGRADED_ITEMS.labels(recipe=recipe, verb=verb, kind=label).inc(count) + + +def inc_validation_error_outside_verb() -> None: + """Increment the counter for 422 errors on non-v1-verb paths. + + Called from ``_validation_error_handler`` when the request path does not + match ``_V1_VERB_PATH_RE`` so operators have a metric for validation + failures on e.g. ``/v1/recipes`` list endpoints. + """ + _ensure_v1_initialized() + if _V1_VALIDATION_ERRORS_OUTSIDE_VERB is None: + return + _V1_VALIDATION_ERRORS_OUTSIDE_VERB.inc() + + def generate_latest() -> tuple[bytes, str]: """Return Prometheus exposition (data, content_type) for the registry. diff --git a/src/recotem/serving/registry.py b/src/recotem/serving/registry.py index 9fb81dbe..e24e9cfa 100644 --- a/src/recotem/serving/registry.py +++ b/src/recotem/serving/registry.py @@ -21,6 +21,7 @@ import threading from dataclasses import dataclass, field +from datetime import UTC, datetime from typing import Any # --------------------------------------------------------------------------- @@ -35,7 +36,7 @@ class ModelEntry: Attributes ---------- name: - Recipe name (matches ``/predict/{name}``). + Recipe name (matches the path parameter in ``/v1/recipes/{name}:*`` endpoints). recommender: The deserialized ``IDMappedRecommender`` instance. header: @@ -52,7 +53,8 @@ class ModelEntry: columns — no large numeric arrays are duplicated. metadata_index: Pre-flattened ``dict[str, dict[str, Any]]`` keyed by item_id for - O(1) per-item lookups during ``/predict``. Built once at model-load + O(1) per-item lookups during ``:recommend`` / ``:recommend-related`` + response metadata join. Built once at model-load time by :func:`~recotem.metadata.loader.build_metadata_index` with NaN→None normalisation and deny-list filtering already applied. 
``None`` when no item_metadata is configured for this recipe. @@ -66,8 +68,9 @@ class ModelEntry: ``True`` when ``recommender`` is a usable model. ``False`` for stub entries inserted at startup when the artifact failed to load — these entries appear in ``/health`` as ``loaded=false`` so operators can see - which recipes are not serving, and ``/predict`` should reject them - with 503. + which recipes are not serving, and the v1 inference endpoints + (``:recommend``, ``:recommend-related``, ``:batch-recommend``, + ``:batch-recommend-related``) should reject them with 503. """ name: str @@ -81,6 +84,60 @@ class ModelEntry: loaded: bool = True # Internal watcher state: (mtime_or_etag, sha256_hex) _loaded_marker: tuple[Any, str] = field(default_factory=lambda: (None, "")) + # v1 additions. The watcher sets loaded_at_unix on every successful + # (re-)load. Stays at 0.0 for stub entries that never loaded. + loaded_at_unix: float = 0.0 + # Optional artifact-derived metadata used by /v1/recipes/{name}. + config_digest: str = "" + algorithms: list[str] = field(default_factory=list) + + # --- v1 API additions --- + @property + def artifact_sha256(self) -> str: + """SHA-256 of the artifact bytes (hex, no prefix). + + Derived from ``_loaded_marker[1]`` which the watcher populates + at every successful (re-)load. Empty for stub entries. + """ + return self._loaded_marker[1] if self._loaded_marker else "" + + @property + def model_version(self) -> str: + """Deterministic artifact identifier exposed via the v1 API. + + Format: ``sha256:<hex>``. Stub entries return ``sha256:``. + """ + return f"sha256:{self.artifact_sha256}" + + @property + def loaded_at(self) -> datetime: + """Timezone-aware UTC datetime of the last successful (re-)load. + + Falls back to the unix epoch for stub entries. + """ + return datetime.fromtimestamp(self.loaded_at_unix or 0.0, tz=UTC) + + @property + def kind(self) -> str: + """Inference kind exposed via /v1/recipes. 
+ + Currently every irspack algorithm shipped by recotem is a + user-item collaborative filter, so this returns "user-item" + unconditionally. + """ + return "user-item" + + @property + def supported_verbs(self) -> list[str]: + """List of v1 verbs this entry can serve.""" + if self.kind == "user-item": + return [ + "recommend", + "recommend-related", + "batch-recommend", + "batch-recommend-related", + ] + return [] @property def trained_at(self) -> str | None: @@ -114,7 +171,7 @@ def __post_init__(self) -> None: self._models_view["name"] = self.name def models_dict(self) -> dict[str, Any]: - """Return header metadata suitable for the ``/models`` endpoint. + """Return header metadata for introspection (e.g. tests, tooling). The artifact header JSON never contains ``hmac`` or ``key`` fields — those are stored in separate binary regions of the artifact format @@ -263,6 +320,17 @@ def loaded_count(self) -> int: with self._lock: return self._loaded_count + def health_counts(self) -> tuple[int, int]: + """Return ``(loaded, total)`` under a single lock acquisition. + + Avoids the TOCTOU window between a separate ``loaded_count()`` and + ``health_snapshot()`` call in the ``/v1/health`` handler: both + numbers are read atomically so they are guaranteed to be consistent + with each other even if a hot-swap occurs between calls. + """ + with self._lock: + return self._loaded_count, len(self._entries) + def health_snapshot(self) -> dict[str, dict[str, Any]]: """Return per-recipe health info (safe copy, no model objects). @@ -282,7 +350,7 @@ def health_snapshot(self) -> dict[str, dict[str, Any]]: """ with self._lock: items = list(self._entries.items()) - # Build the dict outside the lock so /health cannot block /predict + # Build the dict outside the lock so /health cannot block `:recommend` # threads waiting to acquire the lock. 
return {name: entry.health_dict() for name, entry in items} diff --git a/src/recotem/serving/routes.py b/src/recotem/serving/routes.py index d5dcc5c6..b94c65e1 100644 --- a/src/recotem/serving/routes.py +++ b/src/recotem/serving/routes.py @@ -1,474 +1,962 @@ -"""FastAPI route handlers for the Recotem serving layer. - -Routes: - POST /predict/{name} — single-user recommendations - GET /health — per-recipe health (ok | degraded) - GET /models — registry entries (header metadata, no key material) - GET /metrics — Prometheus exposition (opt-in; only when prometheus_client - is importable) +"""FastAPI router for the recotem v1 HTTP API. + +The router is mounted at ``/v1`` by ``serving/app.py`` and exposes the +``:recommend``, ``:recommend-related``, ``:batch-recommend``, +``:batch-recommend-related`` colon-verb endpoints alongside the +``/recipes`` discovery, ``/health``, and (optional) ``/metrics`` routes. """ +from __future__ import annotations + +import hashlib import math -import re import time -import uuid -from typing import Annotated, Any +from collections.abc import Iterator +from contextlib import contextmanager +from typing import Any import structlog from fastapi import APIRouter, Depends, HTTPException, Path, Request, Response -from fastapi.responses import JSONResponse -from pydantic import BaseModel, Field +from pydantic import ValidationError from recotem.config import ApiKeyEntry from recotem.serving import metrics as _metrics from recotem.serving.auth import verify_api_key -from recotem.serving.registry import ModelRegistry +from recotem.serving.registry import ModelEntry, ModelRegistry +from recotem.serving.schemas import ( + BATCH_AGGREGATE_LIMIT, + BatchRecommendRelatedRequest, + BatchRecommendRequest, + BatchRecommendResponse, + BatchResultErr, + BatchResultOk, + ErrorCode, + ErrorDetail, + RecipeDetailResponse, + RecipesListResponse, + RecommendItem, + RecommendRelatedRequest, + RecommendRequest, + RecommendResponse, +) logger = 
structlog.get_logger(__name__) -# Allowed characters for an echoed X-Request-ID header value (M-4). -# Accepts up to 64 characters of [A-Za-z0-9_-] (UUID-ish identifiers). -# Any header value that does not match is replaced with a fresh UUID4 so -# ANSI escape sequences, log-injection payloads, or oversized strings are -# never echoed back to the client or embedded in structured log fields. -_REQUEST_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$") - +# Path regex shared across every endpoint that names a recipe. Must mirror +# ``recotem.recipe.models.Recipe.name`` so that any recipe accepted at load +# time is also routable. The regex is intentionally permissive of leading +# ``_``/``-`` characters because the recipe loader already accepts them. +_RECIPE_NAME_RE = r"^[A-Za-z0-9_-]{1,64}$" # --------------------------------------------------------------------------- -# Request / response models +# Batch validation helpers # --------------------------------------------------------------------------- - -class PredictRequest(BaseModel): - user_id: str - cutoff: Annotated[int, Field(ge=1, le=1000)] = 10 - - -class RecommendationItem(BaseModel): - item_id: str - score: float - # Extra metadata fields are included as additional properties. - model_config = {"extra": "allow"} +# Maximum number of per-error entries included in sanitized_errors log field. +_BATCH_VALIDATION_MAX_ERRORS = 10 -class ModelInfo(BaseModel): - recipe: str - trained_at: str | None = None - best_class: str | None = None - kid: str +def _sanitize_validation_errors(exc: ValidationError) -> list[dict[str, Any]]: + """Return a sanitized list of pydantic error dicts (loc, msg, type only). + Strips ``input`` and ``url`` fields (user-controlled / verbose). + Caps to ``_BATCH_VALIDATION_MAX_ERRORS`` entries to bound log size. 
+ """ + out: list[dict[str, Any]] = [] + for err in exc.errors()[:_BATCH_VALIDATION_MAX_ERRORS]: + out.append( + { + "loc": err.get("loc", ()), + "msg": err.get("msg", ""), + "type": err.get("type", ""), + } + ) + return out -class PredictResponse(BaseModel): - items: list[RecommendationItem] - model: ModelInfo - request_id: str +def _format_batch_validation_message(exc: ValidationError) -> str: + """Build a human-readable message from the first pydantic error. -# --------------------------------------------------------------------------- -# Router factory -# --------------------------------------------------------------------------- + Format: ``"<dot-joined loc>: <msg>"``. Falls back to ``"validation + failed"`` when the error list is empty (should not happen in practice). + """ + errors = exc.errors() + if not errors: + return "validation failed" + first = errors[0] + loc_parts = first.get("loc", ()) + loc_path = ".".join(str(p) for p in loc_parts) if loc_parts else "" + msg = first.get("msg", "validation failed") + return f"{loc_path}: {msg}" if loc_path else msg def make_router( registry: ModelRegistry, api_keys: list[ApiKeyEntry], - metadata_field_deny: list[str] | None = None, + insecure_no_auth: bool = False, ) -> APIRouter: - """Build and return the main API router. - - Parameters - ---------- - registry: - The shared :class:`~recotem.serving.registry.ModelRegistry`. - api_keys: - Parsed API key entries from ``ServeConfig``. - metadata_field_deny: - Optional list of metadata field names to strip from prediction - responses after the item-metadata join. - """ router = APIRouter() - # Case-fold all deny entries so the comparison is case-insensitive. - # e.g. "internal_id" in the deny list also blocks "Internal_ID" in metadata. 
- _deny_set: frozenset[str] = frozenset( - s.lower() for s in (metadata_field_deny or []) - ) - # ------------------------------------------------------------------ - # Auth dependency (closure over api_keys) - # ------------------------------------------------------------------ + # S5: distinguish explicit --insecure-no-auth from "no keys configured". + _bypass_mode = "insecure_no_auth" if insecure_no_auth else "loopback_no_keys" def _require_auth(request: Request) -> str: - return verify_api_key(request, api_keys) + return verify_api_key(request, api_keys, bypass_mode=_bypass_mode) + + def _resolve_entry( + name: str, request_id: str, kid: str, status_holder: list[str] + ) -> ModelEntry: + entry = registry.get(name) + if entry is None: + status_holder[0] = "recipe_not_found" + logger.warning( + "recipe_not_found", + name=name, + request_id=request_id, + kid=kid, + ) + raise HTTPException( + status_code=404, + detail={ + "detail": f"Recipe '{name}' not found", + "code": "RECIPE_NOT_FOUND", + }, + ) + if not entry.loaded or entry.recommender is None: + status_holder[0] = "unavailable" + logger.warning( + "recipe_not_loaded", + name=name, + request_id=request_id, + kid=kid, + ) + raise HTTPException( + status_code=503, + detail={ + "detail": f"Recipe '{name}' is registered but not loaded", + "code": "RECIPE_UNAVAILABLE", + }, + ) + return entry + + @contextmanager + def _request_metrics(recipe: str, verb: str, kid: str) -> Iterator[list[str]]: + start = time.monotonic() + structlog.contextvars.bind_contextvars(recipe=recipe, kid=kid) + status_holder: list[str] = ["error"] + try: + yield status_holder + finally: + _metrics.record_v1_request( + recipe, verb, status_holder[0], time.monotonic() - start + ) + structlog.contextvars.unbind_contextvars("recipe", "kid") + + def _build_items( + raw_results: list[tuple[str, float]], + exclude: frozenset[str], + meta_index: dict[str, Any] | None, + recipe_name: str = "", + verb: str = "", + ) -> tuple[list[RecommendItem], int, 
int]: + """Build the item list for a recommend response. + + Returns ``(items, fallback_count, dropped_count)``. The caller is + responsible for setting ``X-Recotem-Items-Degraded`` and incrementing + the degraded-items metrics when either count is non-zero. + """ + items: list[RecommendItem] = [] + fallback_count = 0 + dropped_count = 0 + for item_id, score in raw_results: + if item_id in exclude: + continue + fields: dict[str, Any] = {} + if meta_index is not None: + fields.update(meta_index.get(item_id, {})) + fields["item_id"] = item_id + fields["score"] = float(score) + try: + items.append(RecommendItem.model_validate(fields)) + except ValidationError as exc: + logger.warning( + "metadata_serialization_failed", + item_id=str(item_id), + error=str(exc)[:200], + recipe=recipe_name, + ) + if recipe_name: + _metrics.inc_metadata_serialization_error(recipe_name, verb) + # Fallback: serve item with only item_id and score. + bare: dict[str, Any] = {"item_id": item_id, "score": float(score)} + try: + items.append(RecommendItem.model_validate(bare)) + fallback_count += 1 + except ValidationError: + # Even bare item fails (e.g. invalid item_id) — drop it. 
+ dropped_count += 1 + return items, fallback_count, dropped_count + + def _apply_build_items_degraded( + items_result: tuple[list[RecommendItem], int, int], + response: Response, + recipe_name: str, + verb: str, + ) -> list[RecommendItem]: + """Apply degraded-item side-effects and return the item list.""" + items, fallback_count, dropped_count = items_result + degraded = fallback_count + dropped_count + if degraded > 0: + response.headers["X-Recotem-Items-Degraded"] = str(degraded) + if fallback_count > 0: + _metrics.inc_metadata_degraded_items( + recipe_name, verb, "fallback", fallback_count + ) + if dropped_count > 0: + _metrics.inc_metadata_degraded_items( + recipe_name, verb, "dropped", dropped_count + ) + return items - # ------------------------------------------------------------------ - # POST /predict/{name} - # ------------------------------------------------------------------ + def _any_seed_known( + entry: ModelEntry, seed_items: list[str], name: str + ) -> bool | None: + """Return True if at least one seed is known to the model id-map. - @router.post( - "/predict/{name}", - response_model=PredictResponse, - summary="Get recommendations for a single user", + Returns None when the recommender layout is unexpected (caller must + treat this as INTERNAL_ERROR rather than UNKNOWN_SEED_ITEMS). + + Used to distinguish ``UNKNOWN_SEED_ITEMS`` (no seed in id-map) from + ``NO_CANDIDATES`` (some seeds known but the ranker produced no + survivors after its own filtering / score-thresholding). + """ + try: + mapper = entry.recommender._mapper + id_map = mapper.item_id_to_index + except AttributeError as exc: + # Unexpected recommender layout — log and signal to caller. 
+ logger.warning( + "recommender_layout_unexpected", + recipe=name, + exc_type=type(exc).__name__, + ) + _metrics.inc_recommender_layout_unexpected(name) + return None + return any(str(s) in id_map for s in seed_items) + + @router.get("/health", summary="Overall health status (probe-safe)") + def health(response: Response) -> dict[str, Any]: + # Intentional design difference vs /health/details: this probe endpoint + # uses count-based degraded detection (loaded < total) under a single + # lock acquisition so the two numbers are consistent with each other. + # /health/details performs a per-recipe error scan for richer operator + # diagnostics; see health_details below. + loaded_count, total = registry.health_counts() + overall = "ok" if total == 0 or loaded_count == total else "degraded" + if overall == "degraded": + response.status_code = 503 + return {"status": overall, "total": total, "loaded": loaded_count} + + @router.get( + "/health/details", + summary="Per-recipe health detail (authenticated)", ) - def predict( - name: Annotated[str, Path(pattern=r"^[A-Za-z0-9_-]{1,64}$")], - body: PredictRequest, - request: Request, + def health_details( response: Response, kid: str = Depends(_require_auth), + ) -> dict[str, Any]: + # Intentional design difference vs /health: this operator endpoint + # checks per-recipe error fields (any last_load_error → degraded) and + # exposes per-recipe details for diagnostics. /health uses the cheaper + # count-based check so it is safe for high-frequency liveness probes. 
+ structlog.contextvars.bind_contextvars(kid=kid) + try: + snapshot = registry.health_snapshot() + overall = "ok" + for entry_health in snapshot.values(): + if not entry_health.get("loaded", True) or entry_health.get("error"): + overall = "degraded" + break + if overall == "degraded": + response.status_code = 503 + return {"status": overall, "recipes": snapshot} + finally: + structlog.contextvars.unbind_contextvars("kid") + + if _metrics.metrics_enabled(): + + @router.get( + "/metrics", + summary="Prometheus metrics", + include_in_schema=False, + ) + def metrics_endpoint(kid: str = Depends(_require_auth)) -> Any: + structlog.contextvars.bind_contextvars(kid=kid) + try: + data, content_type = _metrics.generate_latest() + return Response(content=data, media_type=content_type) + finally: + structlog.contextvars.unbind_contextvars("kid") + + @router.post( + "/recipes/{name}:recommend", + response_model=RecommendResponse, + summary="Recommend items for a single user", + ) + def recommend( + name: str = Path(pattern=_RECIPE_NAME_RE), + body: RecommendRequest = ..., + request: Request = ..., + response: Response = ..., + kid: str = Depends(_require_auth), + ) -> Any: + request_id = request.state.request_id + verb = "recommend" + + with _request_metrics(name, verb, kid) as status_holder: + try: + entry = _resolve_entry(name, request_id, kid, status_holder) + + # S1: determine known-membership BEFORE calling irspack so a + # genuine missing user produces UNKNOWN_USER, not INTERNAL_ERROR. + # Returns None when the recommender layout is unexpected (F4). + try: + user_known: bool | None = ( + body.user_id in entry.recommender._mapper.user_id_to_index + ) + except AttributeError as _attr_exc: + # Unexpected recommender layout — mirror _any_seed_known sentinel. 
+ logger.warning( + "recommender_layout_unexpected", + recipe=name, + verb=verb, + exc_type=type(_attr_exc).__name__, + ) + _metrics.inc_recommender_layout_unexpected(name) + user_known = ( + None # let irspack decide; None → INTERNAL_ERROR on KeyError + ) + + try: + raw_results: list[tuple[str, float]] = ( + entry.recommender.get_recommendation_for_known_user_id( + body.user_id, body.limit + ) + ) + except KeyError: + if user_known is False: + # Deterministic miss: user was not in the id-map. + status_holder[0] = "unknown_user" + raise HTTPException( + status_code=404, + detail={ + "detail": "user not seen during training", + "code": "UNKNOWN_USER", + }, + ) from None + # user_known is True or None (unexpected layout): propagate as + # INTERNAL_ERROR so layout surprises are visible, not silent. + logger.exception( + "recommender_unexpected_key_error", + recipe=name, + verb=verb, + user_id_hash=hashlib.sha256(body.user_id.encode()).hexdigest()[ + :8 + ], + ) + raise HTTPException( + status_code=500, + detail={ + "detail": "internal error", + "code": "INTERNAL_ERROR", + }, + ) from None + + exclude = ( + frozenset(body.exclude_items) if body.exclude_items else frozenset() + ) + items = _apply_build_items_degraded( + _build_items( + raw_results, exclude, entry.metadata_index, name, verb + ), + response, + name, + verb, + ) + + status_holder[0] = "ok" + response.headers["X-Recotem-Model-Version"] = entry.model_version + return RecommendResponse( + request_id=request_id, + recipe=name, + model_version=entry.model_version, + items=items, + ) + except HTTPException: + raise + except (MemoryError, RecursionError): + raise + except Exception: + raise + + @router.post( + "/recipes/{name}:recommend-related", + response_model=RecommendResponse, + summary="Recommend items related to a seed list", + ) + def recommend_related( + name: str = Path(pattern=_RECIPE_NAME_RE), + body: RecommendRelatedRequest = ..., + request: Request = ..., + response: Response = ..., + kid: str = 
Depends(_require_auth), ) -> Any: - """Return top-K recommendations for *user_id* using model *name*. + request_id = request.state.request_id + verb = "recommend-related" + + with _request_metrics(name, verb, kid) as status_holder: + try: + entry = _resolve_entry(name, request_id, kid, status_holder) + + seed_known = _any_seed_known(entry, body.seed_items, name) + if seed_known is None: + # M1: unexpected recommender layout — propagate as INTERNAL_ERROR. + status_holder[0] = "error" + raise HTTPException( + status_code=500, + detail={ + "detail": "internal error", + "code": "INTERNAL_ERROR", + }, + ) + if not seed_known: + status_holder[0] = "unknown_seed_items" + raise HTTPException( + status_code=404, + detail={ + "detail": "no known seed_items", + "code": "UNKNOWN_SEED_ITEMS", + }, + ) - The ``X-Request-ID`` response header is set to the request ID used - internally (echoed from the incoming ``X-Request-ID`` header when - present, otherwise a freshly generated UUID4). + try: + raw_results = entry.recommender.get_recommendation_for_new_user( + body.seed_items, body.limit + ) + except KeyError: + # S1: unexpected KeyError despite seed appearing known. 
+ logger.exception( + "recommender_unexpected_key_error", + recipe=name, + verb=verb, + seed_items_count=len(body.seed_items), + ) + raise HTTPException( + status_code=500, + detail={ + "detail": "internal error", + "code": "INTERNAL_ERROR", + }, + ) from None + + if not raw_results: + status_holder[0] = "no_candidates" + raise HTTPException( + status_code=404, + detail={ + "detail": "no candidates produced by ranker", + "code": "NO_CANDIDATES", + }, + ) - Status labels recorded via :func:`~recotem.serving.metrics.record_predict`: + exclude = ( + frozenset(body.exclude_items) if body.exclude_items else frozenset() + ) + items = _apply_build_items_degraded( + _build_items( + raw_results, exclude, entry.metadata_index, name, verb + ), + response, + name, + verb, + ) - - ``ok`` — successful recommendation - - ``user_not_found`` — user was not in training data (HTTP 404) - - ``unavailable`` — recipe not loaded or unhealthy (HTTP 503) - - ``error`` — any other unexpected exception - """ - raw_rid = request.headers.get("x-request-id", "") - request_id = raw_rid if _REQUEST_ID_RE.match(raw_rid) else str(uuid.uuid4()) - # Set on the background Response object for non-predict paths (errors); - # the success path returns JSONResponse with its own headers dict below. 
- response.headers["X-Request-ID"] = request_id - start = time.monotonic() - status = "error" + status_holder[0] = "ok" + response.headers["X-Recotem-Model-Version"] = entry.model_version + return RecommendResponse( + request_id=request_id, + recipe=name, + model_version=entry.model_version, + items=items, + ) + except HTTPException: + raise + except (MemoryError, RecursionError): + raise + except Exception: + raise + @router.post( + "/recipes/{name}:batch-recommend", + response_model=BatchRecommendResponse, + summary="Recommend items for multiple users", + ) + def batch_recommend( + name: str = Path(pattern=_RECIPE_NAME_RE), + body: BatchRecommendRequest = ..., + request: Request = ..., + response: Response = ..., + kid: str = Depends(_require_auth), + ) -> Any: + request_id = request.state.request_id + verb = "batch-recommend" + + with _request_metrics(name, verb, kid) as status_holder: + try: + entry = _resolve_entry(name, request_id, kid, status_holder) + + _metrics.observe_batch_size(name, verb, len(body.requests)) + + results: list[BatchResultOk | BatchResultErr] = [] + aggregate_limit = 0 + for idx, raw in enumerate(body.requests): + if not isinstance(raw, dict): + results.append( + _batch_error_entry( + idx, "VALIDATION_ERROR", "request must be an object" + ) + ) + _metrics.inc_batch_element_error(name, verb, "VALIDATION_ERROR") + continue + try: + single = RecommendRequest.model_validate(raw) + except ValidationError as exc: + _msg = _format_batch_validation_message(exc) + logger.warning( + "batch_element_validation_failed", + recipe=name, + verb=verb, + idx=idx, + errors=_sanitize_validation_errors(exc), + ) + results.append( + _batch_error_entry(idx, "VALIDATION_ERROR", _msg) + ) + _metrics.inc_batch_element_error(name, verb, "VALIDATION_ERROR") + continue + if aggregate_limit + single.limit > BATCH_AGGREGATE_LIMIT: + results.append( + _batch_error_entry( + idx, + "VALIDATION_ERROR", + f"aggregate limit cap exceeded: " + f"{BATCH_AGGREGATE_LIMIT}", + ) + 
) + _metrics.inc_batch_element_error(name, verb, "VALIDATION_ERROR") + continue + aggregate_limit += single.limit + # F5: initialize user_known at top of each iteration so + # stale values from a previous iteration cannot leak on + # future refactors. + batch_user_known: bool | None = True + try: + # S1/F4: check membership before calling irspack. + # Returns None when the recommender layout is unexpected. + try: + batch_user_known = ( + single.user_id + in entry.recommender._mapper.user_id_to_index + ) + except AttributeError as _attr_exc: + # Mirror _any_seed_known sentinel: log + metric + None. + logger.warning( + "recommender_layout_unexpected", + recipe=name, + verb=verb, + exc_type=type(_attr_exc).__name__, + ) + _metrics.inc_recommender_layout_unexpected(name) + batch_user_known = None + + raw_results = ( + entry.recommender.get_recommendation_for_known_user_id( + single.user_id, single.limit + ) + ) + exclude = ( + frozenset(single.exclude_items) + if single.exclude_items + else frozenset() + ) + meta = entry.metadata_index if body.include_metadata else None + items, _fb, _dr = _build_items( + raw_results, exclude, meta, name, verb + ) + if _fb + _dr > 0: + if _fb: + _metrics.inc_metadata_degraded_items( + name, verb, "fallback", _fb + ) + if _dr: + _metrics.inc_metadata_degraded_items( + name, verb, "dropped", _dr + ) + results.append( + BatchResultOk(index=idx, status="ok", items=items) + ) + except KeyError: + if batch_user_known is False: + results.append( + _batch_error_entry( + idx, "UNKNOWN_USER", "user not seen during training" + ) + ) + _metrics.inc_batch_element_error(name, verb, "UNKNOWN_USER") + else: + # batch_user_known is True or None (unexpected layout): + # propagate as INTERNAL_ERROR for observability. 
+ logger.exception( + "recommender_unexpected_key_error", + recipe=name, + verb=verb, + idx=idx, + ) + results.append( + _batch_error_entry( + idx, "INTERNAL_ERROR", "internal error" + ) + ) + _metrics.inc_batch_element_error( + name, verb, "INTERNAL_ERROR" + ) + except (MemoryError, RecursionError): + raise + except Exception as exc: + logger.exception( + "batch_element_error", + recipe=name, + verb=verb, + idx=idx, + exc_type=type(exc).__name__, + exc_module=type(exc).__module__, + ) + results.append( + _batch_error_entry(idx, "INTERNAL_ERROR", "internal error") + ) + _metrics.inc_batch_element_error(name, verb, "INTERNAL_ERROR") + + status_holder[0] = "ok" + response.headers["X-Recotem-Model-Version"] = entry.model_version + return BatchRecommendResponse( + request_id=request_id, + recipe=name, + model_version=entry.model_version, + results=results, + ) + except HTTPException: + raise + except (MemoryError, RecursionError): + raise + except Exception: + raise + + @router.post( + "/recipes/{name}:batch-recommend-related", + response_model=BatchRecommendResponse, + summary="Recommend items related to multiple seed lists", + ) + def batch_recommend_related( + name: str = Path(pattern=_RECIPE_NAME_RE), + body: BatchRecommendRelatedRequest = ..., + request: Request = ..., + response: Response = ..., + kid: str = Depends(_require_auth), + ) -> Any: + request_id = request.state.request_id + verb = "batch-recommend-related" + + with _request_metrics(name, verb, kid) as status_holder: + try: + entry = _resolve_entry(name, request_id, kid, status_holder) + + _metrics.observe_batch_size(name, verb, len(body.requests)) + + results: list[BatchResultOk | BatchResultErr] = [] + aggregate_limit = 0 + for idx, raw in enumerate(body.requests): + if not isinstance(raw, dict): + results.append( + _batch_error_entry( + idx, "VALIDATION_ERROR", "request must be an object" + ) + ) + _metrics.inc_batch_element_error(name, verb, "VALIDATION_ERROR") + continue + try: + single = 
RecommendRelatedRequest.model_validate(raw) + except ValidationError as exc: + _msg = _format_batch_validation_message(exc) + logger.warning( + "batch_element_validation_failed", + recipe=name, + verb=verb, + idx=idx, + errors=_sanitize_validation_errors(exc), + ) + results.append( + _batch_error_entry(idx, "VALIDATION_ERROR", _msg) + ) + _metrics.inc_batch_element_error(name, verb, "VALIDATION_ERROR") + continue + if aggregate_limit + single.limit > BATCH_AGGREGATE_LIMIT: + results.append( + _batch_error_entry( + idx, + "VALIDATION_ERROR", + f"aggregate limit cap exceeded: " + f"{BATCH_AGGREGATE_LIMIT}", + ) + ) + _metrics.inc_batch_element_error(name, verb, "VALIDATION_ERROR") + continue + aggregate_limit += single.limit + try: + seed_known = _any_seed_known(entry, single.seed_items, name) + if seed_known is None: + # M1: unexpected layout — INTERNAL_ERROR for this element. + results.append( + _batch_error_entry( + idx, "INTERNAL_ERROR", "internal error" + ) + ) + _metrics.inc_batch_element_error( + name, verb, "INTERNAL_ERROR" + ) + continue + if not seed_known: + results.append( + _batch_error_entry( + idx, + "UNKNOWN_SEED_ITEMS", + "no known seed_items", + ) + ) + _metrics.inc_batch_element_error( + name, verb, "UNKNOWN_SEED_ITEMS" + ) + continue + try: + raw_results = ( + entry.recommender.get_recommendation_for_new_user( + single.seed_items, single.limit + ) + ) + except KeyError: + # S1: unexpected KeyError despite seed appearing known. 
+ logger.exception( + "recommender_unexpected_key_error", + recipe=name, + verb=verb, + idx=idx, + ) + results.append( + _batch_error_entry( + idx, "INTERNAL_ERROR", "internal error" + ) + ) + _metrics.inc_batch_element_error( + name, verb, "INTERNAL_ERROR" + ) + continue + if not raw_results: + results.append( + _batch_error_entry( + idx, + "NO_CANDIDATES", + "no candidates produced by ranker", + ) + ) + _metrics.inc_batch_element_error( + name, verb, "NO_CANDIDATES" + ) + continue + exclude = ( + frozenset(single.exclude_items) + if single.exclude_items + else frozenset() + ) + meta = entry.metadata_index if body.include_metadata else None + items, _fb, _dr = _build_items( + raw_results, exclude, meta, name, verb + ) + if _fb + _dr > 0: + if _fb: + _metrics.inc_metadata_degraded_items( + name, verb, "fallback", _fb + ) + if _dr: + _metrics.inc_metadata_degraded_items( + name, verb, "dropped", _dr + ) + results.append( + BatchResultOk(index=idx, status="ok", items=items) + ) + except (MemoryError, RecursionError): + raise + except Exception as exc: + logger.exception( + "batch_element_error", + recipe=name, + verb=verb, + idx=idx, + exc_type=type(exc).__name__, + exc_module=type(exc).__module__, + ) + results.append( + _batch_error_entry(idx, "INTERNAL_ERROR", "internal error") + ) + _metrics.inc_batch_element_error(name, verb, "INTERNAL_ERROR") + + status_holder[0] = "ok" + response.headers["X-Recotem-Model-Version"] = entry.model_version + return BatchRecommendResponse( + request_id=request_id, + recipe=name, + model_version=entry.model_version, + results=results, + ) + except HTTPException: + raise + except (MemoryError, RecursionError): + raise + except Exception: + raise + + @router.get( + "/recipes", + response_model=RecipesListResponse, + summary="List loaded recipes", + ) + def list_recipes(kid: str = Depends(_require_auth)) -> dict[str, Any]: + structlog.contextvars.bind_contextvars(kid=kid) + try: + all_entries = registry.list() + total = 
len(all_entries) + summaries: list[dict[str, Any]] = [] + for e in all_entries: + if not e.loaded: + continue + summaries.append( + { + "name": e.name, + "model_version": e.model_version if e.artifact_sha256 else None, + "loaded_at": e.loaded_at, + "supported_verbs": e.supported_verbs, + "kind": e.kind, + } + ) + shown = len(summaries) + if shown < total: + logger.debug( + "recipes_list_filtered", + total=total, + shown=shown, + ) + return {"recipes": summaries} + finally: + structlog.contextvars.unbind_contextvars("kid") + + @router.get( + "/recipes/{name}", + response_model=RecipeDetailResponse, + summary="Get recipe detail", + ) + def recipe_detail( + name: str = Path(pattern=_RECIPE_NAME_RE), + request: Request = ..., + kid: str = Depends(_require_auth), + ) -> dict[str, Any]: + request_id = request.state.request_id + structlog.contextvars.bind_contextvars(kid=kid, recipe=name) try: - entry = registry.get(name) - # Only refuse predictions when the recipe has no usable model. - # ``last_load_error`` alone is *not* a 503 condition: when a fresh - # artifact fails to verify the watcher leaves the previous model - # loaded and only flags ``last_load_error`` (see watcher._mark_error - # — "stale-but-loaded keeps serving"). Surfacing that as 503 here - # would defeat the hot-swap availability contract. 
- if entry is None: - reason = "no_entry" + e = registry.get(name) + if e is None: logger.warning( - "recipe_unavailable", + "recipe_not_found", name=name, - reason=reason, request_id=request_id, + kid=kid, ) - status = "unavailable" raise HTTPException( - status_code=503, + status_code=404, detail={ - "detail": f"Recipe '{name}' is not loaded or unhealthy", - "code": "recipe_unavailable", + "detail": f"Recipe '{name}' not found", + "code": "RECIPE_NOT_FOUND", }, ) - if not entry.loaded or entry.recommender is None: - reason = "not_loaded" if not entry.loaded else "recommender_none" + if not e.loaded: logger.warning( - "recipe_unavailable", + "recipe_not_loaded", name=name, - reason=reason, - last_load_error=entry.last_load_error, request_id=request_id, + kid=kid, ) - status = "unavailable" raise HTTPException( status_code=503, detail={ - "detail": f"Recipe '{name}' is not loaded or unhealthy", - "code": "recipe_unavailable", + "detail": f"Recipe '{name}' is registered but not loaded", + "code": "RECIPE_UNAVAILABLE", }, ) - - structlog.contextvars.bind_contextvars( - recipe=name, request_id=request_id, kid=kid - ) - try: - raw_results: list[tuple[str, float]] = ( - entry.recommender.get_recommendation_for_known_user_id( - body.user_id, body.cutoff + hdr = e.header + return { + "name": e.name, + "model_version": e.model_version if e.artifact_sha256 else None, + "loaded_at": e.loaded_at, + "supported_verbs": e.supported_verbs, + "kind": e.kind, + "config_digest": e.config_digest or None, + "algorithms": e.algorithms or [], + "best_algorithm": e.best_class or "", + "trained_at": hdr.get("trained_at"), + "best_class": hdr.get("best_class"), + "best_params": hdr.get("best_params"), + "best_score": ( + # Guard against NaN/Inf from old artifacts or buggy trainers. + # RecommendItem.score uses allow_inf_nan=False; apply the same + # posture here so the response is always valid JSON (M6). 
+ _raw_score + if ( + (_raw_score := hdr.get("best_score")) is None + or (isinstance(_raw_score, float) and math.isfinite(_raw_score)) + or not isinstance(_raw_score, float) ) - ) - except KeyError: - status = "user_not_found" - raise HTTPException( - status_code=404, - detail={ - "detail": ( - f"User '{body.user_id}' was not seen during training" - ), - "code": "user_not_found", - }, - ) from None - finally: - # Only unbind the keys this handler bound — do NOT call - # clear_contextvars() which would also wipe upstream bindings - # set by middleware (e.g. request-id, correlation-id). - structlog.contextvars.unbind_contextvars("recipe", "request_id", "kid") - - # Build item list as plain dicts, joining metadata if available. - # Fast path: use the pre-flattened metadata_index (O(1) dict.get - # per item, deny filtering and NaN→None already applied at load - # time). Fallback to the DataFrame path only for entries that - # pre-date the index field (e.g. stubs created directly in tests). - # - # R-2: Return via JSONResponse(content=...) to bypass the second - # pydantic serialization pass that FastAPI performs when the route - # returns a model instance. response_model=PredictResponse is kept - # on the decorator for OpenAPI schema generation; FastAPI skips - # pydantic validation when the return value is a Response subclass. - # - # R-3: Re-set item_id and score AFTER metadata update so that a - # metadata column named "item_id" or "score" cannot shadow the - # trusted recommender values. 
- item_dicts: list[dict[str, Any]] = [] - meta_index = entry.metadata_index - meta_df = entry.metadata_df if meta_index is None else None - _meta_failures = 0 # I-11: count per-request metadata lookup failures - - for item_id, score in raw_results: - fields: dict[str, Any] = {} - if meta_index is not None: - fields.update(meta_index.get(item_id, {})) - elif meta_df is not None: - # Track how many items returned an empty dict due to a - # non-KeyError failure in _lookup_metadata (I-11). - _before_size = len(fields) - row = _lookup_metadata(meta_df, item_id, _deny_set, name) - if not row and item_id in meta_df.index: - # item_id was in the index but lookup returned empty — - # indicates an internal lookup failure (not a missing key). - _meta_failures += 1 - fields.update(row) - # Overwrite after metadata join: trusted recommender values - # must not be shadowed by metadata columns with the same name. - fields["item_id"] = item_id - fields["score"] = float(score) - item_dicts.append(fields) - - # name is FastAPI-validated (Path regex), trained_at/best_class/kid - # are str|None straight from the trusted artifact header and registry. - content: dict[str, Any] = { - "items": item_dicts, - "model": { - "recipe": name, - "trained_at": entry.trained_at, - "best_class": entry.best_class, - "kid": entry.kid, - }, - "request_id": request_id, + else None + ), + "metric": hdr.get("metric"), + "cutoff": hdr.get("cutoff"), + "tuning": hdr.get("tuning"), + "data_stats": hdr.get("data_stats"), + "recotem_version": hdr.get("recotem_version"), + "irspack_version": hdr.get("irspack_version"), + "recipe_hash": hdr.get("recipe_hash") or None, } - - status = "ok" - # Build response headers: always include X-Request-ID; add the - # X-Recotem-Metadata-Degraded sentinel when any metadata lookup - # failed during this request (I-11). 
- resp_headers: dict[str, str] = {"X-Request-ID": request_id} - if _meta_failures > 0: - resp_headers["X-Recotem-Metadata-Degraded"] = "1" - # Include X-Request-ID directly in JSONResponse headers so it is - # present regardless of how FastAPI merges background response - # headers into returned Response subclasses. - return JSONResponse( - content=content, - headers=resp_headers, - ) except HTTPException: raise except (MemoryError, RecursionError): raise - except Exception as exc: - logger.exception( - "predict_handler_unexpected_error", - name=name, - request_id=request_id, - kid=kid, - error_class=type(exc).__name__, - ) + except Exception: raise finally: - _metrics.record_predict(name, status, time.monotonic() - start) - - # ------------------------------------------------------------------ - # GET /health - # ------------------------------------------------------------------ - - @router.get("/health", summary="Overall health status (probe-safe)") - def health(response: Response) -> dict[str, Any]: - """Return aggregate health suitable for k8s readiness/liveness probes. - - Returns only ``{status, total, loaded}`` — no per-recipe detail or - sensitive key identifiers are included so this endpoint can be called - without authentication. - - Use ``GET /health/details`` (authenticated) to obtain per-recipe - breakdowns including ``kid``, ``trained_at``, and ``best_class``. - - HTTP status mirrors ``status``: - - - ``200 OK`` when every recipe is loaded and free of errors. - - ``503 Service Unavailable`` when any recipe is unloaded or carries - a ``last_load_error``. Kubernetes readiness/liveness probes only - consider the status code, so returning 200 for a degraded process - would let Pods be marked ``Ready`` while every prediction returns - 503 — defeating the rolling-upgrade safety net. 
- """ - snapshot = registry.health_snapshot() - total = len(snapshot) - loaded_count = sum( - 1 - for entry_health in snapshot.values() - if entry_health.get("loaded", False) and not entry_health.get("error") - ) - overall = ( - "ok" - if (loaded_count == total and total > 0 or total == 0 and loaded_count == 0) - else "degraded" - ) - # Recheck: if any entry is degraded, mark overall degraded. - for entry_health in snapshot.values(): - if not entry_health.get("loaded", True) or entry_health.get("error"): - overall = "degraded" - break - if overall == "degraded": - response.status_code = 503 - return {"status": overall, "total": total, "loaded": loaded_count} - - # ------------------------------------------------------------------ - # GET /health/details - # ------------------------------------------------------------------ - - @router.get("/health/details", summary="Per-recipe health detail (authenticated)") - def health_details( - response: Response, - kid: str = Depends(_require_auth), - ) -> dict[str, Any]: - """Return per-recipe health detail including ``kid``, ``trained_at``, - ``best_class``, and load errors. - - Requires authentication (``X-API-Key``) because the per-recipe detail - includes artifact key identifiers (``kid``) which should not be publicly - discoverable. Use ``GET /health`` for unauthenticated probe-safe status. - - Every recipe found in the recipes directory at startup appears here, - regardless of whether its artifact loaded — startup-failed recipes - are inserted as stubs with ``loaded=false`` and an ``error`` string. - - HTTP status mirrors the aggregate status: - - - ``200 OK`` when every recipe is loaded and free of errors. - - ``503 Service Unavailable`` when any recipe is unloaded or carries - a ``last_load_error``. 
- """ - snapshot = registry.health_snapshot() - overall = "ok" - for entry_health in snapshot.values(): - if not entry_health.get("loaded", True) or entry_health.get("error"): - overall = "degraded" - break - if overall == "degraded": - response.status_code = 503 - return {"status": overall, "recipes": snapshot} - - # ------------------------------------------------------------------ - # GET /models - # ------------------------------------------------------------------ - - @router.get("/models", summary="List loaded models") - def models( - kid: str = Depends(_require_auth), - ) -> list[dict[str, Any]]: - """Return metadata for all currently loaded models. - - Stub entries inserted for recipes whose artifact failed to load at - startup are excluded — they have no header or class to report. - Operators see those via ``/health`` instead. - """ - return [e.models_dict() for e in registry.list() if e.loaded] - - # ------------------------------------------------------------------ - # GET /metrics (opt-in via RECOTEM_METRICS_ENABLED) - # ------------------------------------------------------------------ - - if _metrics.metrics_enabled(): - - @router.get("/metrics", summary="Prometheus metrics", include_in_schema=False) - def metrics() -> Any: - """Expose Prometheus metrics. - - Requires both ``prometheus_client`` to be installed and - ``RECOTEM_METRICS_ENABLED`` to be a truthy value at app - construction time. 
- """ - from fastapi.responses import Response - - data, content_type = _metrics.generate_latest() - return Response(content=data, media_type=content_type) + structlog.contextvars.unbind_contextvars("kid", "recipe") return router -# --------------------------------------------------------------------------- -# Metadata join helper -# --------------------------------------------------------------------------- - - -def _lookup_metadata( - meta_df: Any, - item_id: str, - deny_set: frozenset[str], - recipe_name: str = "", -) -> dict[str, Any]: - """Return a flat dict of metadata fields for *item_id*. - - Returns an empty dict if the item is not found or any error occurs. - The documented error set that returns empty dict: - - - ``KeyError`` — item not in metadata index (normal, not an error). - - ``AttributeError`` — non-unique index returned a DataFrame instead of a - Series so ``.to_dict()`` behaves unexpectedly. - - ``TypeError`` — a non-string column name caused ``.lower()`` to fail. - - ``ValueError`` — malformed row data that cannot be iterated. - - All unexpected errors are logged at WARNING level and increment - ``recotem_metadata_lookup_errors_total`` so operators can detect - metadata misconfiguration without silencing it completely. - """ - if item_id not in meta_df.index: - return {} - try: - row = meta_df.loc[item_id] - except KeyError: - # Reaching here means item_id passed the index check above but - # loc[] still raised — possible with a non-unique index returning a - # DataFrame instead of a Series, or a corrupt index state. - # Log at WARNING so operators can detect metadata misconfiguration; - # also increment the metric so this class of error is observable in - # dashboards alongside other metadata lookup failures. 
- logger.warning( - "metadata_lookup_unexpected_keyerror", - recipe=recipe_name, - item_id=str(item_id), - ) - _metrics.inc_metadata_lookup_error(recipe_name) - return {} - try: - out: dict[str, Any] = {} - for k, v in row.to_dict().items(): - # Guard: skip non-string column names (M-13 — .lower() would raise - # AttributeError on an int column name). - if not isinstance(k, str): - continue - if k.lower() in deny_set: - continue - # Preserve existing NaN → None normalisation. - out[k] = None if isinstance(v, float) and math.isnan(v) else v - return out - except (AttributeError, TypeError, ValueError) as exc: - logger.warning( - "metadata_lookup_failed", - recipe=recipe_name, - item_id=str(item_id), - error_class=type(exc).__name__, - ) - _metrics.inc_metadata_lookup_error(recipe_name) - return {} +def _batch_error_entry(idx: int, code: ErrorCode, message: str) -> BatchResultErr: + return BatchResultErr( + index=idx, + status="error", + error=ErrorDetail(code=code, message=message), + ) diff --git a/src/recotem/serving/schemas.py b/src/recotem/serving/schemas.py new file mode 100644 index 00000000..2c29d5c1 --- /dev/null +++ b/src/recotem/serving/schemas.py @@ -0,0 +1,279 @@ +# src/recotem/serving/schemas.py +"""Pydantic v2 request/response models for the recotem v1 HTTP API.""" + +from __future__ import annotations + +from typing import Annotated, Any, Literal + +from pydantic import AwareDatetime, BaseModel, ConfigDict, Field + +# Aggregate ``limit`` cap across all sub-requests in a single batch call. +# Documented in docs/api-reference.md. Bounds total candidate work per HTTP +# request so a 256-element batch cannot demand 256_000 items in one go. +BATCH_AGGREGATE_LIMIT = 5000 + +# Machine-readable error codes emitted by the v1 API. Kept as a Literal +# union so OpenAPI / SDK generation produces an exhaustive enum and any +# new code added in routes/auth/app fails type-check until listed here. 
+ErrorCode = Literal[
+    "RECIPE_NOT_FOUND",
+    "RECIPE_UNAVAILABLE",
+    "UNKNOWN_USER",
+    "UNKNOWN_SEED_ITEMS",
+    "NO_CANDIDATES",
+    "VALIDATION_ERROR",
+    "MISSING_API_KEY",
+    "INVALID_API_KEY",
+    "INTERNAL_ERROR",
+]
+
+# ---------------------------------------------------------------------------
+# Single-request inputs
+# ---------------------------------------------------------------------------
+
+_ItemStr = Annotated[str, Field(min_length=1, max_length=256)]  # 1..256-char ID
+
+
+class RecommendRequest(BaseModel):  # single (non-batch) request keyed by user_id
+    model_config = ConfigDict(extra="forbid")  # unknown request fields are rejected
+
+    user_id: Annotated[
+        str, Field(min_length=1, max_length=256, description="User identifier")
+    ]
+    limit: Annotated[
+        int, Field(ge=1, le=1000, description="Maximum number of items to return")
+    ] = 10
+    exclude_items: Annotated[
+        list[_ItemStr] | None,
+        Field(max_length=1000, description="Item IDs to exclude from results"),
+    ] = None
+
+
+class RecommendRelatedRequest(BaseModel):  # single request keyed by seed items
+    model_config = ConfigDict(extra="forbid")  # unknown request fields are rejected
+
+    seed_items: Annotated[
+        list[_ItemStr],
+        Field(
+            min_length=1,
+            max_length=100,
+            description="Item IDs to base recommendations on",
+        ),
+    ]
+    limit: Annotated[
+        int, Field(ge=1, le=1000, description="Maximum number of items to return")
+    ] = 10
+    exclude_items: Annotated[
+        list[_ItemStr] | None,
+        Field(max_length=1000, description="Item IDs to exclude from results"),
+    ] = None
+
+
+# ---------------------------------------------------------------------------
+# Batch-request inputs
+# ---------------------------------------------------------------------------
+#
+# Per-element schema validation is deferred to the handler so a single bad
+# entry does not 422 the whole batch — instead the bad entry surfaces as a
+# ``BatchResultErr`` (``status="error"``, ``error.code="VALIDATION_ERROR"``).
+# The ``list[dict[str, Any]]`` typing here only enforces the list-level
+# invariants (1..256 elements). Aggregate ``limit`` (``BATCH_AGGREGATE_LIMIT``)
+# is checked in the handler after per-element parsing.
+
+
+class BatchRecommendRequest(BaseModel):  # envelope for a batch of recommend requests
+    model_config = ConfigDict(extra="forbid")  # unknown envelope fields are rejected
+
+    requests: Annotated[
+        list[dict[str, Any]],  # raw dicts: only the list length is validated here
+        Field(
+            min_length=1, max_length=256, description="Individual recommend requests"
+        ),
+    ]
+    include_metadata: Annotated[
+        bool,
+        Field(
+            description=(
+                "When True, include per-item metadata fields in each result "
+                "(same as single-recommend enrichment). Default False preserves "
+                "performance for large batches."
+            )
+        ),
+    ] = False
+
+
+class BatchRecommendRelatedRequest(BaseModel):  # envelope for related-item batches
+    model_config = ConfigDict(extra="forbid")  # unknown envelope fields are rejected
+
+    requests: Annotated[
+        list[dict[str, Any]],  # raw dicts: only the list length is validated here
+        Field(
+            min_length=1,
+            max_length=256,
+            description="Individual recommend-related requests",
+        ),
+    ]
+    include_metadata: Annotated[
+        bool,
+        Field(
+            description=(
+                "When True, include per-item metadata fields in each result "
+                "(same as single-recommend enrichment). Default False preserves "
+                "performance for large batches."
+            )
+        ),
+    ] = False
+
+
+# ---------------------------------------------------------------------------
+# Branded string types for artifact digests
+# ---------------------------------------------------------------------------
+
+# ``sha256:<64 lowercase hex chars>`` — used for model_version and config_digest.
+Sha256Hex = Annotated[str, Field(pattern=r"^sha256:[0-9a-f]{64}$")]
+
+# Plain 64-char lowercase hex string (no ``sha256:`` prefix) — used for recipe_hash.
+HexHash = Annotated[str, Field(pattern=r"^[0-9a-f]{64}$")] + + +# --------------------------------------------------------------------------- +# Common response building blocks +# --------------------------------------------------------------------------- + + +class RecommendItem(BaseModel): + item_id: Annotated[ + str, Field(min_length=1, max_length=256, description="Item identifier") + ] + score: Annotated[ + float, Field(allow_inf_nan=False, description="Recommendation score") + ] + model_config = ConfigDict(extra="allow") + + +class ErrorDetail(BaseModel): + model_config = ConfigDict(extra="forbid") + + code: Annotated[ErrorCode, Field(description="Machine-readable error code")] + message: Annotated[ + str, Field(min_length=1, description="Human-readable error message") + ] + + +class RecommendResponse(BaseModel): + model_config = ConfigDict(extra="forbid") + + request_id: Annotated[ + str, Field(min_length=1, description="Unique request identifier") + ] + recipe: Annotated[str, Field(min_length=1, description="Recipe name")] + model_version: Annotated[Sha256Hex, Field(description="Artifact SHA-256 digest")] + items: Annotated[ + list[RecommendItem], Field(description="Recommended items in ranked order") + ] + + +class BatchResultOk(BaseModel): + model_config = ConfigDict(extra="forbid") + + index: Annotated[ + int, + Field(ge=0, description="Zero-based index of the original sub-request"), + ] + status: Literal["ok"] + items: Annotated[ + list[RecommendItem], Field(description="Recommended items in ranked order") + ] + + +class BatchResultErr(BaseModel): + model_config = ConfigDict(extra="forbid") + + index: Annotated[ + int, + Field(ge=0, description="Zero-based index of the original sub-request"), + ] + status: Literal["error"] + error: Annotated[ErrorDetail, Field(description="Error detail")] + + +# Discriminated union: ``status`` field selects the concrete class at +# parse/serialise time so the ok/error invariant is enforced by the type +# system rather 
than a ``@model_validator``. +BatchResultEntry = Annotated[ + BatchResultOk | BatchResultErr, Field(discriminator="status") +] + + +class BatchRecommendResponse(BaseModel): + model_config = ConfigDict(extra="forbid") + + request_id: Annotated[ + str, Field(min_length=1, description="Unique request identifier") + ] + recipe: Annotated[str, Field(min_length=1, description="Recipe name")] + model_version: Annotated[Sha256Hex, Field(description="Artifact SHA-256 digest")] + results: Annotated[ + list[BatchResultEntry], Field(description="Per-request results in input order") + ] + + +# --------------------------------------------------------------------------- +# Recipe discovery +# --------------------------------------------------------------------------- + + +class RecipeSummary(BaseModel): + model_config = ConfigDict(extra="forbid") + + name: Annotated[str, Field(min_length=1, description="Recipe name")] + model_version: Annotated[ + Sha256Hex | None, + Field(description="Artifact SHA-256 digest, or null for stub entries"), + ] + loaded_at: Annotated[ + AwareDatetime, + Field(description="UTC timestamp of last successful hot-swap"), + ] + supported_verbs: Annotated[ + list[ + Literal[ + "recommend", + "recommend-related", + "batch-recommend", + "batch-recommend-related", + ] + ], + Field(min_length=1, description="HTTP verbs available for this recipe"), + ] + kind: Annotated[ + Literal["user-item", "item-item"], Field(description="Recommendation kind") + ] + + +class RecipesListResponse(BaseModel): + model_config = ConfigDict(extra="forbid") + + recipes: Annotated[list[RecipeSummary], Field(description="All loaded recipes")] + + +class RecipeDetailResponse(RecipeSummary): + config_digest: Annotated[ + Sha256Hex | None, + Field(description="SHA-256 digest of recipe config, or null when unavailable"), + ] = None + algorithms: Annotated[ + list[str], Field(description="Algorithms evaluated during training") + ] + best_algorithm: Annotated[str, 
Field(description="Algorithm selected by Optuna")] + trained_at: AwareDatetime | None = None + best_class: str | None = None + best_params: dict[str, Any] | None = None + best_score: float | None = None + metric: Literal["ndcg", "map", "recall", "hit"] | None = None + cutoff: Annotated[int, Field(ge=1)] | None = None + tuning: dict[str, Any] | None = None + data_stats: dict[str, Any] | None = None + recotem_version: Annotated[str, Field(pattern=r"^\d+\.\d+")] | None = None + irspack_version: Annotated[str, Field(pattern=r"^\d+\.\d+")] | None = None + recipe_hash: HexHash | None = None diff --git a/src/recotem/serving/watcher.py b/src/recotem/serving/watcher.py index c31113f0..7779d95b 100644 --- a/src/recotem/serving/watcher.py +++ b/src/recotem/serving/watcher.py @@ -23,10 +23,12 @@ from __future__ import annotations +import errno import hashlib import json import random import threading +import time as _time from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path @@ -39,6 +41,8 @@ from recotem._metrics_watcher import inc_recipes_dir_scan_failure as _inc_scan_failure from recotem.artifact.format import ArtifactError from recotem.serving import metrics as _metrics +from recotem.serving._header_utils import extract_algorithms, normalize_config_digest +from recotem.serving._naming import dedup_stub_name from recotem.serving.registry import ModelEntry, ModelRegistry if TYPE_CHECKING: @@ -181,6 +185,18 @@ class _RecipeWatchState: #: succeeded. Used by OBS-1 to demote repeated identical errors from #: WARNING to DEBUG so log aggregation is not flooded during an outage. _last_stat_error_class: str | None = None + #: Set to True after the first TypeError from artifact_path + ".sha256" + #: so subsequent polls skip the sidecar check rather than flooding logs + #: with the same warning on every poll cycle (M7). + sidecar_unsupported: bool = False + #: The yaml_mtime at which sidecar_unsupported was set. 
When the recipe + #: YAML changes (yaml_mtime differs from this value) the sidecar_unsupported + #: flag is cleared so the new configuration gets a fresh evaluation (C4). + sidecar_unsupported_at_mtime: float | None = None + #: Counter for consecutive transient OSErrors on sidecar reads. After + #: 3 consecutive non-ENOENT OSErrors the watcher skips sidecar checks until + #: the next mtime change to avoid triggering full reloads indefinitely (m7). + sidecar_io_error_count: int = 0 # --------------------------------------------------------------------------- @@ -417,13 +433,10 @@ def _register_yaml_failure_stub(self, yaml_file: Path, error: Exception) -> None error: The exception raised by load_recipe(). """ - stub_name = yaml_file.stem - # De-duplicate against existing names — use a suffix if needed. - _suffix = 0 - base = stub_name - while stub_name in self._states or self._registry.get(stub_name) is not None: - _suffix += 1 - stub_name = f"{base}_{_suffix}" + stub_name = dedup_stub_name( + yaml_file.stem, + lambda n: n in self._states or self._registry.get(n) is not None, + ) error_msg = f"YAML parse failed: {error}" stub = ModelEntry( @@ -437,7 +450,7 @@ def _register_yaml_failure_stub(self, yaml_file: Path, error: Exception) -> None loaded=False, ) self._registry.replace(stub_name, stub) - _metrics.inc_artifact_load_failure(stub_name) + _metrics.inc_artifact_load_failure(stub_name, reason="yaml") _metrics.set_model_loaded(stub_name, False) # Create a minimal _RecipeWatchState using a sentinel recipe object. @@ -481,6 +494,10 @@ def _scan_recipes_dir(self) -> None: scan_error_msg = f"recipes-dir scan failed: {exc}" for _name in list(self._states.keys()): self._registry.set_load_error(_name, scan_error_msg) + # W2: also bump the artifact-load-failure counter per recipe + # so Prometheus alerts on dir-scan failures can be expressed + # in terms of this counter rather than the neutral scan counter. 
+ _metrics.inc_artifact_load_failure(_name, reason="dir_scan") return current_names = set(self._states.keys()) @@ -737,6 +754,7 @@ def _check(name: str) -> tuple[str, Any, str | None]: self._record_load_failure( _failed_recipe, f"stat timeout after {_per_future_timeout:.0f}s", + reason="timeout", ) fut.cancel() pending = set() @@ -795,7 +813,7 @@ def _process_stat_result( if entry is not None and entry.last_load_error is None: logger.warning("artifact_disappeared", name=name) self._registry.set_load_error(name, error_msg) - _metrics.inc_artifact_load_failure(name) + _metrics.inc_artifact_load_failure(name, reason="read") return # Successful stat — clear the error-class tracker (OBS-1). @@ -860,7 +878,7 @@ def _load_recipe( error=str(exc), exc_type=type(exc).__name__, ) - self._record_load_failure(name, f"read failed: {exc}") + self._record_load_failure(name, f"read failed: {exc}", reason="read") return except Exception as exc: logger.error( @@ -870,7 +888,9 @@ def _load_recipe( error=str(exc), exc_type=type(exc).__name__, ) - self._record_load_failure(name, f"unexpected read error: {exc}") + self._record_load_failure( + name, f"unexpected read error: {exc}", reason="unexpected" + ) return sha256 = _sha256_bytes(data) @@ -891,11 +911,12 @@ def _load_recipe( reason=kid_reason, kid=kid_log, ) - # Distinguish post-HMAC deserialization failures from other - # ArtifactErrors by message prefix. This prefix is set by - # unpickle_payload in artifact/signing.py. + # Classify the failure step from the error message prefix so the + # recotem_artifact_load_failures_total counter can be partitioned + # by reason (read/parse/hmac/header_json/deserialize/metadata). 
_err_str = str(exc) - if _err_str.startswith("deserialization failed:"): + reason = _classify_artifact_error(_err_str) + if reason == "deserialize": streak = self._post_hmac_failure_streak.get(name, 0) + 1 self._post_hmac_failure_streak[name] = streak logger.error( @@ -918,8 +939,9 @@ def _load_recipe( name=name, kid=kid_log, error=_err_str, + reason=reason, ) - self._record_load_failure(name, _err_str) + self._record_load_failure(name, _err_str, reason=reason) return except (MemoryError, RecursionError): # Never swallow OOM / stack-exhaustion in a long-running thread: @@ -934,7 +956,13 @@ def _load_recipe( exc_type=type(exc).__name__, error=str(exc), ) - self._record_load_failure(name, f"{type(exc).__name__}: {exc}") + # Reset the deserialize-streak — an unrelated exception must not + # continue accumulating a streak that was tracking a different + # failure class (M9). + self._post_hmac_failure_streak.pop(name, None) + self._record_load_failure( + name, f"{type(exc).__name__}: {exc}", reason="unexpected" + ) return new_marker = ( @@ -1009,13 +1037,27 @@ def _build_entry( metadata_df = None metadata_index = None if recipe.item_metadata is not None: - metadata_df = _load_metadata(recipe, name) - from recotem.metadata.loader import build_metadata_index + try: + metadata_df = _load_metadata(recipe, name) + from recotem.metadata.loader import build_metadata_index - deny_set: frozenset[str] = frozenset( - s.lower() for s in (self._config.metadata_field_deny or []) - ) - metadata_index = build_metadata_index(metadata_df, deny_set) + deny_set: frozenset[str] = frozenset( + s.lower() for s in (self._config.metadata_field_deny or []) + ) + _recipe_name = name + + def _on_row_error() -> None: + _metrics.inc_metadata_index_build_error(_recipe_name) + + metadata_index = build_metadata_index( + metadata_df, deny_set, on_row_error=_on_row_error + ) + except (MemoryError, RecursionError): + raise + except ArtifactError as exc: + raise ArtifactError(f"metadata load failed: 
{exc}") from exc + except Exception as exc: + raise ArtifactError(f"metadata load failed: {exc}") from exc return ModelEntry( name=name, @@ -1026,6 +1068,10 @@ def _build_entry( metadata_index=metadata_index, last_load_error=None, artifact_path=artifact_path, + loaded_at_unix=_time.time(), + config_digest=normalize_config_digest(header_dict.get("config_digest")) + or "", + algorithms=extract_algorithms(header_dict), ) def _mark_error(self, name: str, error: str) -> None: @@ -1038,8 +1084,8 @@ def _mark_error(self, name: str, error: str) -> None: ``set_load_error`` returns False when no entry is registered under *name*. This should be unreachable in normal operation because the watcher inserts a stub entry before any load attempt, but log it as - a warning when it does happen so we can detect ordering bugs in - future refactors (rather than silently losing the failure signal). + a warning and increment a counter when it does happen so we can + detect ordering bugs in future refactors (W1). """ ok = self._registry.set_load_error(name, error) if not ok: @@ -1048,11 +1094,17 @@ def _mark_error(self, name: str, error: str) -> None: name=name, error=error, ) + _metrics.inc_watcher_state_divergence() - def _record_load_failure(self, name: str, error: str) -> None: - """Mark the entry's load error and increment the failure metrics.""" + def _record_load_failure( + self, name: str, error: str, reason: str = "unexpected" + ) -> None: + """Mark the entry's load error and increment the failure metrics. + + *reason* labels the failure step for ``recotem_artifact_load_failures_total``. 
+ """ self._mark_error(name, error) - _metrics.inc_artifact_load_failure(name) + _metrics.inc_artifact_load_failure(name, reason=reason) _metrics.record_swap(name, ok=False) @@ -1068,6 +1120,44 @@ def _record_load_failure(self, name: str, error: str) -> None: _KID_LOG_MAX_LEN: int = 64 +def _classify_artifact_error(err_msg: str) -> str: + """Map an ``ArtifactError`` message to the load-failure reason label. + + The reason label is used as a Prometheus metric label on + ``recotem_artifact_load_failures_total`` so operators can distinguish + bad-signature, corrupt-payload, missing-metadata, and similar failure + modes for alerting (e.g. an HMAC spike is a security signal; a metadata + spike is a data-pipeline signal). Classification is deliberately + message-prefix based to match the stable wording chosen at each + ArtifactError raise site in ``artifact/format.py`` and + ``artifact/signing.py``. + """ + lower = err_msg.lower() + if lower.startswith("deserialization failed:"): + return "deserialize" + if lower.startswith("metadata load failed:"): + return "metadata" + if lower.startswith("header json"): + return "header_json" + if "hmac verification failed" in lower or "unknown kid" in lower: + return "hmac" + if ( + "artifact too short" in lower + or "magic" in lower + or "reserved bytes" in lower + or "kid is not valid" in lower + or "header json is not valid" in lower + or "header_len" in lower + or "version" in lower + ): + return "parse" + logger.warning( + "artifact_error_unclassified", + message=err_msg[:200], + ) + return "unexpected" + + def _extract_kid_safe(data: bytes) -> tuple[str, str | None]: """Best-effort extraction of kid from raw artifact bytes. @@ -1076,10 +1166,9 @@ def _extract_kid_safe(data: bytes) -> tuple[str, str | None]: - On success: ``(sanitised_kid, None)`` — *sanitised_kid* is already processed through ``_format_kid_for_log`` (length-capped, non-printables hex-escaped). 
- - On structural failure: ``("\\x00<unparseable>", reason)`` — the sentinel - contains a ``\\x00`` byte that ``_format_kid_for_log`` renders as - ``\\x00``, making it impossible to collide with a valid UTF-8 kid; any - ``KeyRing`` lookup will reject it immediately. + - On structural failure: ``("<extract_failed>", reason)`` — the sentinel + is a fixed string that makes it impossible to collide with a valid UTF-8 + kid; any ``KeyRing`` lookup will reject it immediately. *reason* is one of ``"too_short"``, ``"kid_len_out_of_range"``, ``"truncated"``, or ``"decode_error"``. @@ -1092,7 +1181,7 @@ def _extract_kid_safe(data: bytes) -> tuple[str, str | None]: """ from recotem.artifact.format import FIXED_PREFIX_SIZE, MAX_KID_LEN - _UNPARSEABLE_SENTINEL = "\x00<unparseable>" + _UNPARSEABLE_SENTINEL = "<extract_failed>" try: if len(data) < FIXED_PREFIX_SIZE: @@ -1144,43 +1233,123 @@ def _check_sidecar_changed(state: _RecipeWatchState) -> bool: ``False`` if unchanged or absent (let the outer marker comparison decide). """ artifact_path = state.artifact_path + + # Short-circuit: if a previous poll already determined that sidecar + # construction is unsupported for this path, re-evaluate only if the + # recipe YAML mtime has changed since the flag was set (C4). + if state.sidecar_unsupported: + import os as _os + + yaml_mtime: float | None = None + try: + recipe_yaml = getattr(state.recipe, "_yaml_path", None) or getattr( + state.recipe, "yaml_path", None + ) + if recipe_yaml is not None: + yaml_mtime = _os.stat(recipe_yaml).st_mtime + except OSError: + pass + if ( + yaml_mtime is None + or state.sidecar_unsupported_at_mtime is None + or yaml_mtime == state.sidecar_unsupported_at_mtime + ): + return False + # YAML mtime changed — clear the flag and re-evaluate. + state.sidecar_unsupported = False + state.sidecar_unsupported_at_mtime = None + # Only meaningful for local-FS paths where we can form a sibling sidecar. 
# For remote URIs (s3://, gs://) this is a no-op; the marker comparison # (ETag / mtime) is already cheap enough. try: sidecar_path = Path(artifact_path + ".sha256") - except TypeError: + except TypeError as exc: + logger.warning( + "sidecar_path_type_error", + path=str(artifact_path), + exc_type=type(exc).__name__, + ) + import os as _os2 + + yaml_mtime2: float | None = None + try: + recipe_yaml2 = getattr(state.recipe, "_yaml_path", None) or getattr( + state.recipe, "yaml_path", None + ) + if recipe_yaml2 is not None: + yaml_mtime2 = _os2.stat(recipe_yaml2).st_mtime + except OSError: + pass + state.sidecar_unsupported = True + state.sidecar_unsupported_at_mtime = yaml_mtime2 return False if not sidecar_path.exists(): + state.sidecar_io_error_count = 0 return False try: sidecar_contents = sidecar_path.read_text(encoding="utf-8") + state.sidecar_io_error_count = 0 except OSError as exc: # Can't read the sidecar — be conservative and let the full stat run. # Distinguish ENOENT (sidecar was deleted between exists() and read_text) # from other OS errors (permission denied, I/O error) so operators can # diagnose misconfigured file permissions without reading raw tracebacks. - import errno # noqa: PLC0415 - if exc.errno == errno.ENOENT: - logger.debug( - "sidecar_read_failed", - path=str(sidecar_path), - error_class=type(exc).__name__, - reason="ENOENT", - ) + # W3: if the sidecar was present on the previous poll + # (last_sidecar_contents is not None), emit a one-time WARNING so + # operators can detect tooling that is removing sidecar files. + if state.last_sidecar_contents is not None: + logger.warning( + "sidecar_disappeared", + path=str(sidecar_path), + ) + state.last_sidecar_contents = None + else: + logger.debug( + "sidecar_read_failed", + path=str(sidecar_path), + error_class=type(exc).__name__, + reason="ENOENT", + ) # ENOENT: sidecar deleted between exists() and read_text — treat as # absent (no change signal); let the full-stat path decide. 
+ state.sidecar_io_error_count = 0 return False else: + state.sidecar_io_error_count += 1 logger.warning( "sidecar_read_failed", path=str(sidecar_path), error_class=type(exc).__name__, errno=exc.errno, ) + if state.sidecar_io_error_count >= 3: + # After 3 consecutive non-ENOENT errors, stop triggering full + # reloads on every tick to avoid a reload storm from a + # persistently unreadable sidecar (m7). The flag is cleared + # when the next yaml_mtime change is detected (C4 logic above). + import os as _os3 + + yaml_mtime3: float | None = None + try: + recipe_yaml3 = getattr(state.recipe, "_yaml_path", None) or getattr( + state.recipe, "yaml_path", None + ) + if recipe_yaml3 is not None: + yaml_mtime3 = _os3.stat(recipe_yaml3).st_mtime + except OSError: + pass + state.sidecar_unsupported = True + state.sidecar_unsupported_at_mtime = yaml_mtime3 + state.sidecar_io_error_count = 0 + logger.warning( + "sidecar_io_errors_suppressed", + path=str(sidecar_path), + ) + return False # Non-ENOENT OSError (e.g. PermissionError, I/O error): trigger a # reload so that if the main artifact read also fails, _record_load_failure # surfaces the problem in /health (I-10). @@ -1294,7 +1463,18 @@ def build_initial_states( state.last_sidecar_contents = sidecar_path.read_text( encoding="utf-8" ) - except (TypeError, OSError): - pass # Remote URIs or unreadable sidecars: leave as None. 
+ except TypeError as exc: + logger.warning( + "sidecar_path_type_error", + path=str(artifact_path), + exc_type=type(exc).__name__, + ) + except OSError as exc: + if not isinstance(exc, FileNotFoundError): + logger.warning( + "sidecar_read_failed", + path=str(artifact_path), + exc_type=type(exc).__name__, + ) states[recipe.name] = state return states diff --git a/tests/conftest.py b/tests/conftest.py index e7385531..93bf2309 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -345,3 +345,126 @@ def movielens_small_df(movielens_df: pd.DataFrame) -> pd.DataFrame: """50 most-active users from MovieLens100K — fast training slice.""" top_users = movielens_df["user_id"].value_counts().head(50).index return movielens_df[movielens_df["user_id"].isin(top_users)].reset_index(drop=True) + + +# --------------------------------------------------------------------------- +# Serving app builder for unit / integration tests +# --------------------------------------------------------------------------- +# +# ``create_app`` (in src/recotem/serving/app.py) registers a RequestIDMiddleware +# that sets ``request.state.request_id`` and two custom exception handlers +# that flatten error bodies and route 422s through a validation handler. +# +# Most tests don't want to build a full ServeConfig; they just want a router +# wired to a hand-built ModelRegistry. This helper mirrors what create_app +# does (middleware + handlers) so the tests exercise the production response +# shape without dragging in recipes-dir loading. +# --------------------------------------------------------------------------- + + +def build_v1_app( + registry, + api_keys=None, +): + """Build a FastAPI app mounting the v1 router with production middleware. + + Parameters + ---------- + registry: + ``ModelRegistry`` populated with the model entries the test needs. + api_keys: + Optional list of ``ApiKeyEntry`` (defaults to []). 
+ + Returns + ------- + FastAPI + Application with the v1 router mounted at ``/v1`` plus the + RequestIDMiddleware and the flat-body / validation-error + exception handlers registered exactly as ``create_app`` would. + """ + + from fastapi import FastAPI, Request + from fastapi.exceptions import HTTPException, RequestValidationError + from fastapi.responses import JSONResponse + + from recotem.serving import metrics as _metrics + from recotem.serving.app import ( + _DEFAULT_DETAIL_FOR, + _V1_VERB_PATH_RE, + RequestIDMiddleware, + ) + from recotem.serving.routes import make_router + + app = FastAPI() + + @app.exception_handler(HTTPException) + async def _http_exception_handler( + request: Request, exc: HTTPException + ) -> JSONResponse: + headers = getattr(exc, "headers", None) + if isinstance(exc.detail, dict): + content: dict[str, Any] = dict(exc.detail) + content.setdefault( + "detail", _DEFAULT_DETAIL_FOR.get(exc.status_code, "Error") + ) + else: + content = {"detail": exc.detail} + return JSONResponse( + status_code=exc.status_code, + content=content, + headers=headers, + ) + + @app.exception_handler(RequestValidationError) + async def _validation_error_handler( + request: Request, exc: RequestValidationError + ) -> JSONResponse: + match = _V1_VERB_PATH_RE.match(request.url.path) + if match is not None: + _metrics.record_v1_request( + recipe=match.group("name"), + verb=match.group("verb"), + status="validation_error", + latency_seconds=0.0, + ) + request_id = getattr(request.state, "request_id", "") + sanitized_errors = [ + {k: v for k, v in err.items() if k not in ("input", "ctx")} + for err in exc.errors() + ] + return JSONResponse( + status_code=422, + content={ + "request_id": request_id, + "detail": "Request validation failed", + "code": "VALIDATION_ERROR", + "errors": sanitized_errors, + }, + ) + + # Structured exception handler for unhandled non-HTTP exceptions. 
+ # FastAPI's default 500 response is a plain text "Internal Server Error" + # string which leaks no details. We register our own handler to ensure + # the response is JSON-formatted with a stable structure that clients can + # parse, while still NOT leaking stack traces. Starlette dispatches + # HTTPException to its dedicated handler first, so we never receive it here. + @app.exception_handler(Exception) + async def _unhandled_exception_handler( + request: Request, exc: Exception + ) -> JSONResponse: + request_id = getattr(request.state, "request_id", "") + headers = {"X-Request-ID": request_id} if request_id else None + return JSONResponse( + status_code=500, + content={"detail": "internal error", "code": "INTERNAL_ERROR"}, + headers=headers, + ) + + app.add_middleware(RequestIDMiddleware) + + router = make_router( + registry=registry, + api_keys=api_keys or [], + ) + app.include_router(router, prefix="/v1") + return app diff --git a/tests/e2e/run.sh b/tests/e2e/run.sh index 7f0ba3fe..3d6913f3 100755 --- a/tests/e2e/run.sh +++ b/tests/e2e/run.sh @@ -144,7 +144,7 @@ SERVE_PID=$! echo "[e2e] Waiting for server to start (pid=${SERVE_PID})..." MAX_WAIT=30 WAITED=0 -while ! curl -sf "http://127.0.0.1:${SERVE_PORT}/health" > /dev/null 2>&1; do +while ! curl -sf "http://127.0.0.1:${SERVE_PORT}/v1/health" > /dev/null 2>&1; do sleep 1 WAITED=$((WAITED + 1)) if [ "${WAITED}" -ge "${MAX_WAIT}" ]; then @@ -157,9 +157,9 @@ echo "[e2e] Server is up." # --------------------------------------------------------------------------- # 6. Health check # --------------------------------------------------------------------------- -echo "[e2e] Checking /health..." -HEALTH=$(curl -sf "http://127.0.0.1:${SERVE_PORT}/health") -echo "[e2e] /health response: ${HEALTH}" +echo "[e2e] Checking /v1/health..." 
+HEALTH=$(curl -sf "http://127.0.0.1:${SERVE_PORT}/v1/health") +echo "[e2e] /v1/health response: ${HEALTH}" STATUS=$(echo "${HEALTH}" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d.get('status','unknown'))") if [ "${STATUS}" != "ok" ]; then @@ -168,29 +168,149 @@ if [ "${STATUS}" != "ok" ]; then fi # --------------------------------------------------------------------------- -# 7. /predict call +# 7. /v1/recipes/{name}:recommend call # --------------------------------------------------------------------------- -echo "[e2e] Calling /predict/${RECIPE_NAME}..." +echo "[e2e] Calling /v1/recipes/${RECIPE_NAME}:recommend..." PREDICT=$(curl -sf \ -X POST \ -H "Content-Type: application/json" \ - -d "{\"user_id\": \"${PREDICT_USER_ID}\", \"cutoff\": 5}" \ - "http://127.0.0.1:${SERVE_PORT}/predict/${RECIPE_NAME}") -echo "[e2e] /predict response: ${PREDICT}" + -d "{\"user_id\": \"${PREDICT_USER_ID}\", \"limit\": 5}" \ + "http://127.0.0.1:${SERVE_PORT}/v1/recipes/${RECIPE_NAME}:recommend") +echo "[e2e] /v1/recipes/:recommend response: ${PREDICT}" -# Validate JSON shape: must have items, model, request_id +# Validate JSON shape: must have items, recipe, model_version, request_id python3 - <<PYEOF import sys, json data = json.loads('''${PREDICT}''') assert "items" in data, f"Missing 'items' key: {data}" assert isinstance(data["items"], list), "items must be a list" -assert "model" in data, f"Missing 'model' key: {data}" +assert "recipe" in data, f"Missing 'recipe' key: {data}" +assert data["recipe"] == "${RECIPE_NAME}", f"Wrong recipe name: {data['recipe']}" assert "request_id" in data, f"Missing 'request_id' key: {data}" -model = data["model"] -assert "recipe" in model, f"Missing 'recipe' in model: {model}" -assert model["recipe"] == "${RECIPE_NAME}", f"Wrong recipe name: {model['recipe']}" +assert "model_version" in data, f"Missing 'model_version' key: {data}" print("[e2e] JSON shape validation: PASSED") PYEOF +# ---- 8. 
GET /v1/recipes ---- +echo "[e2e] Calling GET /v1/recipes..." +RECIPES_LIST=$(curl -sf "http://127.0.0.1:${SERVE_PORT}/v1/recipes") +echo "[e2e] GET /v1/recipes response: ${RECIPES_LIST}" + +python3 - <<PYEOF +import sys, json +data = json.loads('''${RECIPES_LIST}''') +assert "recipes" in data, f"Missing 'recipes' key: {data}" +assert isinstance(data["recipes"], list), "'recipes' must be a list" +names = [r["name"] for r in data["recipes"]] +assert "${RECIPE_NAME}" in names, f"Recipe '${RECIPE_NAME}' not found in list: {names}" +print("[e2e] GET /v1/recipes validation: PASSED") +PYEOF + +# ---- 9. GET /v1/recipes/{name} ---- +echo "[e2e] Calling GET /v1/recipes/${RECIPE_NAME}..." +RECIPE_DETAIL=$(curl -sf "http://127.0.0.1:${SERVE_PORT}/v1/recipes/${RECIPE_NAME}") +echo "[e2e] GET /v1/recipes/${RECIPE_NAME} response: ${RECIPE_DETAIL}" + +python3 - <<PYEOF +import sys, json +data = json.loads('''${RECIPE_DETAIL}''') +for key in ("name", "model_version", "loaded_at", "kind", "supported_verbs"): + assert key in data, f"Missing '{key}' key: {data}" +assert data["name"] == "${RECIPE_NAME}", f"Wrong name: {data['name']}" +assert isinstance(data["supported_verbs"], list), "'supported_verbs' must be a list" +assert len(data["supported_verbs"]) > 0, "'supported_verbs' must be non-empty" +print("[e2e] GET /v1/recipes/{name} validation: PASSED") +PYEOF + +# ---- 10. Parse seed item_id from prior :recommend response ---- +SEED_ITEM_ID=$(python3 - <<PYEOF +import sys, json +data = json.loads('''${PREDICT}''') +items = data.get("items", []) +if items: + print(items[0]["item_id"]) +else: + # Fallback: item IDs in synthetic data are "i0".."i49" + print("i0") +PYEOF +) +echo "[e2e] Using seed item_id='${SEED_ITEM_ID}' for :recommend-related" + +# ---- 11. POST /v1/recipes/{name}:recommend-related ---- +echo "[e2e] Calling /v1/recipes/${RECIPE_NAME}:recommend-related..." 
+RELATED=$(curl -sf \ + -X POST \ + -H "Content-Type: application/json" \ + -d "{\"seed_items\": [\"${SEED_ITEM_ID}\"], \"limit\": 5}" \ + "http://127.0.0.1:${SERVE_PORT}/v1/recipes/${RECIPE_NAME}:recommend-related") +echo "[e2e] :recommend-related response: ${RELATED}" + +python3 - <<PYEOF +import sys, json +data = json.loads('''${RELATED}''') +assert "items" in data, f"Missing 'items' key: {data}" +assert isinstance(data["items"], list), "'items' must be a list" +assert len(data["items"]) >= 1, "Expected at least one related item" +assert "recipe" in data, f"Missing 'recipe' key: {data}" +assert "model_version" in data, f"Missing 'model_version' key: {data}" +assert "request_id" in data, f"Missing 'request_id' key: {data}" +print("[e2e] :recommend-related validation: PASSED") +PYEOF + +# ---- 12. POST /v1/recipes/{name}:batch-recommend ---- +echo "[e2e] Calling /v1/recipes/${RECIPE_NAME}:batch-recommend..." +# Send two requests: one known user and one unknown user +BATCH=$(curl -sf \ + -X POST \ + -H "Content-Type: application/json" \ + -d "{\"requests\": [{\"user_id\": \"${PREDICT_USER_ID}\", \"limit\": 3}, {\"user_id\": \"__definitely_unknown_user__\", \"limit\": 3}]}" \ + "http://127.0.0.1:${SERVE_PORT}/v1/recipes/${RECIPE_NAME}:batch-recommend") +echo "[e2e] :batch-recommend response: ${BATCH}" + +python3 - <<PYEOF +import sys, json +data = json.loads('''${BATCH}''') +assert "results" in data, f"Missing 'results' key: {data}" +assert isinstance(data["results"], list), "'results' must be a list" +assert len(data["results"]) == 2, f"Expected 2 results, got {len(data['results'])}" +assert "recipe" in data, f"Missing 'recipe' key: {data}" +assert "model_version" in data, f"Missing 'model_version' key: {data}" +assert "request_id" in data, f"Missing 'request_id' key: {data}" + +# First request (known user) should succeed +r0 = data["results"][0] +assert r0["index"] == 0, f"Expected index 0, got {r0['index']}" +assert r0["status"] == "ok", f"Expected status 'ok' 
for known user, got {r0['status']}" +assert isinstance(r0["items"], list), "items for known user must be a list" + +# Second request (unknown user) should have error status +r1 = data["results"][1] +assert r1["index"] == 1, f"Expected index 1, got {r1['index']}" +assert r1["status"] == "error", f"Expected status 'error' for unknown user, got {r1['status']}" +assert r1["error"] is not None, "error field must be present for unknown user" +assert r1["error"]["code"] == "UNKNOWN_USER", f"Expected UNKNOWN_USER code, got {r1['error']['code']}" +print("[e2e] :batch-recommend validation: PASSED") +PYEOF + +# ---- 13. X-Request-ID echo ---- +echo "[e2e] Testing X-Request-ID echo via :recommend..." +TRACED=$(curl -sf \ + -X POST \ + -H "Content-Type: application/json" \ + -H "X-Request-ID: e2e-trace-001" \ + -d "{\"user_id\": \"${PREDICT_USER_ID}\", \"limit\": 3}" \ + "http://127.0.0.1:${SERVE_PORT}/v1/recipes/${RECIPE_NAME}:recommend") +echo "[e2e] X-Request-ID echo response: ${TRACED}" + +python3 - <<PYEOF +import sys, json +data = json.loads('''${TRACED}''') +assert "request_id" in data, f"Missing 'request_id' key: {data}" +assert data["request_id"] == "e2e-trace-001", ( + f"Expected request_id 'e2e-trace-001', got {data['request_id']!r}" +) +print("[e2e] X-Request-ID echo validation: PASSED") +PYEOF + echo "[e2e] All checks passed!" 
exit 0 diff --git a/tests/integration/test_hot_swap_concurrency.py b/tests/integration/test_hot_swap_concurrency.py index d7988741..7faaa80e 100644 --- a/tests/integration/test_hot_swap_concurrency.py +++ b/tests/integration/test_hot_swap_concurrency.py @@ -8,9 +8,13 @@ import threading import time +from concurrent.futures import ThreadPoolExecutor from unittest.mock import MagicMock +from fastapi.testclient import TestClient + from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app def _make_entry(name: str, version: int) -> ModelEntry: @@ -114,3 +118,101 @@ def test_two_consecutive_swaps_register_second_artifact() -> None: registry.replace("r", e3) assert registry.get("r") is e3 + + +# --------------------------------------------------------------------------- +# Finding 2: concurrent HTTP recommend requests during registry.replace_with_marker +# --------------------------------------------------------------------------- + + +def _make_http_entry(version: int) -> ModelEntry: + """Build a loaded ModelEntry for HTTP testing with stable recommender.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [ + (f"item{i}", 1.0 - i * 0.1) for i in range(3) + ] + return ModelEntry( + name="concurrent_recipe", + recommender=rec, + header={"best_class": "TopPop"}, + kid="active", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, f"{version:064x}"), + loaded_at_unix=float(version), + ) + + +def test_http_concurrent_recommend_during_registry_replace() -> None: + """N (>=20) concurrent :recommend requests while a background thread calls + registry.replace_with_marker repeatedly. + + Assertions: + - No 500 responses; every response is 200 or 503. + - Every 200 carries X-Recotem-Model-Version matching the body's model_version. 
+ """ + N_REQUESTS = 30 + N_SWAPS = 20 + + registry = ModelRegistry() + registry.replace("concurrent_recipe", _make_http_entry(1)) + + app = build_v1_app(registry) + # TestClient is WSGI-based; use ThreadPoolExecutor for concurrent requests + client = TestClient(app, raise_server_exceptions=False) + + errors: list[str] = [] + responses: list[tuple[int, dict]] = [] + lock = threading.Lock() + + def _do_request() -> None: + try: + r = client.post( + "/v1/recipes/concurrent_recipe:recommend", + json={"user_id": "u1", "limit": 3}, + ) + with lock: + responses.append((r.status_code, r.headers)) + if r.status_code not in (200, 503): + errors.append(f"Unexpected status {r.status_code}: {r.text[:200]}") + elif r.status_code == 200: + body = r.json() + hdr_val = r.headers.get("x-recotem-model-version", "") + body_ver = body.get("model_version", "") + if hdr_val != body_ver: + errors.append( + f"Header version {hdr_val!r} != body version {body_ver!r}" + ) + except Exception as exc: + with lock: + errors.append(f"Request raised: {exc}") + + swap_stop = threading.Event() + + def _swap_loop() -> None: + version = 2 + while not swap_stop.is_set(): + new_entry = _make_http_entry(version) + marker = (None, f"{version:064x}") + registry.replace_with_marker("concurrent_recipe", new_entry, marker) + version += 1 + time.sleep(0.001) + + swap_thread = threading.Thread(target=_swap_loop, daemon=True) + swap_thread.start() + + with ThreadPoolExecutor(max_workers=N_REQUESTS) as pool: + futures = [pool.submit(_do_request) for _ in range(N_REQUESTS)] + for f in futures: + f.result(timeout=10.0) + + swap_stop.set() + swap_thread.join(timeout=2.0) + + ok_count = sum(1 for sc, _ in responses if sc == 200) + assert len(responses) == N_REQUESTS, ( + f"Expected {N_REQUESTS} responses; got {len(responses)}" + ) + assert not errors, "Concurrent recommend/swap errors:\n" + "\n".join(errors) + assert ok_count > 0, "At least some requests must succeed with 200" diff --git 
a/tests/integration/test_serve_predict_e2e.py b/tests/integration/test_serve_predict_e2e.py index f73c95a9..a2cc765f 100644 --- a/tests/integration/test_serve_predict_e2e.py +++ b/tests/integration/test_serve_predict_e2e.py @@ -1,28 +1,34 @@ -"""Integration test: in-process train + serve + httpx /predict call. +"""Integration test: in-process train + serve + v1 recommend call. Uses FastAPI TestClient for synchronous testing without a real server. Trains a TopPop model on synthetic data, writes a signed artifact, -then serves it and calls /predict. +then serves it and calls the v1 recommend endpoints. """ from __future__ import annotations import hashlib +import time from pathlib import Path from unittest.mock import MagicMock -from fastapi import FastAPI from fastapi.testclient import TestClient from recotem.config import ApiKeyEntry from recotem.serving.registry import ModelEntry, ModelRegistry -from recotem.serving.routes import make_router +from tests.conftest import build_v1_app ACTIVE_KEY_HEX = "aa" * 32 +_FAKE_SHA256_HEX = "dead" * 16 # 64 lowercase hex chars for a valid Sha256Hex marker +_FAKE_CONFIG_DIGEST = "sha256:" + "cafe" * 16 # valid Sha256Hex for config_digest def _make_mock_recommender(users: list[str], items: list[str]): - """Build a MagicMock recommender that returns fixed recommendations.""" + """Build a MagicMock recommender that returns fixed recommendations. + + Sets up ``_mapper.item_id_to_index`` so the v1 ``:recommend-related`` + seed-known pre-check (added in M-4) finds the seed items. 
+ """ rec = MagicMock() def _get_rec(user_id, cutoff=10): @@ -30,7 +36,17 @@ def _get_rec(user_id, cutoff=10): return [(iid, 1.0 - i * 0.1) for i, iid in enumerate(items[:cutoff])] raise KeyError(f"Unknown user: {user_id}") + def _get_rec_new_user(seed_items, cutoff=10): + # Return a deterministic ranking of *items* that excludes the seeds — + # mimics irspack's get_recommendation_for_new_user contract closely + # enough for the v1 :recommend-related endpoint contract test. + seed_set = set(seed_items) + ranked = [iid for iid in items if iid not in seed_set] + return [(iid, 1.0 - i * 0.1) for i, iid in enumerate(ranked[:cutoff])] + rec.get_recommendation_for_known_user_id.side_effect = _get_rec + rec.get_recommendation_for_new_user.side_effect = _get_rec_new_user + rec._mapper.item_id_to_index = {iid: i for i, iid in enumerate(items)} return rec @@ -54,7 +70,7 @@ def _make_api_entry(plaintext: str, kid: str = "api-key") -> ApiKeyEntry: def test_serve_predict_e2e_in_process() -> None: - """Train-like mock → serve → /predict returns valid response.""" + """Train-like mock → serve → v1 :recommend returns valid response.""" users = [f"user{i}" for i in range(10)] items = [f"item{i}" for i in range(20)] @@ -68,6 +84,9 @@ def test_serve_predict_e2e_in_process() -> None: "recipe_name": "test_model", }, kid="active", + # _loaded_marker[1] is the artifact sha that backs model_version. 
+ _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, ) registry = ModelRegistry() @@ -75,14 +94,12 @@ def test_serve_predict_e2e_in_process() -> None: plaintext = "integration_test_api_key_32bytes" api_entry = _make_api_entry(plaintext) - router = make_router(registry=registry, api_keys=[api_entry]) - app = FastAPI() - app.include_router(router) + app = build_v1_app(registry, api_keys=[api_entry]) client = TestClient(app) response = client.post( - "/predict/test_model", - json={"user_id": "user0", "cutoff": 5}, + "/v1/recipes/test_model:recommend", + json={"user_id": "user0", "limit": 5}, headers={"x-api-key": plaintext}, ) assert response.status_code == 200 @@ -90,11 +107,55 @@ def test_serve_predict_e2e_in_process() -> None: assert "items" in data assert len(data["items"]) == 5 assert data["items"][0]["item_id"] == "item0" - assert "model" in data - assert data["model"]["kid"] == "active" + assert data["recipe"] == "test_model" + assert data["model_version"].startswith("sha256:") assert "request_id" in data +def test_v1_related_endpoint_returns_items() -> None: + """v1 :recommend-related returns items for a known seed item.""" + users = [f"user{i}" for i in range(10)] + items = [f"item{i}" for i in range(20)] + + rec = _make_mock_recommender(users, items) + entry = ModelEntry( + name="test_model", + recommender=rec, + header={ + "best_class": "TopPopRecommender", + "trained_at": "2026-01-01T00:00:00Z", + "recipe_name": "test_model", + }, + kid="active", + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, + ) + + registry = ModelRegistry() + registry.replace("test_model", entry) + + plaintext = "integration_test_api_key_32bytes" + api_entry = _make_api_entry(plaintext) + app = build_v1_app(registry, api_keys=[api_entry]) + client = TestClient(app) + + # "item0" is a known item id produced by _make_mock_recommender; using it + # as the seed exercises the new-user (item-based) recommend path. 
+ response = client.post( + "/v1/recipes/test_model:recommend-related", + json={"seed_items": ["item0"], "limit": 5}, + headers={"x-api-key": plaintext}, + ) + assert response.status_code == 200, response.text + data = response.json() + assert data["recipe"] == "test_model" + assert data["model_version"].startswith("sha256:") + assert "items" in data + assert len(data["items"]) >= 1 + # The seed item itself must not appear in the related results. + assert all(it["item_id"] != "item0" for it in data["items"]) + + def test_serve_predict_e2e_unknown_user_404() -> None: """Unknown user_id returns 404.""" rec = _make_mock_recommender(["known_user"], ["item1", "item2"]) @@ -103,59 +164,60 @@ def test_serve_predict_e2e_unknown_user_404() -> None: recommender=rec, header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, kid="active", + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, ) registry = ModelRegistry() registry.replace("model2", entry) - router = make_router(registry=registry, api_keys=[]) - app = FastAPI() - app.include_router(router) + app = build_v1_app(registry) client = TestClient(app, raise_server_exceptions=False) response = client.post( - "/predict/model2", + "/v1/recipes/model2:recommend", json={"user_id": "total_stranger"}, ) assert response.status_code == 404 -def test_serve_predict_e2e_missing_recipe_503() -> None: - """Recipe not in registry returns 503.""" +def test_serve_predict_e2e_missing_recipe_404() -> None: + """Recipe not in registry returns 404 (not found).""" registry = ModelRegistry() - router = make_router(registry=registry, api_keys=[]) - app = FastAPI() - app.include_router(router) + app = build_v1_app(registry) client = TestClient(app, raise_server_exceptions=False) - response = client.post("/predict/does_not_exist", json={"user_id": "user1"}) - assert response.status_code == 503 + response = client.post( + "/v1/recipes/does_not_exist:recommend", + json={"user_id": "user1"}, + ) + assert response.status_code 
== 404 def test_serve_health_endpoint_ok_with_loaded_model() -> None: - """GET /health returns ok when a model is loaded.""" + """GET /v1/health returns ok when a model is loaded.""" rec = _make_mock_recommender(["u1"], ["i1"]) entry = ModelEntry( name="healthy_recipe", recommender=rec, header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, kid="k1", + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, ) registry = ModelRegistry() registry.replace("healthy_recipe", entry) - router = make_router(registry=registry, api_keys=[]) - app = FastAPI() - app.include_router(router) + app = build_v1_app(registry) client = TestClient(app) - response = client.get("/health") + response = client.get("/v1/health") assert response.status_code == 200 data = response.json() assert data["status"] == "ok" assert data["total"] == 1 assert data["loaded"] == 1 - details_resp = client.get("/health/details") + details_resp = client.get("/v1/health/details") assert details_resp.status_code == 200 details = details_resp.json() assert "healthy_recipe" in details["recipes"] @@ -398,24 +460,24 @@ def test_broken_yaml_does_not_abort_serve_other_recipes_still_serve( app = create_app(cfg) client = TestClient(app) - # /health must be 503 because both recipes are degraded - health_resp = client.get("/health") + # /v1/health must be 503 because both recipes are degraded + health_resp = client.get("/v1/health") assert health_resp.status_code == 503 body = health_resp.json() assert body["status"] == "degraded" assert body["loaded"] == 0 assert body["total"] == 2 - # Per-recipe detail moved to /health/details (auth-gated path). - # In this test, insecure_no_auth=True, so /health/details is reachable + # Per-recipe detail moved to /v1/health/details (auth-gated path). + # In this test, insecure_no_auth=True, so /v1/health/details is reachable # without API keys. 
- details_resp = client.get("/health/details") + details_resp = client.get("/v1/health/details") assert details_resp.status_code == 503 details = details_resp.json() # Broken recipe must appear with loaded=false and error info. assert "broken_recipe" in details["recipes"], ( - f"broken_recipe must appear in /health/details; " + f"broken_recipe must appear in /v1/health/details; " f"got: {list(details['recipes'].keys())}" ) broken_entry = details["recipes"]["broken_recipe"] @@ -423,7 +485,7 @@ def test_broken_yaml_does_not_abort_serve_other_recipes_still_serve( f"broken YAML recipe must have loaded=false; got {broken_entry!r}" ) assert broken_entry.get("error"), ( - "broken YAML recipe must have an error string in /health/details" + "broken YAML recipe must have an error string in /v1/health/details" ) assert "YAML parse failed" in (broken_entry.get("error") or ""), ( f"error must mention YAML parse failed; got {broken_entry.get('error')!r}" @@ -431,19 +493,278 @@ def test_broken_yaml_does_not_abort_serve_other_recipes_still_serve( # Good recipe must also appear (as missing-artifact stub) assert "good_recipe" in details["recipes"], ( - "good_recipe must appear in /health/details even when artifact is missing" + "good_recipe must appear in /v1/health/details even when artifact is missing" ) - # /predict for the broken recipe must return 503 + # v1 :recommend for the broken recipe must return 503 predict_broken = client.post( - "/predict/broken_recipe", json={"user_id": "u1", "cutoff": 5} + "/v1/recipes/broken_recipe:recommend", + json={"user_id": "u1", "limit": 5}, ) assert predict_broken.status_code == 503, ( - f"broken recipe /predict must return 503; got {predict_broken.status_code}" + f"broken recipe :recommend must return 503; got {predict_broken.status_code}" ) - # /predict for the good (missing artifact) recipe must also return 503 + # v1 :recommend for the good (missing artifact) recipe must also return 503 predict_good = client.post( - 
"/predict/good_recipe", json={"user_id": "u1", "cutoff": 5} + "/v1/recipes/good_recipe:recommend", + json={"user_id": "u1", "limit": 5}, ) assert predict_good.status_code == 503 + + +# --------------------------------------------------------------------------- +# Shared setup helper for new v1-surface integration tests +# --------------------------------------------------------------------------- + + +def _make_registry_and_client( + users: list[str], + items: list[str], + recipe_name: str = "test_model", + plaintext: str = "integration_test_api_key_32bytes", + algorithms: list[str] | None = None, + config_digest: str = _FAKE_CONFIG_DIGEST, +) -> tuple[ModelRegistry, TestClient, str]: + """Build a populated registry, a FastAPI TestClient, and the auth key. + + Returns (registry, client, plaintext_api_key). + Used by at least two test functions — extracted to avoid duplication. + """ + rec = _make_mock_recommender(users, items) + entry = ModelEntry( + name=recipe_name, + recommender=rec, + header={ + "best_class": "TopPopRecommender", + "trained_at": "2026-01-01T00:00:00Z", + "recipe_name": recipe_name, + }, + kid="active", + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=time.time(), + algorithms=algorithms or ["TopPopRecommender"], + config_digest=config_digest, + ) + registry = ModelRegistry() + registry.replace(recipe_name, entry) + + api_entry = _make_api_entry(plaintext) + app = build_v1_app(registry, api_keys=[api_entry]) + client = TestClient(app) + return registry, client, plaintext + + +# --------------------------------------------------------------------------- +# New integration tests: batch-recommend, batch-recommend-related, discovery +# --------------------------------------------------------------------------- + + +def test_batch_recommend_train_serve_call() -> None: + """POST :batch-recommend returns per-request results for known and unknown users.""" + users = [f"user{i}" for i in range(5)] + items = [f"item{i}" for i in range(10)] + + 
_, client, plaintext = _make_registry_and_client(users, items) + + # Three requests: two known users, one unknown user. + response = client.post( + "/v1/recipes/test_model:batch-recommend", + json={ + "requests": [ + {"user_id": "user0", "limit": 3}, + {"user_id": "user2", "limit": 3}, + {"user_id": "totally_unknown_user", "limit": 3}, + ] + }, + headers={"x-api-key": plaintext}, + ) + assert response.status_code == 200, response.text + data = response.json() + + # Top-level envelope fields. + assert "results" in data + assert "recipe" in data + assert "model_version" in data + assert "request_id" in data + assert data["recipe"] == "test_model" + assert data["model_version"].startswith("sha256:") + + results = data["results"] + assert len(results) == 3 + + # Index 0: known user — must succeed with items. + r0 = results[0] + assert r0["index"] == 0 + assert r0["status"] == "ok" + assert isinstance(r0["items"], list) + assert len(r0["items"]) == 3 + assert r0["items"][0]["item_id"] == "item0" + + # Index 1: another known user — must also succeed. + r1 = results[1] + assert r1["index"] == 1 + assert r1["status"] == "ok" + assert isinstance(r1["items"], list) + + # Index 2: unknown user — must carry UNKNOWN_USER error, no items. + # Under the discriminated-union schema, _BatchResultErr has no "items" field. 
+ r2 = results[2] + assert r2["index"] == 2 + assert r2["status"] == "error" + assert "items" not in r2, "_BatchResultErr must not carry 'items' field" + assert r2["error"] is not None + assert r2["error"]["code"] == "UNKNOWN_USER" + + +def test_batch_recommend_related_train_serve_call() -> None: + """POST :batch-recommend-related handles known seed items and unknown seeds.""" + users = [f"user{i}" for i in range(5)] + items = [f"item{i}" for i in range(10)] + + _, client, plaintext = _make_registry_and_client(users, items) + + # Three requests, each exercising a distinct error branch: + # - index 0: known seed → status=ok + # - index 1: every item is a seed → ranker returns [] (NO_CANDIDATES, + # the seeds are all known to the id-map but nothing is left to rank) + # - index 2: seed with no member in the id-map → UNKNOWN_SEED_ITEMS + all_item_seeds = [f"item{i}" for i in range(10)] + response = client.post( + "/v1/recipes/test_model:batch-recommend-related", + json={ + "requests": [ + {"seed_items": ["item0"], "limit": 3}, + {"seed_items": all_item_seeds, "limit": 3}, + {"seed_items": ["unknown-stranger"], "limit": 3}, + ] + }, + headers={"x-api-key": plaintext}, + ) + assert response.status_code == 200, response.text + data = response.json() + + # Top-level envelope. + assert "results" in data + assert "recipe" in data + assert "model_version" in data + assert "request_id" in data + assert data["recipe"] == "test_model" + assert data["model_version"].startswith("sha256:") + + results = data["results"] + assert len(results) == 3 + + # Index 0: known seed item — returns items, seed excluded. + r0 = results[0] + assert r0["index"] == 0 + assert r0["status"] == "ok" + assert isinstance(r0["items"], list) + assert len(r0["items"]) >= 1 + # The seed "item0" must not appear in the related results. + assert all(it["item_id"] != "item0" for it in r0["items"]) + + # Index 1: seeds known but ranker returns [] → NO_CANDIDATES. 
+ # Under the discriminated-union schema, _BatchResultErr has no "items" field. + r1 = results[1] + assert r1["index"] == 1 + assert r1["status"] == "error" + assert "items" not in r1, "_BatchResultErr must not carry 'items' field" + assert r1["error"] is not None + assert r1["error"]["code"] == "NO_CANDIDATES" + + # Index 2: seed not in id-map → UNKNOWN_SEED_ITEMS. + r2 = results[2] + assert r2["index"] == 2 + assert r2["status"] == "error" + assert r2["error"]["code"] == "UNKNOWN_SEED_ITEMS" + + +def test_recipes_discovery_list_and_detail() -> None: + """GET /v1/recipes and /v1/recipes/{name} return full schema after model load.""" + users = [f"user{i}" for i in range(5)] + items = [f"item{i}" for i in range(10)] + + _, client, plaintext = _make_registry_and_client( + users, + items, + recipe_name="discovery_model", + algorithms=["TopPopRecommender"], + config_digest=_FAKE_CONFIG_DIGEST, + ) + + # --- GET /v1/recipes (list) --- + list_resp = client.get( + "/v1/recipes", + headers={"x-api-key": plaintext}, + ) + assert list_resp.status_code == 200, list_resp.text + list_data = list_resp.json() + + assert "recipes" in list_data + assert isinstance(list_data["recipes"], list) + names = [r["name"] for r in list_data["recipes"]] + assert "discovery_model" in names + + # Validate RecipeSummary shape in the list entry. + summary = next(r for r in list_data["recipes"] if r["name"] == "discovery_model") + assert "model_version" in summary + assert summary["model_version"].startswith("sha256:") + assert "loaded_at" in summary + # loaded_at must be a non-empty ISO-8601 UTC string. 
+ assert summary["loaded_at"].endswith("Z") + assert "kind" in summary + assert summary["kind"] == "user-item" + assert "supported_verbs" in summary + assert isinstance(summary["supported_verbs"], list) + expected_verbs = { + "recommend", + "recommend-related", + "batch-recommend", + "batch-recommend-related", + } + assert set(summary["supported_verbs"]) == expected_verbs + + # --- GET /v1/recipes/{name} (detail) --- + detail_resp = client.get( + "/v1/recipes/discovery_model", + headers={"x-api-key": plaintext}, + ) + assert detail_resp.status_code == 200, detail_resp.text + detail = detail_resp.json() + + # RecipeDetailResponse extends RecipeSummary with config_digest, algorithms, + # and best_algorithm. + for field in ( + "name", + "model_version", + "loaded_at", + "kind", + "supported_verbs", + "config_digest", + "algorithms", + "best_algorithm", + ): + assert field in detail, f"Missing field '{field}' in detail response" + + assert detail["name"] == "discovery_model" + assert detail["model_version"].startswith("sha256:") + assert detail["loaded_at"].endswith("Z") + assert detail["kind"] == "user-item" + assert isinstance(detail["supported_verbs"], list) + assert set(detail["supported_verbs"]) == expected_verbs + + # Config digest and algorithms must match what was set in ModelEntry. + assert detail["config_digest"] == _FAKE_CONFIG_DIGEST + assert isinstance(detail["algorithms"], list) + assert "TopPopRecommender" in detail["algorithms"] + + # best_algorithm is derived from header["best_class"]. 
+ assert detail["best_algorithm"] == "TopPopRecommender" + + # --- GET /v1/recipes/{name} for a non-existent recipe returns 404 --- + missing_resp = client.get( + "/v1/recipes/no_such_recipe", + headers={"x-api-key": plaintext}, + ) + assert missing_resp.status_code == 404 diff --git a/tests/integration/test_v1_model_version_rotation.py b/tests/integration/test_v1_model_version_rotation.py new file mode 100644 index 00000000..601576ab --- /dev/null +++ b/tests/integration/test_v1_model_version_rotation.py @@ -0,0 +1,98 @@ +# tests/integration/test_v1_model_version_rotation.py +"""T1: model_version actually rotates after a successful hot-swap. + +Asserts that model_version in the response body (and the matching +X-Recotem-Model-Version header) change after registry.replace_with_marker +swaps in a new artifact, and that both values conform to ``sha256:<64 hex>``. +""" + +from __future__ import annotations + +import re +from unittest.mock import MagicMock + +from fastapi.testclient import TestClient + +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + +_SHA256_PATTERN = re.compile(r"^sha256:[0-9a-f]{64}$") + +_SHA256_HEX_A = "a" * 64 # 64 lowercase hex chars +_SHA256_HEX_B = "b" * 64 # different 64 lowercase hex chars + + +def _make_entry(name: str, sha256_hex: str) -> ModelEntry: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + rec._mapper = MagicMock() + rec._mapper.user_id_to_index = {"u1": 0} + return ModelEntry( + name=name, + recommender=rec, + header={"best_class": "TopPop"}, + kid="active", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, sha256_hex), + loaded_at_unix=1747800000.0, + ) + + +def test_model_version_rotates_after_hot_swap() -> None: + """model_version changes after registry.replace_with_marker swaps in artifact B. + + 1. Load artifact A, call :recommend, capture model_version_a. + 2. 
Swap in artifact B via replace_with_marker. + 3. Call :recommend again, capture model_version_b. + 4. Assert model_version_a != model_version_b and both match sha256:<64 hex>. + 5. Assert X-Recotem-Model-Version header equals body model_version in both calls. + """ + registry = ModelRegistry() + entry_a = _make_entry("demo", _SHA256_HEX_A) + registry.replace("demo", entry_a) + + app = build_v1_app(registry) + client = TestClient(app) + + # --- Call 1: artifact A --- + r1 = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 1}) + assert r1.status_code == 200, r1.text + body1 = r1.json() + model_version_a = body1["model_version"] + header_version_a = r1.headers.get("x-recotem-model-version") + + assert _SHA256_PATTERN.match(model_version_a), ( + f"model_version_a must match sha256:<64 hex>; got {model_version_a!r}" + ) + assert header_version_a == model_version_a, ( + "X-Recotem-Model-Version header must equal body model_version for artifact A" + ) + + # --- Swap in artifact B --- + entry_b = _make_entry("demo", _SHA256_HEX_B) + marker_b = (None, _SHA256_HEX_B) + registry.replace_with_marker("demo", entry_b, marker_b) + + # --- Call 2: artifact B --- + r2 = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 1}) + assert r2.status_code == 200, r2.text + body2 = r2.json() + model_version_b = body2["model_version"] + header_version_b = r2.headers.get("x-recotem-model-version") + + assert _SHA256_PATTERN.match(model_version_b), ( + f"model_version_b must match sha256:<64 hex>; got {model_version_b!r}" + ) + assert header_version_b == model_version_b, ( + "X-Recotem-Model-Version header must equal body model_version for artifact B" + ) + + # --- Core assertion --- + assert model_version_a != model_version_b, ( + "model_version must rotate after a hot-swap: " + f"model_version_a={model_version_a!r}, model_version_b={model_version_b!r}" + ) + assert model_version_a == f"sha256:{_SHA256_HEX_A}" + assert model_version_b == 
f"sha256:{_SHA256_HEX_B}" diff --git a/tests/integration/test_v1_yaml_deleted_runtime.py b/tests/integration/test_v1_yaml_deleted_runtime.py new file mode 100644 index 00000000..c4fc05f1 --- /dev/null +++ b/tests/integration/test_v1_yaml_deleted_runtime.py @@ -0,0 +1,188 @@ +# tests/integration/test_v1_yaml_deleted_runtime.py +"""T4: YAML deleted at runtime → v1 :recommend returns 404 or 503. + +Scenario: + 1. Start serving with a recipes dir containing one valid recipe + loaded artifact. + 2. Issue :recommend — assert 200. + 3. Delete the recipe YAML file. + 4. Wait for the watcher to observe the deletion (watches at WATCH_INTERVAL=0.05s). + 5. Issue :recommend again — assert 404 (RECIPE_NOT_FOUND) once the registry + has removed the entry, OR 503 (RECIPE_UNAVAILABLE) during transition. + Either is acceptable; we assert it is one of the two. + +Why: verifies the end-to-end HTTP path for YAML-deletion removal, complementing +the existing registry-level unit test. +""" + +from __future__ import annotations + +import time +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from recotem.artifact.signing import KeyRing +from recotem.config import ServeConfig +from recotem.serving.registry import ModelRegistry +from recotem.serving.watcher import ArtifactWatcher, _RecipeWatchState +from tests.conftest import ACTIVE_KEY_HEX, build_raw_artifact, build_v1_app + +WATCH_INTERVAL = 0.05 # seconds — must be fast for the test to be tractable + + +def _make_serve_config() -> ServeConfig: + cfg = ServeConfig() + cfg.signing_keys_raw = f"active:{ACTIVE_KEY_HEX}" + cfg.watch_interval = WATCH_INTERVAL + cfg.max_artifact_bytes = 50 * 1024 * 1024 + return cfg + + +def _write_artifact(path: Path) -> None: + """Write a minimal but valid signed artifact to *path*. + + Note: build_raw_artifact uses pickle internally (required by the artifact + format — irspack uses scipy sparse matrices which require pickle). 
+ This is a test fixture using the same pattern as conftest.py and + test_real_watcher_hot_swap.py. + """ + import pickle # noqa: S403 # test fixture: HMAC-signed artifact under test + + payload = pickle.dumps({"tag": "v1"}, protocol=4) # noqa: S301 + data = build_raw_artifact( + kid="active", + key_hex=ACTIVE_KEY_HEX, + header_dict={ + "recipe_name": "yaml_deleted", + "best_class": "TopPop", + "trained_at": "2026-01-01T00:00:00Z", + }, + payload_bytes=payload, + ) + path.write_bytes(data) + + +def _write_recipe_yaml(recipes_dir: Path, name: str, artifact_path: Path) -> Path: + content = f"""\ +name: {name} +source: + type: csv + path: /tmp/data.csv +schema: + user_column: user_id + item_column: item_id +training: + algorithms: [TopPop] + n_trials: 1 +output: + path: {artifact_path} +""" + yaml_path = recipes_dir / f"{name}.yaml" + yaml_path.write_text(content) + return yaml_path + + +def test_yaml_deleted_at_runtime_causes_recommend_404_or_503( + tmp_path: Path, +) -> None: + """Delete recipe YAML while serving → :recommend returns 404 or 503. + + Either status code is acceptable: + - 404 (RECIPE_NOT_FOUND): registry.remove() has already fired. + - 503 (RECIPE_UNAVAILABLE): stub with loaded=False registered during transition. + The test documents both possibilities and asserts it is one of the two. + """ + recipes_dir = tmp_path / "recipes" + recipes_dir.mkdir() + artifact_path = tmp_path / "model.recotem" + + _write_artifact(artifact_path) + yaml_path = _write_recipe_yaml(recipes_dir, "yaml_deleted", artifact_path) + + kr = KeyRing(f"active:{ACTIVE_KEY_HEX}") + registry = ModelRegistry() + cfg = _make_serve_config() + + from recotem.recipe.loader import load_recipe + + recipe = load_recipe(yaml_path) + + # Force initial load on first tick by using last_sha256="". 
+ initial_states: dict[str, _RecipeWatchState] = { + "yaml_deleted": _RecipeWatchState( + recipe=recipe, + artifact_path=str(artifact_path), + last_sha256="", + last_marker=None, + ), + } + + watcher = ArtifactWatcher( + registry=registry, + recipes_dir=recipes_dir, + serve_config=cfg, + key_ring=kr, + initial_states=initial_states, + ) + watcher.start() + + # --- Step 1: Wait for artifact to load --- + deadline = time.monotonic() + 5.0 + while time.monotonic() < deadline: + entry = registry.get("yaml_deleted") + if entry is not None and entry.loaded and entry.last_load_error is None: + break + time.sleep(0.05) + else: + watcher.stop() + watcher.join(timeout=3.0) + pytest.fail("Watcher did not load artifact within 5s") + + # --- Step 2: Confirm the entry exists in the registry before deletion --- + # (The deserialized payload is a plain dict, not a real recommender, so the + # HTTP :recommend call may fail — but the entry existence is what matters here.) + assert registry.get("yaml_deleted") is not None, ( + "Entry must exist in registry before YAML deletion" + ) + + app = build_v1_app(registry) + client = TestClient(app, raise_server_exceptions=False) + + # --- Step 3: Delete the YAML file --- + yaml_path.unlink() + + # --- Step 4: Wait for watcher to remove the registry entry --- + deadline = time.monotonic() + 5.0 + removed = False + while time.monotonic() < deadline: + entry = registry.get("yaml_deleted") + if entry is None: + removed = True + break + time.sleep(WATCH_INTERVAL) + + watcher.stop() + watcher.join(timeout=3.0) + + assert removed, ( + "Registry entry for 'yaml_deleted' must be removed after the YAML file " + "is deleted and the watcher completes at least one scan cycle." + ) + + # --- Step 5: Assert :recommend returns 404 after removal --- + r_after = client.post( + "/v1/recipes/yaml_deleted:recommend", + json={"user_id": "u1", "limit": 1}, + ) + # 404 RECIPE_NOT_FOUND: registry.remove() has fired. 
+ # 503 RECIPE_UNAVAILABLE: stub registered during transition (not expected here + # since the watcher removes directly, but documented as acceptable). + assert r_after.status_code in (404, 503), ( + f"After YAML deletion and watcher scan, :recommend must return 404 or 503; " + f"got {r_after.status_code}: {r_after.text}" + ) + body = r_after.json() + assert body.get("code") in ("RECIPE_NOT_FOUND", "RECIPE_UNAVAILABLE"), ( + f"Error code must be RECIPE_NOT_FOUND or RECIPE_UNAVAILABLE; got {body!r}" + ) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index bc6a1de0..4e3b38e1 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -637,7 +637,7 @@ def test_serve_smoke_starts_and_responds_to_health(tmp_path: Path, monkeypatch) app_instance = create_app(cfg) client = TestClient(app_instance) - response = client.get("/health") + response = client.get("/v1/health") assert response.status_code == 200 @@ -906,7 +906,7 @@ def test_serve_dev_allow_unsigned_allowed_in_development_env( app_instance = create_app(cfg) client = TestClient(app_instance) - response = client.get("/health") + response = client.get("/v1/health") assert response.status_code == 200 diff --git a/tests/unit/test_datasource_ga4.py b/tests/unit/test_datasource_ga4.py index 264eeb4d..3b11e774 100644 --- a/tests/unit/test_datasource_ga4.py +++ b/tests/unit/test_datasource_ga4.py @@ -302,14 +302,20 @@ def _make_page(rows, row_count=None, property_quota=None, has_data_loss=False): def test_fetch_paginates_until_drained(monkeypatch) -> None: """Pagination accumulates rows across pages; short final page ends the loop. - Page 0: 100_000 rows (full page → loop continues). + Page 0: ``_PAGE_SIZE`` rows (full page → loop continues). Page 1: 4 rows (short page → loop breaks). - Total: 100_004 rows. 
+ ``_PAGE_SIZE`` is monkey-patched to 10 to keep the mock-row count small — + the production constant (100_000) would make every full-page test build + half a million MagicMock instances and iterate them through pandas. """ + import recotem.datasource.ga4 as ga4_mod + + monkeypatch.setattr(ga4_mod, "_PAGE_SIZE", 10) + fake_mod = MagicMock() fake_client = MagicMock() - page_size = 100_000 + page_size = 10 full_rows = [ _row([f"u{i}", "i1", "20260101", "purchase"], "1") for i in range(page_size) ] @@ -319,9 +325,10 @@ def test_fetch_paginates_until_drained(monkeypatch) -> None: _row(["u3", "i3", "20260103", "purchase"], "2"), _row(["u4", "i4", "20260104", "purchase"], "5"), ] + total = page_size + len(partial_rows) pages = [ - _make_page(rows=full_rows, row_count=100_004), - _make_page(rows=partial_rows, row_count=100_004), + _make_page(rows=full_rows, row_count=total), + _make_page(rows=partial_rows, row_count=total), ] fake_client.run_report.side_effect = pages fake_mod.BetaAnalyticsDataClient.return_value = fake_client @@ -332,7 +339,7 @@ def test_fetch_paginates_until_drained(monkeypatch) -> None: src = GA4Source(_cfg(max_rows=1_000_000)) df = src.fetch(_fetch_ctx()) - assert len(df) == 100_004 + assert len(df) == total assert list(df.columns) == ["userId", "itemId", "date", "event_count"] assert df["event_count"].dtype.name == "int64" # Two API calls: one full page + one short page @@ -380,8 +387,15 @@ def test_fetch_max_rows_exceeded(monkeypatch) -> None: def test_fetch_max_pages_exceeded(monkeypatch) -> None: fake_mod = MagicMock() fake_client = MagicMock() - # Return a full page (100_000 rows) every call to never hit a short-page break - full_page_rows = [_row(["u", "i", "20260101", "purchase"], "1")] * 100_000 + + import recotem.datasource.ga4 as ga4_mod + + # Shrink page size so the production loop iterates a few rows per page, + # not 100_000. The behavior under test (max_pages cap) is independent + # of the absolute page size. 
+ monkeypatch.setattr(ga4_mod, "_PAGE_SIZE", 10) + # Return a full page every call to never hit a short-page break + full_page_rows = [_row(["u", "i", "20260101", "purchase"], "1")] * 10 resp = _make_page(rows=full_page_rows, row_count=1_000_000) fake_client.run_report.return_value = resp fake_mod.BetaAnalyticsDataClient.return_value = fake_client @@ -389,8 +403,6 @@ def test_fetch_max_pages_exceeded(monkeypatch) -> None: monkeypatch.setitem(sys.modules, "google.analytics.data_v1beta.types", MagicMock()) # Patch the module-level get_ga4_max_pages alias to 3: - import recotem.datasource.ga4 as ga4_mod - monkeypatch.setattr(ga4_mod, "get_ga4_max_pages", lambda: 3) from recotem.datasource.ga4 import GA4Source @@ -544,12 +556,16 @@ def test_fetch_permission_denied_on_page_gt_0_raises(monkeypatch) -> None: """PermissionDenied raised on page 1 (not page 0) must still be caught.""" from google.api_core.exceptions import PermissionDenied + import recotem.datasource.ga4 as ga4_mod + + monkeypatch.setattr(ga4_mod, "_PAGE_SIZE", 10) + fake_mod = MagicMock() fake_client = MagicMock() # Page 0: return a full page so the loop continues to page 1 full_page_rows = [ - _row([f"u{i}", "i", "20260101", "purchase"], "1") for i in range(100_000) + _row([f"u{i}", "i", "20260101", "purchase"], "1") for i in range(10) ] page0 = _make_page(rows=full_page_rows, row_count=2_000_000) fake_client.run_report.side_effect = [page0, PermissionDenied("denied on page 1")] @@ -570,38 +586,47 @@ def test_fetch_permission_denied_on_page_gt_0_raises(monkeypatch) -> None: def test_fetch_short_page_breaks_early(monkeypatch) -> None: - """3 full pages (100k rows each) + 1 partial page = 305k rows, no error.""" + """3 full pages + 1 partial page, no error. + + Uses a tiny ``_PAGE_SIZE`` so the test builds 35 MagicMock rows rather + than 305_000. The short-page-break behavior under test is independent + of the absolute page size. 
+ """ + import recotem.datasource.ga4 as ga4_mod + + monkeypatch.setattr(ga4_mod, "_PAGE_SIZE", 10) + fake_mod = MagicMock() fake_client = MagicMock() - page_size = 100_000 + page_size = 10 + partial_size = 5 full_rows = [ _row([f"u{i}", "i", "20260101", "purchase"], "1") for i in range(page_size) ] partial_rows = [ - _row([f"u{i}", "i", "20260101", "purchase"], "1") for i in range(5_000) + _row([f"u{i}", "i", "20260101", "purchase"], "1") for i in range(partial_size) ] + total = page_size * 3 + partial_size pages = [ - _make_page(rows=full_rows, row_count=305_000), - _make_page(rows=full_rows, row_count=305_000), - _make_page(rows=full_rows, row_count=305_000), - _make_page(rows=partial_rows, row_count=305_000), + _make_page(rows=full_rows, row_count=total), + _make_page(rows=full_rows, row_count=total), + _make_page(rows=full_rows, row_count=total), + _make_page(rows=partial_rows, row_count=total), ] fake_client.run_report.side_effect = pages fake_mod.BetaAnalyticsDataClient.return_value = fake_client monkeypatch.setitem(sys.modules, "google.analytics.data_v1beta", fake_mod) monkeypatch.setitem(sys.modules, "google.analytics.data_v1beta.types", MagicMock()) - import recotem.datasource.ga4 as ga4_mod - monkeypatch.setattr(ga4_mod, "get_ga4_max_pages", lambda: 500) from recotem.datasource.ga4 import GA4Source src = GA4Source(_cfg(max_rows=50_000_000)) df = src.fetch(_fetch_ctx()) - assert len(df) == 305_000 + assert len(df) == total # Exactly 4 API calls (3 full + 1 partial) assert fake_client.run_report.call_count == 4 @@ -616,11 +641,12 @@ def test_fetch_wall_clock_budget_exceeded(monkeypatch) -> None: fake_mod = MagicMock() fake_client = MagicMock() - # Return a full page every call so the loop would normally continue - full_page_rows = [ - _row([f"u{i}", "i", "20260101", "purchase"], "1") for i in range(100_000) - ] - resp = _make_page(rows=full_page_rows, row_count=10_000_000) + # The fake_monotonic below makes the top-of-loop deadline check trip + # before 
run_report is ever consumed — so any non-empty rows list works. + resp = _make_page( + rows=[_row(["u", "i", "20260101", "purchase"], "1")], + row_count=10_000_000, + ) fake_client.run_report.return_value = resp fake_mod.BetaAnalyticsDataClient.return_value = fake_client monkeypatch.setitem(sys.modules, "google.analytics.data_v1beta", fake_mod) @@ -916,6 +942,10 @@ def test_fetch_emits_quota_warning_at_most_once_across_pages(monkeypatch) -> Non """ import structlog.testing + import recotem.datasource.ga4 as ga4_mod + + monkeypatch.setattr(ga4_mod, "_PAGE_SIZE", 10) + fake_mod = MagicMock() fake_client = MagicMock() @@ -935,7 +965,7 @@ def _empty_quota_response(rows_count, full_page): resp.property_quota = empty_quota return resp - page_size = 100_000 + page_size = 10 fake_client.run_report.side_effect = [ _empty_quota_response(page_size, full_page=True), _empty_quota_response(page_size, full_page=True), @@ -974,13 +1004,15 @@ def test_fetch_post_call_deadline_check_fires(monkeypatch) -> None: """ import time as _time + import recotem.datasource.ga4 as ga4_mod + + monkeypatch.setattr(ga4_mod, "_PAGE_SIZE", 10) + fake_mod = MagicMock() fake_client = MagicMock() full_page = _make_page( - rows=[ - _row(["u1", f"i{i}", "20260101", "purchase"], "1") for i in range(100_000) - ], - row_count=100_000, + rows=[_row(["u1", f"i{i}", "20260101", "purchase"], "1") for i in range(10)], + row_count=10, has_data_loss=False, ) fake_client.run_report.return_value = full_page diff --git a/tests/unit/test_routes_dependency_introspection.py b/tests/unit/test_routes_dependency_introspection.py deleted file mode 100644 index de1206d2..00000000 --- a/tests/unit/test_routes_dependency_introspection.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Regression test: routes.py must NOT use `from __future__ import annotations`. 
- -CLAUDE.md prohibits `from __future__ import annotations` in routes.py because -it defers annotation evaluation and can break FastAPI's dependency introspection -for patterns like `kid: str = Depends(_require_auth)`. - -This test locks in two invariants: -1. The source file does not contain `from __future__ import annotations`. -2. Endpoint parameters that use `Depends` (predict, models) resolve `kid` as - a plain `str` — not a forward-reference string — so FastAPI can introspect - them correctly at router-construction time. -""" - -import ast -import inspect -from pathlib import Path - -import fastapi.params - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -_ROUTES_PATH = ( - Path(__file__).parent.parent.parent / "src" / "recotem" / "serving" / "routes.py" -) - - -def _make_minimal_router(): - """Build a router with empty api_keys so all endpoints are registered.""" - from recotem.serving.registry import ModelRegistry - from recotem.serving.routes import make_router - - registry = ModelRegistry() - return make_router(registry, api_keys=[]) - - -# --------------------------------------------------------------------------- -# Source-level check -# --------------------------------------------------------------------------- - - -def test_routes_has_no_future_annotations_import(): - """routes.py source must not contain `from __future__ import annotations`.""" - source = _ROUTES_PATH.read_text(encoding="utf-8") - tree = ast.parse(source) - for node in ast.walk(tree): - if isinstance(node, ast.ImportFrom) and node.module == "__future__": - names = [alias.name for alias in node.names] - assert "annotations" not in names, ( - "routes.py contains `from __future__ import annotations`, " - "which is prohibited by CLAUDE.md because it breaks FastAPI " - "dependency introspection for `kid: str = Depends(...)` patterns." 
- ) - - -# --------------------------------------------------------------------------- -# Runtime dependency-introspection checks -# --------------------------------------------------------------------------- - - -def test_predict_kid_parameter_is_depends(): - """The `kid` parameter on `predict` has a Depends default, not a string annotation.""" - router = _make_minimal_router() - - # Find the predict endpoint function from the registered routes. - predict_fn = None - for route in router.routes: - if hasattr(route, "path") and route.path == "/predict/{name}": - predict_fn = route.endpoint - break - - assert predict_fn is not None, "Could not find /predict/{name} route" - - sig = inspect.signature(predict_fn) - assert "kid" in sig.parameters, "predict endpoint is missing the `kid` parameter" - - kid_param = sig.parameters["kid"] - # The annotation should resolve to the real `str` type, not the string "str". - assert kid_param.annotation is str, ( - f"Expected `kid` annotation to be `str` type, got {kid_param.annotation!r}. " - "This suggests deferred annotation evaluation is active." - ) - # The default must be a FastAPI Depends instance. - assert isinstance(kid_param.default, fastapi.params.Depends), ( - f"Expected `kid` default to be fastapi.params.Depends, " - f"got {type(kid_param.default)!r}" - ) - - -def test_models_kid_parameter_is_depends(): - """The `kid` parameter on `models` has a Depends default, not a string annotation.""" - router = _make_minimal_router() - - models_fn = None - for route in router.routes: - if hasattr(route, "path") and route.path == "/models": - models_fn = route.endpoint - break - - assert models_fn is not None, "Could not find /models route" - - sig = inspect.signature(models_fn) - assert "kid" in sig.parameters, "models endpoint is missing the `kid` parameter" - - kid_param = sig.parameters["kid"] - assert kid_param.annotation is str, ( - f"Expected `kid` annotation to be `str` type, got {kid_param.annotation!r}. 
" - "This suggests deferred annotation evaluation is active." - ) - assert isinstance(kid_param.default, fastapi.params.Depends), ( - f"Expected `kid` default to be fastapi.params.Depends, " - f"got {type(kid_param.default)!r}" - ) - - -def test_health_has_no_kid_parameter(): - """`/health` is unauthenticated and must NOT have a `kid` parameter.""" - router = _make_minimal_router() - - health_fn = None - for route in router.routes: - if hasattr(route, "path") and route.path == "/health": - health_fn = route.endpoint - break - - assert health_fn is not None, "Could not find /health route" - - sig = inspect.signature(health_fn) - assert "kid" not in sig.parameters, ( - "/health should be unauthenticated but has a `kid` parameter" - ) diff --git a/tests/unit/test_serving_app.py b/tests/unit/test_serving_app.py index 4806d720..7571d1c6 100644 --- a/tests/unit/test_serving_app.py +++ b/tests/unit/test_serving_app.py @@ -311,7 +311,7 @@ def test_TrustedHost_blocks_unrecognized_host(tmp_path: Path) -> None: cfg.allowed_hosts = ["allowed.example.com"] app = create_app(cfg) client = TestClient(app, raise_server_exceptions=False) - response = client.get("/health", headers={"host": "evil.attacker.com"}) + response = client.get("/v1/health", headers={"host": "evil.attacker.com"}) assert response.status_code in (400, 403, 422) @@ -325,7 +325,7 @@ def test_TrustedHost_allows_configured_host(tmp_path: Path) -> None: cfg.allowed_hosts = ["testserver"] app = create_app(cfg) client = TestClient(app) - response = client.get("/health") + response = client.get("/v1/health") assert response.status_code == 200 @@ -345,7 +345,7 @@ def test_CORS_blocks_unconfigured_origin(tmp_path: Path) -> None: app = create_app(cfg) client = TestClient(app) response = client.options( - "/health", + "/v1/health", headers={ "origin": "https://evil.example.com", "access-control-request-method": "GET", @@ -365,7 +365,7 @@ def test_CORS_allows_configured_origin(tmp_path: Path) -> None: app = create_app(cfg) 
client = TestClient(app) response = client.options( - "/health", + "/v1/health", headers={ "origin": "https://app.example.com", "access-control-request-method": "GET", @@ -376,6 +376,37 @@ def test_CORS_allows_configured_origin(tmp_path: Path) -> None: ) +def test_CORS_expose_headers_includes_recotem_headers(tmp_path: Path) -> None: + """CORSMiddleware must expose X-Request-ID, X-Recotem-Model-Version, and + X-Recotem-Items-Degraded so browser JS can read them from cross-origin responses. + + ``Access-Control-Expose-Headers`` is sent on actual cross-origin requests + (GET/POST), not on preflight (OPTIONS) per the CORS spec. + """ + from fastapi.testclient import TestClient + + from recotem.serving.app import create_app + + cfg = _minimal_config(tmp_path) + cfg.allowed_origins = ["https://app.example.com"] + app = create_app(cfg) + client = TestClient(app) + response = client.get( + "/v1/health", + headers={"origin": "https://app.example.com"}, + ) + expose = response.headers.get("access-control-expose-headers", "") + exposed = {h.strip() for h in expose.split(",")} + for expected in ( + "X-Request-ID", + "X-Recotem-Model-Version", + "X-Recotem-Items-Degraded", + ): + assert expected in exposed, ( + f"Access-Control-Expose-Headers must include '{expected}'; got {expose!r}" + ) + + # --------------------------------------------------------------------------- # security.posture log includes unsafe_mode flag # --------------------------------------------------------------------------- @@ -494,7 +525,7 @@ def test_failed_initial_load_inserts_stub_with_loaded_false(tmp_path: Path) -> N app = create_app(cfg) client = TestClient(app) # /health (probe-safe) must return 503 when degraded - response = client.get("/health") + response = client.get("/v1/health") assert response.status_code == 503 body = response.json() assert body["status"] == "degraded", ( @@ -502,7 +533,7 @@ def test_failed_initial_load_inserts_stub_with_loaded_false(tmp_path: Path) -> N ) # /health/details 
carries the per-recipe info (auth passed via insecure_no_auth) - response_details = client.get("/health/details") + response_details = client.get("/v1/health/details") assert response_details.status_code == 503 details = response_details.json() assert "missing_recipe" in details["recipes"] @@ -514,8 +545,8 @@ def test_failed_initial_load_inserts_stub_with_loaded_false(tmp_path: Path) -> N def test_failed_load_recipe_returns_503_on_predict(tmp_path: Path) -> None: - """/predict against a recipe whose artifact failed to load returns 503, - not 200 or 500.""" + """POST /v1/recipes/{name}:recommend against a recipe whose artifact failed + to load returns 503, not 200 or 500.""" from fastapi.testclient import TestClient from recotem.serving.app import create_app @@ -527,7 +558,10 @@ def test_failed_load_recipe_returns_503_on_predict(tmp_path: Path) -> None: app = create_app(cfg) client = TestClient(app) - response = client.post("/predict/broken", json={"user_id": "u1", "cutoff": 5}) + response = client.post( + "/v1/recipes/broken:recommend", + json={"user_id": "u1", "limit": 5}, + ) assert response.status_code == 503 @@ -591,7 +625,7 @@ def test_initial_load_metadata_field_missing_with_on_field_missing_error_marks_f app = create_app(cfg) client = TestClient(app) - response = client.get("/health") + response = client.get("/v1/health") # Same B-2 contract: a stub recipe (loaded=False) makes the overall # status degraded, which now surfaces as HTTP 503. assert response.status_code == 503 @@ -599,7 +633,7 @@ def test_initial_load_metadata_field_missing_with_on_field_missing_error_marks_f assert body["status"] == "degraded" # Per-recipe detail only available via /health/details (I-3). 
- response_details = client.get("/health/details") + response_details = client.get("/v1/health/details") assert response_details.status_code == 503 details = response_details.json() assert "with_bad_metadata" in details["recipes"] @@ -611,8 +645,8 @@ def test_initial_load_metadata_field_missing_with_on_field_missing_error_marks_f def test_failed_load_recipe_excluded_from_models_listing(tmp_path: Path) -> None: - """/models lists only successfully loaded recipes; stubs are hidden - (operators see them via /health instead).""" + """/v1/recipes lists only successfully loaded recipes; stubs are hidden + (operators see them via /v1/health/details instead).""" from fastapi.testclient import TestClient from recotem.serving.app import create_app @@ -624,9 +658,9 @@ def test_failed_load_recipe_excluded_from_models_listing(tmp_path: Path) -> None app = create_app(cfg) client = TestClient(app) - response = client.get("/models") + response = client.get("/v1/recipes") assert response.status_code == 200 - names = [m.get("name") for m in response.json()] + names = [r.get("name") for r in response.json().get("recipes", [])] assert "broken" not in names @@ -947,7 +981,7 @@ def test_corrupt_header_json_returns_failed_entry_not_crash(tmp_path: Path) -> N app = create_app(cfg) client = TestClient(app) - response = client.get("/health") + response = client.get("/v1/health") # Same B-2 contract: a stub recipe (loaded=False) makes overall # status degraded → HTTP 503. assert response.status_code == 503 @@ -955,7 +989,7 @@ def test_corrupt_header_json_returns_failed_entry_not_crash(tmp_path: Path) -> N assert body["status"] == "degraded" # Per-recipe detail only available via /health/details (I-3). 
- response_details = client.get("/health/details") + response_details = client.get("/v1/health/details") assert response_details.status_code == 503 details = response_details.json() assert "corrupt_header" in details["recipes"], ( @@ -996,7 +1030,7 @@ def test_CORS_preflight_returns_success_for_configured_origin(tmp_path: Path) -> app = create_app(cfg) client = TestClient(app) response = client.options( - "/predict/some_model", + "/v1/recipes/some_model:recommend", headers={ "origin": "https://app.example.com", "access-control-request-method": "POST", @@ -1032,7 +1066,7 @@ def test_CORS_allow_credentials_header_not_sent_for_configured_origin( app = create_app(cfg) client = TestClient(app) response = client.options( - "/health", + "/v1/health", headers={ "origin": "https://app.example.com", "access-control-request-method": "GET", @@ -1216,7 +1250,7 @@ def test_insecure_no_auth_http_request_without_key_returns_200( app = create_app(cfg) client = TestClient(app) # No X-API-Key header — must still pass with insecure_no_auth=True - response = client.get("/health") + response = client.get("/v1/health") assert response.status_code == 200, ( f"insecure_no_auth=True must allow unauthenticated requests; " f"got {response.status_code}" @@ -1378,9 +1412,9 @@ def test_startup_one_failed_load_does_not_block_others( app = create_app(cfg) with TestClient(app) as client: - response = client.get("/health") + response = client.get("/v1/health") # /health/details shows per-recipe breakdown (I-3); auth skipped (insecure_no_auth) - response_details = client.get("/health/details") + response_details = client.get("/v1/health/details") # /health aggregate body = response.json() @@ -1568,12 +1602,13 @@ def test_try_load_artifact_builds_metadata_index_when_item_metadata_present( key_ring = KeyRing(f"active:{ACTIVE_KEY_HEX}") - entry = _try_load_artifact(recipe, key_ring, cfg) + entry, reason = _try_load_artifact(recipe, key_ring, cfg) assert entry.loaded, ( f"_try_load_artifact must return 
loaded=True for a valid artifact + metadata; " - f"error={entry.last_load_error!r}" + f"error={entry.last_load_error!r}; reason={reason!r}" ) + assert reason == "ok" assert entry.metadata_index is not None, ( "metadata_index must be populated (not None) when item_metadata is present" ) @@ -1591,6 +1626,99 @@ def test_try_load_artifact_builds_metadata_index_when_item_metadata_present( ) +def test_try_load_artifact_populates_loaded_at_unix(tmp_path: Path) -> None: + """Regression: startup-scan path must populate v1 fields. + + Bug-for-bug parallel to the watcher's _build_entry fix in Task 3 of + the v1 API overhaul plan. Without this, recipes loaded at startup + report ``loaded_at: 1970-01-01T00:00:00Z`` from GET /v1/recipes until + a hot-swap occurs. + + Invariant: any ModelEntry returned with ``loaded=True`` must carry + ``loaded_at_unix > 0`` and an ``algorithms`` list / ``config_digest`` + string sourced from the header. + """ + import time as _time + + from recotem.artifact.signing import KeyRing + from recotem.config import ServeConfig + from recotem.recipe.loader import load_recipe + from recotem.serving.app import _try_load_artifact + from tests.conftest import ACTIVE_KEY_HEX, build_raw_artifact + + # build_raw_artifact provides a safe default payload (a small builtin + # dict pickled via SafeUnpickler's allow-list); we only override the + # header to carry the v1 fields we want to assert on. 
+ artifact_path = tmp_path / "model.recotem" + artifact_path.write_bytes( + build_raw_artifact( + kid="active", + key_hex=ACTIVE_KEY_HEX, + header_dict={ + "recipe_name": "loaded_at_test", + "best_class": "TopPop", + "trained_at": "2026-01-01T00:00:00Z", + "config_digest": "deadbeef", + "algorithms": ["TopPop", "IALS"], + }, + ) + ) + + recipes_dir = tmp_path / "recipes" + recipes_dir.mkdir() + yaml_path = recipes_dir / "loaded_at_test.yaml" + yaml_path.write_text( + f"""\ +name: loaded_at_test +source: + type: csv + path: /tmp/data.csv +schema: + user_column: user_id + item_column: item_id +training: + algorithms: [TopPop] + n_trials: 1 +output: + path: {artifact_path} +""" + ) + + recipe = load_recipe(yaml_path) + + cfg = ServeConfig() + cfg.signing_keys_raw = f"active:{ACTIVE_KEY_HEX}" + cfg.max_artifact_bytes = 100 * 1024 * 1024 + cfg.max_payload_bytes = 50 * 1024 * 1024 + cfg.metadata_field_deny = [] + + key_ring = KeyRing(f"active:{ACTIVE_KEY_HEX}") + + before = _time.time() + entry, reason = _try_load_artifact(recipe, key_ring, cfg) + after = _time.time() + + assert entry.loaded, ( + f"_try_load_artifact must return loaded=True for a valid artifact; " + f"error={entry.last_load_error!r}; reason={reason!r}" + ) + assert reason == "ok" + assert entry.loaded_at_unix > 0, ( + f"startup-scan path must populate loaded_at_unix (regression: was 0.0); " + f"got {entry.loaded_at_unix!r}" + ) + assert before <= entry.loaded_at_unix <= after, ( + f"loaded_at_unix must fall within the load window " + f"[{before}, {after}]; got {entry.loaded_at_unix}" + ) + assert entry.config_digest == "sha256:deadbeef", ( + f"config_digest must be normalized to sha256:<hex>; got {entry.config_digest!r}" + ) + assert entry.algorithms == ["TopPop", "IALS"], ( + f"algorithms must be sourced from header; got {entry.algorithms!r}" + ) + + # --------------------------------------------------------------------------- # N-15: startup_parallelism — true parallel execution verified via thread IDs 
# --------------------------------------------------------------------------- @@ -1995,7 +2123,7 @@ def test_security_posture_signing_key_status_dev_allow_unsigned( def test_unhandled_exception_returns_structured_json_500(tmp_path: Path) -> None: """I-1: An unexpected Exception from a route handler must return HTTP 500 - with body {detail: 'internal error', code: 'internal_error'} rather than + with body {detail: 'internal error', code: 'INTERNAL_ERROR'} rather than a plain-text FastAPI default or an unhandled traceback. """ from fastapi.testclient import TestClient @@ -2020,8 +2148,8 @@ async def explode(): assert data.get("detail") == "internal error", ( f"Expected detail='internal error'; got {data!r}" ) - assert data.get("code") == "internal_error", ( - f"Expected code='internal_error'; got {data!r}" + assert data.get("code") == "INTERNAL_ERROR", ( + f"Expected code='INTERNAL_ERROR'; got {data!r}" ) @@ -2069,7 +2197,7 @@ def test_health_returns_only_aggregate_counts(tmp_path: Path) -> None: cfg = _minimal_config(tmp_path) app = create_app(cfg) client = TestClient(app) - response = client.get("/health") + response = client.get("/v1/health") assert response.status_code == 200 body = response.json() @@ -2113,7 +2241,7 @@ def test_health_details_requires_auth_when_keys_configured(tmp_path: Path) -> No app = create_app(cfg) client = TestClient(app, raise_server_exceptions=False) - response = client.get("/health/details") + response = client.get("/v1/health/details") assert response.status_code == 401, ( f"/health/details must return 401 when auth is configured; got {response.status_code}" ) @@ -2136,7 +2264,7 @@ def test_health_details_returns_per_recipe_data_when_auth_passes( app = create_app(cfg) client = TestClient(app) - response = client.get("/health/details") + response = client.get("/v1/health/details") # Degraded because artifact is missing. 
assert response.status_code == 503 @@ -2248,3 +2376,147 @@ def test_docs_disabled_when_env_is_staging(tmp_path: Path) -> None: assert resp.status_code == 404, ( f"RECOTEM_ENV=staging: /docs must return 404; got {resp.status_code}" ) + + +# --------------------------------------------------------------------------- +# RequestIDMiddleware — X-Request-ID contract +# --------------------------------------------------------------------------- + + +def test_request_id_header_present_on_200_response(tmp_path: Path) -> None: + """X-Request-ID must be present in a 200 response (e.g. GET /v1/health).""" + from fastapi.testclient import TestClient + + from recotem.serving.app import create_app + + cfg = _minimal_config(tmp_path) + app = create_app(cfg) + client = TestClient(app) + response = client.get("/v1/health") + assert response.status_code == 200 + assert "x-request-id" in response.headers, ( + "X-Request-ID must be present in every 200 response" + ) + assert response.headers["x-request-id"], "X-Request-ID must not be empty" + + +def test_request_id_header_present_on_404_response(tmp_path: Path) -> None: + """X-Request-ID must be present on a 404 (non-existent recipe GET).""" + from fastapi.testclient import TestClient + + from recotem.serving.app import create_app + + cfg = _minimal_config(tmp_path) + app = create_app(cfg) + client = TestClient(app, raise_server_exceptions=False) + response = client.get("/v1/recipes/no_such") + assert response.status_code == 404 + assert "x-request-id" in response.headers, ( + "X-Request-ID must be present even on 404 responses" + ) + + +def test_request_id_header_present_on_503_response(tmp_path: Path) -> None: + """X-Request-ID must be present on a 503 (unloaded recipe POST).""" + from fastapi.testclient import TestClient + + from recotem.serving.app import create_app + + cfg = _minimal_config(tmp_path) + recipes_dir = Path(cfg.recipes_dir) # type: ignore[arg-type] + missing_artifact = tmp_path / "no-artifact.recotem" + 
_write_recipe_yaml(recipes_dir, "broken_for_rid", missing_artifact) + + app = create_app(cfg) + client = TestClient(app, raise_server_exceptions=False) + response = client.post( + "/v1/recipes/broken_for_rid:recommend", + json={"user_id": "u1", "limit": 5}, + ) + assert response.status_code == 503 + assert "x-request-id" in response.headers, ( + "X-Request-ID must be present on 503 responses" + ) + + +def test_request_id_echoed_when_client_supplies_valid_id(tmp_path: Path) -> None: + """When the client sends a valid X-Request-ID, the same value is echoed back.""" + from fastapi.testclient import TestClient + + from recotem.serving.app import create_app + + cfg = _minimal_config(tmp_path) + app = create_app(cfg) + client = TestClient(app) + trace_id = "my-trace-id-123" + response = client.get("/v1/health", headers={"X-Request-ID": trace_id}) + assert response.status_code == 200 + assert response.headers.get("x-request-id") == trace_id, ( + f"Valid client X-Request-ID must be echoed; " + f"got {response.headers.get('x-request-id')!r}" + ) + + +def test_request_id_replaced_when_client_supplies_overlong_value( + tmp_path: Path, +) -> None: + """A client-supplied X-Request-ID longer than 128 chars is rejected and + replaced by a server-generated ID.""" + from fastapi.testclient import TestClient + + from recotem.serving.app import create_app + + cfg = _minimal_config(tmp_path) + app = create_app(cfg) + client = TestClient(app) + too_long = "a" * 200 + response = client.get("/v1/health", headers={"X-Request-ID": too_long}) + assert response.status_code == 200 + returned = response.headers.get("x-request-id", "") + assert returned != too_long, ( + "Overlong X-Request-ID must be replaced by a server-generated value" + ) + assert len(returned) <= 128, ( + f"Server-generated ID must be <=128 chars; got {len(returned)}" + ) + assert returned, "Server-generated X-Request-ID must not be empty" + + +def test_request_id_replaced_when_client_supplies_invalid_chars( + tmp_path: 
Path, +) -> None: + """A client-supplied X-Request-ID with disallowed characters is replaced + by a server-generated ID.""" + from fastapi.testclient import TestClient + + from recotem.serving.app import create_app + + cfg = _minimal_config(tmp_path) + app = create_app(cfg) + client = TestClient(app) + bad_value = "<script>alert(1)</script>" + response = client.get("/v1/health", headers={"X-Request-ID": bad_value}) + assert response.status_code == 200 + returned = response.headers.get("x-request-id", "") + assert returned != bad_value, ( + "X-Request-ID with invalid chars must be replaced by a server-generated value" + ) + assert returned, "Server-generated X-Request-ID must not be empty" + + +def test_request_id_replaced_when_client_supplies_empty_value( + tmp_path: Path, +) -> None: + """An empty X-Request-ID header is treated as absent and replaced by a + server-generated ID.""" + from fastapi.testclient import TestClient + + from recotem.serving.app import create_app + + cfg = _minimal_config(tmp_path) + app = create_app(cfg) + client = TestClient(app) + response = client.get("/v1/health", headers={"X-Request-ID": ""}) + assert response.status_code == 200 + returned = response.headers.get("x-request-id", "") + assert returned, "Empty X-Request-ID must be replaced by a server-generated value" diff --git a/tests/unit/test_serving_auth.py b/tests/unit/test_serving_auth.py index 54e0f9a1..7b094f73 100644 --- a/tests/unit/test_serving_auth.py +++ b/tests/unit/test_serving_auth.py @@ -911,6 +911,88 @@ def test_matched_kid_retains_first_match_when_duplicates_present() -> None: assert request.state.kid == "first-team" +# --------------------------------------------------------------------------- +# Finding 12: Auth bypass mode log field distinguishes insecure_no_auth from +# loopback_no_keys +# --------------------------------------------------------------------------- + + +def test_anonymous_bypass_log_mode_field_is_insecure_no_auth() -> None: + """When 
bypass_mode='insecure_no_auth', the debug log must carry + mode='insecure_no_auth'.""" + import structlog.testing + + import recotem.serving.auth as auth_mod + + auth_mod._anon_seen.clear() + + request = _make_anon_request("127.0.0.1") + + with structlog.testing.capture_logs() as logs: + verify_api_key(request, [], bypass_mode="insecure_no_auth") + + debug_logs = [ + e + for e in logs + if e.get("event") == "auth_anonymous_bypass" and e.get("log_level") == "debug" + ] + assert debug_logs, "auth_anonymous_bypass DEBUG log must fire" + assert debug_logs[0].get("mode") == "insecure_no_auth", ( + f"Debug log must carry mode='insecure_no_auth'; got {debug_logs[0]!r}" + ) + + +def test_anonymous_bypass_log_mode_field_is_loopback_no_keys() -> None: + """When bypass_mode='loopback_no_keys' (default), the debug log must carry + mode='loopback_no_keys'.""" + import structlog.testing + + import recotem.serving.auth as auth_mod + + auth_mod._anon_seen.clear() + + request = _make_anon_request("127.0.0.1") + + with structlog.testing.capture_logs() as logs: + # Default bypass_mode='loopback_no_keys' + verify_api_key(request, []) + + debug_logs = [ + e + for e in logs + if e.get("event") == "auth_anonymous_bypass" and e.get("log_level") == "debug" + ] + assert debug_logs, "auth_anonymous_bypass DEBUG log must fire" + assert debug_logs[0].get("mode") == "loopback_no_keys", ( + f"Debug log must carry mode='loopback_no_keys'; got {debug_logs[0]!r}" + ) + + +def test_anonymous_bypass_first_seen_log_carries_mode_field() -> None: + """The INFO log (first_seen) must also carry a mode field.""" + import structlog.testing + + import recotem.serving.auth as auth_mod + + auth_mod._anon_seen.clear() + + request = _make_anon_request("10.2.3.4") + + with structlog.testing.capture_logs() as logs: + verify_api_key(request, [], bypass_mode="insecure_no_auth") + + info_logs = [ + e + for e in logs + if e.get("event") == "auth_anonymous_bypass_first_seen" + and e.get("log_level") == "info" + ] + 
assert info_logs, "auth_anonymous_bypass_first_seen INFO log must fire" + assert info_logs[0].get("mode") == "insecure_no_auth", ( + f"First-seen log must carry mode='insecure_no_auth'; got {info_logs[0]!r}" + ) + + def test_matched_kid_single_match_unchanged() -> None: """Regression guard: single-match behaviour must be unchanged.""" from recotem.serving.auth import verify_api_key diff --git a/tests/unit/test_serving_metrics.py b/tests/unit/test_serving_metrics.py index fa858919..285f9f15 100644 --- a/tests/unit/test_serving_metrics.py +++ b/tests/unit/test_serving_metrics.py @@ -150,3 +150,114 @@ def test_bigquery_fallback_counter_exposed_via_metrics_endpoint( assert "text/plain" in content_type, ( f"Content-Type must be text/plain Prometheus format; got {content_type!r}" ) + + +# --------------------------------------------------------------------------- +# v1 API metrics: recotem_v1_requests_total, recotem_v1_request_latency_seconds, +# recotem_v1_batch_size +# --------------------------------------------------------------------------- + + +@pytest.fixture() +def reset_metrics_registry(monkeypatch: pytest.MonkeyPatch): + """Reset v1 metric globals and unregister their collectors before/after. + + The prometheus_client default registry is a process-global singleton, so + re-running a test that creates the same Counter/Histogram name would raise + "Duplicated timeseries in CollectorRegistry". This fixture: + + 1. Enables metrics via ``RECOTEM_METRICS_ENABLED=1``. + 2. Tears down any pre-existing v1 collectors (defensive — handles state + leaked from a prior test run within the same process). + 3. Resets the module-level globals so ``_ensure_v1_initialized`` runs. + 4. Repeats teardown after the test so subsequent tests start clean. 
+ """ + monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "1") + + from prometheus_client import REGISTRY + + from recotem.serving import metrics as _m + + def _teardown() -> None: + v1_names = { + "recotem_v1_requests", + "recotem_v1_request_latency_seconds", + "recotem_v1_batch_size", + "recotem_v1_batch_element_errors", + "recotem_v1_metadata_degraded_items", + "recotem_v1_validation_errors_outside_verb", + } + for collector in list(REGISTRY._names_to_collectors.values()): + describe = getattr(collector, "describe", None) + if describe is None: + continue + try: + metrics = describe() + except Exception: + continue + for m in metrics: + if getattr(m, "name", None) in v1_names: + try: + REGISTRY.unregister(collector) + except (KeyError, ValueError): + pass + break + for attr in ( + "_V1_REQUEST_COUNTER", + "_V1_REQUEST_LATENCY", + "_V1_BATCH_SIZE", + "_V1_BATCH_ELEMENT_ERRORS", + "_V1_METADATA_DEGRADED_ITEMS", + "_V1_VALIDATION_ERRORS_OUTSIDE_VERB", + ): + setattr(_m, attr, None) + + _teardown() + yield + _teardown() + + +from recotem.serving import metrics as _m # noqa: E402 + + +def test_record_v1_request_accepts_verb_label(reset_metrics_registry): + _m.record_v1_request("smartstocknotes", "recommend", "ok", 0.012) + _m.record_v1_request( + "smartstocknotes", "recommend-related", "unknown_seed_items", 0.005 + ) + out, _ = _m.generate_latest() + text = out.decode() + assert 'verb="recommend"' in text + assert 'verb="recommend-related"' in text + assert 'status="unknown_seed_items"' in text + + +def test_observe_batch_size_records_histogram(reset_metrics_registry): + _m.observe_batch_size("smartstocknotes", "batch-recommend", 7) + out, _ = _m.generate_latest() + text = out.decode() + assert "recotem_v1_batch_size_bucket" in text + + +@pytest.mark.skipif( + not _prometheus_available(), + reason="prometheus_client not installed in this environment", +) +def test_inc_metadata_degraded_items_coerces_unknown_kind(reset_metrics_registry): + """Unknown kind values must be 
coerced to 'unexpected' to prevent label + cardinality explosion; known kinds 'fallback' and 'dropped' pass through.""" + _m.inc_metadata_degraded_items("r1", "recommend", "fallback", 2) + _m.inc_metadata_degraded_items("r1", "recommend", "dropped", 1) + _m.inc_metadata_degraded_items("r1", "recommend", "arbitrary_future_kind", 3) + + out, _ = _m.generate_latest() + text = out.decode() + + assert 'kind="fallback"' in text, "fallback kind must appear in output" + assert 'kind="dropped"' in text, "dropped kind must appear in output" + assert 'kind="unexpected"' in text, ( + "arbitrary_future_kind must be coerced to 'unexpected'" + ) + assert "arbitrary_future_kind" not in text, ( + "raw unknown kind must not appear in Prometheus output" + ) diff --git a/tests/unit/test_serving_registry.py b/tests/unit/test_serving_registry.py index 56c7ff0a..704a3115 100644 --- a/tests/unit/test_serving_registry.py +++ b/tests/unit/test_serving_registry.py @@ -862,3 +862,73 @@ def test_registry_lock_is_plain_lock_not_rlock() -> None: f"ModelRegistry._lock must be a plain threading.Lock, " f"got {lock_type_name!r} — reentrancy is not needed and adds overhead" ) + + +# --------------------------------------------------------------------------- +# Plan A Task 3: v1 ModelEntry extensions +# --------------------------------------------------------------------------- + + +def _stub_entry() -> ModelEntry: + """Return a minimal ModelEntry with a populated sha256 marker for v1 tests. + + Re-uses the module's existing _make_entry() helper and additionally sets + a non-empty _loaded_marker[1] so the model_version property has a real + artifact hash to format. loaded_at_unix is set so loaded_at returns a + real timestamp rather than the unix epoch. 
+ """ + entry = _make_entry("v1_entry") + entry._loaded_marker = ("etag-v1", "a" * 64) + entry.loaded_at_unix = 1_700_000_000.0 + return entry + + +def test_model_entry_supported_verbs_default_for_user_item_kind(): + e = _stub_entry() + assert "recommend" in e.supported_verbs + assert "recommend-related" in e.supported_verbs + assert "batch-recommend" in e.supported_verbs + assert "batch-recommend-related" in e.supported_verbs + + +def test_model_entry_kind_defaults_to_user_item(): + e = _stub_entry() + assert e.kind == "user-item" + + +def test_model_entry_model_version_sha256_prefixed(): + e = _stub_entry() + assert e.model_version.startswith("sha256:") + assert len(e.model_version) > len("sha256:") # not empty hex + + +def test_model_entry_loaded_at_is_utc_datetime(): + """loaded_at is now a timezone-aware datetime (not a string).""" + + e = _stub_entry() + loaded_at = e.loaded_at + assert loaded_at.tzinfo is not None, "loaded_at must be timezone-aware" + assert loaded_at.utcoffset().total_seconds() == 0, "loaded_at must be UTC" + + +# --------------------------------------------------------------------------- +# K. 
last_load_error sanitization (PR #103) +# --------------------------------------------------------------------------- + + +def test_last_load_error_redacts_uri_paths() -> None: + from recotem.serving.app import _sanitize_error + + reason = "read failed: s3://my-bucket/secret-models/foo.bin not accessible" + sanitized = _sanitize_error(reason) + assert "<redacted-uri>" in sanitized + assert "my-bucket" not in sanitized + assert "secret-models" not in sanitized + + +def test_last_load_error_truncated_to_200_chars() -> None: + from recotem.serving.app import _sanitize_error + + long_reason = "x" * 500 + sanitized = _sanitize_error(long_reason) + assert len(sanitized) <= 200 diff --git a/tests/unit/test_serving_routes.py b/tests/unit/test_serving_routes.py deleted file mode 100644 index b3d4a910..00000000 --- a/tests/unit/test_serving_routes.py +++ /dev/null @@ -1,1944 +0,0 @@ -"""Unit tests for recotem.serving.routes. - -Tests: -- /predict happy path -- /predict 401 (missing API key) -- /predict 404 (user not found) -- /predict 503 (recipe not loaded) -- /health overall + per-recipe -- /models -- /metrics off-by-default and on with extras -- request_id in X-Request-ID header -- kid field in model block of predict response -""" - -from __future__ import annotations - -import hashlib -from unittest.mock import MagicMock - -import pytest -import pytest as _pytest -from fastapi import FastAPI -from fastapi.testclient import TestClient - -from recotem.config import ApiKeyEntry -from recotem.serving.registry import ModelEntry, ModelRegistry -from recotem.serving.routes import make_router - -# --------------------------------------------------------------------------- -# Fixture helpers -# --------------------------------------------------------------------------- - - -def _make_registry_with_recipe( - name: str = "test_recipe", - user_id_to_items: dict | None = None, -) -> ModelRegistry: - """Build a ModelRegistry with a minimal mock recommender.""" - if user_id_to_items is 
None: - user_id_to_items = {"user1": [("item1", 0.9), ("item2", 0.8)]} - - recommender = MagicMock() - - def _get_rec(user_id, cutoff): - if user_id in user_id_to_items: - return user_id_to_items[user_id][:cutoff] - raise KeyError(f"user {user_id} not in training data") - - recommender.get_recommendation_for_known_user_id.side_effect = _get_rec - - entry = ModelEntry( - name=name, - recommender=recommender, - header={ - "best_class": "TopPopRecommender", - "trained_at": "2026-01-01T00:00:00Z", - }, - kid="active", - ) - - registry = ModelRegistry() - registry.replace(name, entry) - return registry - - -def _make_api_key_entry(plaintext: str, kid: str = "k1") -> ApiKeyEntry: - # Mirror recotem.serving.auth._hash_api_key (scrypt KDF with the - # ``recotem.api-key.v1`` domain-separation salt at minimum cost). - sha256_hex = hashlib.scrypt( - plaintext.encode("utf-8"), - salt=b"recotem.api-key.v1", - n=2, - r=8, - p=1, - dklen=32, - ).hex() - return ApiKeyEntry(kid=kid, sha256_hex=sha256_hex) - - -def _make_test_client( - registry: ModelRegistry | None = None, - api_keys: list[ApiKeyEntry] | None = None, - insecure: bool = True, -) -> tuple[TestClient, str]: - """Return (TestClient, plaintext_api_key).""" - if api_keys is None and not insecure: - plaintext = "test_api_key_32_bytes_exactly!!!" 
- api_keys = [_make_api_key_entry(plaintext)] - elif api_keys is None: - plaintext = "" - api_keys = [] - else: - plaintext = "" - - if registry is None: - registry = _make_registry_with_recipe() - - router = make_router(registry=registry, api_keys=api_keys) - app = FastAPI() - app.include_router(router) - return TestClient(app), plaintext - - -# --------------------------------------------------------------------------- -# /predict happy path -# --------------------------------------------------------------------------- - - -def test_predict_happy_path_returns_items() -> None: - registry = _make_registry_with_recipe() - client, _ = _make_test_client(registry=registry) - response = client.post( - "/predict/test_recipe", - json={"user_id": "user1", "cutoff": 5}, - ) - assert response.status_code == 200 - data = response.json() - assert "items" in data - assert len(data["items"]) > 0 - assert data["items"][0]["item_id"] == "item1" - - -def test_predict_response_includes_model_block() -> None: - client, _ = _make_test_client() - response = client.post("/predict/test_recipe", json={"user_id": "user1"}) - assert response.status_code == 200 - data = response.json() - assert "model" in data - assert data["model"]["recipe"] == "test_recipe" - assert "kid" in data["model"] - - -def test_predict_response_includes_request_id() -> None: - client, _ = _make_test_client() - response = client.post("/predict/test_recipe", json={"user_id": "user1"}) - assert response.status_code == 200 - assert "request_id" in response.json() - - -def test_request_id_returned_in_X_Request_ID_header() -> None: - # The routes do not currently set X-Request-ID header — test response body - client, _ = _make_test_client() - response = client.post("/predict/test_recipe", json={"user_id": "user1"}) - data = response.json() - assert "request_id" in data - assert len(data["request_id"]) > 0 - - -def test_response_includes_kid_field_in_model_block() -> None: - client, _ = _make_test_client() - response = 
client.post("/predict/test_recipe", json={"user_id": "user1"}) - assert response.json()["model"]["kid"] == "active" - - -# --------------------------------------------------------------------------- -# /predict 401 -# --------------------------------------------------------------------------- - - -def test_predict_401_without_api_key() -> None: - """With keys configured, missing X-API-Key header → 401.""" - plaintext = "api_key_32_bytes_exactly_here!!!" - entry = _make_api_key_entry(plaintext) - registry = _make_registry_with_recipe() - router = make_router(registry=registry, api_keys=[entry]) - app = FastAPI() - app.include_router(router) - client = TestClient(app, raise_server_exceptions=False) - response = client.post("/predict/test_recipe", json={"user_id": "user1"}) - assert response.status_code == 401 - - -def test_predict_401_with_wrong_api_key() -> None: - plaintext = "correct_api_key_32_bytes_exactly" - entry = _make_api_key_entry(plaintext) - registry = _make_registry_with_recipe() - router = make_router(registry=registry, api_keys=[entry]) - app = FastAPI() - app.include_router(router) - client = TestClient(app, raise_server_exceptions=False) - response = client.post( - "/predict/test_recipe", - json={"user_id": "user1"}, - headers={"x-api-key": "wrong_key"}, - ) - assert response.status_code == 401 - - -# --------------------------------------------------------------------------- -# /predict 404 (user not found) -# --------------------------------------------------------------------------- - - -def test_predict_404_user_not_in_training_data() -> None: - client, _ = _make_test_client() - response = client.post("/predict/test_recipe", json={"user_id": "unknown_user"}) - assert response.status_code == 404 - - -# --------------------------------------------------------------------------- -# /predict 503 (recipe not loaded / unhealthy) -# --------------------------------------------------------------------------- - - -def test_predict_503_recipe_not_loaded() 
-> None: - registry = ModelRegistry() # empty - client, _ = _make_test_client(registry=registry) - response = client.post("/predict/no_such_recipe", json={"user_id": "user1"}) - assert response.status_code == 503 - - -def test_stale_but_loaded_recipe_keeps_serving() -> None: - """A recipe whose latest hot-swap failed must keep serving the old model. - - The watcher sets ``last_load_error`` on the existing entry without - dropping the recommender. ``/predict`` must continue to return 200 - so that a bad new artifact does not take down the endpoint. - """ - registry = _make_registry_with_recipe("stale_recipe") - entry = registry.get("stale_recipe") - assert entry is not None - entry.last_load_error = "hmac mismatch on new artifact" - client, _ = _make_test_client(registry=registry) - response = client.post("/predict/stale_recipe", json={"user_id": "user1"}) - assert response.status_code == 200 - - -def test_initial_load_failure_returns_503() -> None: - """A recipe that never loaded (``loaded=False`` stub) must return 503.""" - registry = ModelRegistry() - stub = ModelEntry( - name="never_loaded", - recommender=None, - header={}, - kid="", - loaded=False, - last_load_error="initial load failed: bad header", - ) - registry.replace("never_loaded", stub) - client, _ = _make_test_client(registry=registry) - response = client.post("/predict/never_loaded", json={"user_id": "user1"}) - assert response.status_code == 503 - - -# --------------------------------------------------------------------------- -# /health -# --------------------------------------------------------------------------- - - -def test_health_returns_ok_when_all_recipes_loaded() -> None: - client, _ = _make_test_client() - response = client.get("/health") - assert response.status_code == 200 - data = response.json() - assert data["status"] == "ok" - - -def test_health_overall_degraded_when_any_recipe_unloaded() -> None: - registry = _make_registry_with_recipe("loaded") - broken_entry = ModelEntry( - 
name="broken", - recommender=MagicMock(), - header={}, - kid="active", - last_load_error="signature mismatch", - ) - registry.replace("broken", broken_entry) - client, _ = _make_test_client(registry=registry) - response = client.get("/health") - # Degraded must surface as HTTP 503 so K8s readiness probes mark the Pod - # NotReady — returning 200 would let rolling upgrades silently swap in a - # Pod whose every /predict returns 503. See routes.health() docstring. - assert response.status_code == 503 - data = response.json() - assert data["status"] == "degraded" - - -def test_health_returns_200_when_all_recipes_loaded() -> None: - """Counterpart to the degraded-503 test: healthy state stays 200.""" - client, _ = _make_test_client() - response = client.get("/health") - assert response.status_code == 200 - assert response.json()["status"] == "ok" - - -def test_health_503_when_recipe_loaded_false_even_without_error() -> None: - """A stub entry with loaded=False (startup load failure) → 503. - - Covers the recipe_not_loaded_at_startup branch where the watcher inserts - a ``ModelEntry(loaded=False, last_load_error=...)`` placeholder. Both - flags should drive overall degraded; this test pins the ``loaded=False`` - half so a future refactor cannot regress to "degraded only when error". - """ - registry = _make_registry_with_recipe("loaded") - stub_entry = ModelEntry( - name="never_loaded", - recommender=None, - header={}, - kid="", - last_load_error="HMAC verify failed", - loaded=False, - ) - registry.replace("never_loaded", stub_entry) - client, _ = _make_test_client(registry=registry) - response = client.get("/health") - assert response.status_code == 503 - assert response.json()["status"] == "degraded" - - -def test_health_per_recipe_status() -> None: - """Per-recipe detail is now at /health/details (I-3). - /health only returns aggregate {status, total, loaded}.""" - client, _ = _make_test_client() - # /health must NOT have per-recipe data (I-3). 
- response = client.get("/health") - data = response.json() - assert "recipes" not in data, ( - "/health must not expose per-recipe data (moved to /health/details)" - ) - assert "total" in data - assert "loaded" in data - # /health/details does have per-recipe data (insecure_no_auth → no creds needed). - response_details = client.get("/health/details") - details = response_details.json() - assert "recipes" in details - assert "test_recipe" in details["recipes"] - - -# --------------------------------------------------------------------------- -# /models -# --------------------------------------------------------------------------- - - -def test_models_endpoint_returns_list() -> None: - client, _ = _make_test_client() - response = client.get("/models") - assert response.status_code == 200 - data = response.json() - assert isinstance(data, list) - assert len(data) > 0 - - -# --------------------------------------------------------------------------- -# /metrics — opt-in via RECOTEM_METRICS_ENABLED -# --------------------------------------------------------------------------- - - -def test_metrics_endpoint_404_when_env_unset(monkeypatch) -> None: - """Without RECOTEM_METRICS_ENABLED, /metrics is not registered.""" - monkeypatch.delenv("RECOTEM_METRICS_ENABLED", raising=False) - client, _ = _make_test_client() - response = client.get("/metrics") - assert response.status_code == 404 - - -def test_metrics_endpoint_404_when_env_falsy(monkeypatch) -> None: - """Falsy values for RECOTEM_METRICS_ENABLED keep /metrics off.""" - monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "false") - client, _ = _make_test_client() - response = client.get("/metrics") - assert response.status_code == 404 - - -# --------------------------------------------------------------------------- -# m-4: metadata_field_deny is case-insensitive -# --------------------------------------------------------------------------- - - -def test_metadata_field_deny_is_case_insensitive() -> None: - """Deny-list 
entries must block metadata columns regardless of case. - - e.g. denying 'internal_id' must also block 'Internal_ID', 'INTERNAL_ID'. - """ - import pandas as pd - - from recotem.serving.routes import _lookup_metadata - - df = pd.DataFrame( - { - "item_id": ["i1"], - "title": ["Widget"], - "Internal_ID": ["secret-123"], - "SCORE": [0.99], - } - ).set_index("item_id") - - # Deny list uses lowercase; columns use mixed/upper case. - deny_set: frozenset[str] = frozenset({"internal_id", "score"}) - - result = _lookup_metadata(df, "i1", deny_set) - - # 'title' should be present — not in deny list. - assert "title" in result - # 'Internal_ID' denied via 'internal_id' (case-fold). - assert "Internal_ID" not in result - # 'SCORE' denied via 'score' (case-fold). - assert "SCORE" not in result - - -def test_metadata_field_deny_blocks_lower_when_deny_entry_is_upper() -> None: - """The deny-list itself is also case-folded at router construction time. - - Passing 'INTERNAL_ID' in the deny list must still block 'internal_id' and - 'Internal_ID' in the metadata columns. - """ - import pandas as pd - from fastapi import FastAPI - from fastapi.testclient import TestClient - - df = pd.DataFrame( - { - "item_id": ["u_item"], - "title": ["Thing"], - "secret_col": ["hide-me"], - } - ).set_index("item_id") - - recommender = MagicMock() - recommender.get_recommendation_for_known_user_id.return_value = [("u_item", 0.5)] - - entry = ModelEntry( - name="deny_test", - recommender=recommender, - header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, - kid="k1", - metadata_df=df, - ) - registry = ModelRegistry() - registry.replace("deny_test", entry) - - # Pass deny list with UPPER-CASE entry — must still block the lower-case column. 
- router = make_router( - registry=registry, api_keys=[], metadata_field_deny=["SECRET_COL"] - ) - app = FastAPI() - app.include_router(router) - client = TestClient(app) - - response = client.post( - "/predict/deny_test", json={"user_id": "u_item", "cutoff": 1} - ) - assert response.status_code == 200 - items = response.json()["items"] - assert len(items) == 1 - item = items[0] - assert "title" in item, "'title' must not be denied" - assert "secret_col" not in item, ( - "'secret_col' must be denied even when deny entry was 'SECRET_COL'" - ) - - -# --------------------------------------------------------------------------- -# C-1: X-Request-ID response header -# --------------------------------------------------------------------------- - - -def test_predict_response_includes_x_request_id_header() -> None: - """On a 200 success, response header X-Request-ID must match body request_id.""" - client, _ = _make_test_client() - response = client.post("/predict/test_recipe", json={"user_id": "user1"}) - assert response.status_code == 200 - data = response.json() - assert "X-Request-ID" in response.headers, ( - "X-Request-ID header must be present in the response" - ) - assert response.headers["X-Request-ID"] == data["request_id"], ( - "X-Request-ID header must match the request_id in the response body" - ) - - -def test_predict_echoes_x_request_id_from_request() -> None: - """When the client sends X-Request-ID, the same value is echoed back.""" - client, _ = _make_test_client() - custom_id = "my-trace-id-12345" - response = client.post( - "/predict/test_recipe", - json={"user_id": "user1"}, - headers={"X-Request-ID": custom_id}, - ) - assert response.status_code == 200 - assert response.headers.get("X-Request-ID") == custom_id, ( - "X-Request-ID sent by client must be echoed back unchanged" - ) - assert response.json()["request_id"] == custom_id - - -# --------------------------------------------------------------------------- -# N-6: M-4 — X-Request-ID validation: 
invalid IDs replaced with UUID -# --------------------------------------------------------------------------- - - -def test_valid_x_request_id_echoed_unchanged() -> None: - """A valid X-Request-ID (alphanumeric + _- up to 64 chars) is echoed back. - - M-4 added a regex guard so only safe IDs are echoed; this test verifies - that a valid ID passes the guard and is not replaced. - """ - client, _ = _make_test_client() - valid_id = "abc-123_DEF" - response = client.post( - "/predict/test_recipe", - json={"user_id": "user1"}, - headers={"X-Request-ID": valid_id}, - ) - assert response.status_code == 200 - assert response.headers.get("X-Request-ID") == valid_id, ( - "Valid X-Request-ID must be echoed unchanged" - ) - assert response.json()["request_id"] == valid_id - - -@_pytest.mark.parametrize( - "invalid_id", - [ - "", # empty string — fails 1-char minimum - "a" * 65, # exceeds 64-char maximum - "<script>alert(1)</script>", # contains angle brackets - "evil\x00byte", # contains null byte - "space here", # contains space - ], -) -def test_invalid_x_request_id_replaced_with_uuid(invalid_id: str) -> None: - """Invalid X-Request-ID values must be replaced with a server-generated UUID. - - M-4 added regex validation ``^[A-Za-z0-9_-]{1,64}$`` so that control - characters, angle brackets, spaces, oversized IDs, and empty strings cannot - be injected into logs via the request ID header. Any value that fails the - regex is silently replaced with a uuid4. - """ - import re - - client, _ = _make_test_client() - response = client.post( - "/predict/test_recipe", - json={"user_id": "user1"}, - headers={"X-Request-ID": invalid_id}, - ) - assert response.status_code == 200 - returned_id = response.headers.get("X-Request-ID", "") - # The server must NOT echo the invalid value back. - assert returned_id != invalid_id or not invalid_id, ( - f"Invalid X-Request-ID {invalid_id!r} must not be echoed unchanged" - ) - # The replacement must look like a UUID4 (hex + hyphens, 36 chars). 
- uuid_re = re.compile( - r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" - ) - assert uuid_re.match(returned_id), ( - f"Invalid X-Request-ID must be replaced with a uuid4; got {returned_id!r}" - ) - - -# --------------------------------------------------------------------------- -# N-12: OBS-4 — _lookup_metadata index pre-check (item not in index → {}) -# --------------------------------------------------------------------------- - - -def test_lookup_metadata_missing_item_id_returns_empty_dict() -> None: - """_lookup_metadata returns an empty dict when item_id is not in the index. - - OBS-4 added an ``item_id not in meta_df.index`` pre-check to short-circuit - before calling .loc[], which avoids an unnecessary KeyError and improves - performance. This test verifies the short-circuit path returns {}. - """ - import pandas as pd - - from recotem.serving.routes import _lookup_metadata - - df = pd.DataFrame({"item_id": ["i1", "i2"], "title": ["Alpha", "Beta"]}).set_index( - "item_id" - ) - - result = _lookup_metadata(df, "not_in_index", frozenset(), "test_recipe") - assert result == {}, ( - "_lookup_metadata must return empty dict for missing item_id (pre-check)" - ) - - -def test_lookup_metadata_known_item_id_returns_fields() -> None: - """_lookup_metadata returns the row dict for a known item_id. - - Complement to the missing-item test: after passing the pre-check, a known - item_id must produce the expected field dict. 
- """ - import pandas as pd - - from recotem.serving.routes import _lookup_metadata - - df = pd.DataFrame( - {"item_id": ["i1"], "title": ["Alpha"], "genre": ["action"]} - ).set_index("item_id") - - result = _lookup_metadata(df, "i1", frozenset(), "test_recipe") - assert result.get("title") == "Alpha" - assert result.get("genre") == "action" - - -# --------------------------------------------------------------------------- -# MAJOR-10: predict status label separation -# --------------------------------------------------------------------------- - - -def test_predict_status_label_ok(monkeypatch) -> None: - """Successful prediction records status='ok'.""" - import pytest - - pytest.importorskip("prometheus_client") - monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "true") - - from unittest.mock import patch - - from recotem.serving import metrics - - recipe_name = "status_ok_recipe" - registry = _make_registry_with_recipe(recipe_name) - client, _ = _make_test_client(registry=registry) - metrics._ensure_initialized() - - recorded: list[str] = [] - real_record = metrics.record_predict - - def _capture(r, s, latency): - recorded.append(s) - real_record(r, s, latency) - - with patch.object(metrics, "record_predict", side_effect=_capture): - response = client.post( - f"/predict/{recipe_name}", json={"user_id": "user1", "cutoff": 5} - ) - - assert response.status_code == 200 - assert recorded == ["ok"], f"Expected status=['ok'], got {recorded!r}" - - -def test_predict_status_label_user_not_found(monkeypatch) -> None: - """User not found (404) records status='user_not_found'.""" - from unittest.mock import patch - - import pytest - - pytest.importorskip("prometheus_client") - monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "true") - - from recotem.serving import metrics - - recipe_name = "status_404_recipe" - registry = _make_registry_with_recipe(recipe_name) - client, _ = _make_test_client(registry=registry) - - recorded: list[str] = [] - real_record = metrics.record_predict - 
- def _capture(r, s, latency): - recorded.append(s) - real_record(r, s, latency) - - with patch.object(metrics, "record_predict", side_effect=_capture): - response = client.post( - f"/predict/{recipe_name}", - json={"user_id": "totally_unknown_user", "cutoff": 5}, - ) - - assert response.status_code == 404 - assert recorded == ["user_not_found"], ( - f"Expected status=['user_not_found'], got {recorded!r}" - ) - - -def test_predict_status_label_unavailable(monkeypatch) -> None: - """Recipe not loaded (503) records status='unavailable'.""" - from unittest.mock import patch - - import pytest - - pytest.importorskip("prometheus_client") - monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "true") - - from recotem.serving import metrics - - registry = ModelRegistry() # empty — no recipe registered - client, _ = _make_test_client(registry=registry) - - recorded: list[str] = [] - real_record = metrics.record_predict - - def _capture(r, s, latency): - recorded.append(s) - real_record(r, s, latency) - - with patch.object(metrics, "record_predict", side_effect=_capture): - response = client.post( - "/predict/nonexistent_recipe", json={"user_id": "user1", "cutoff": 5} - ) - - assert response.status_code == 503 - assert recorded == ["unavailable"], ( - f"Expected status=['unavailable'], got {recorded!r}" - ) - - -def test_predict_status_label_error(monkeypatch) -> None: - """Unexpected exception records status='error'.""" - from unittest.mock import patch - - import pytest - - pytest.importorskip("prometheus_client") - monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "true") - - from recotem.serving import metrics - - recipe_name = "status_error_recipe" - registry = _make_registry_with_recipe(recipe_name) - - # Build the client without raise_server_exceptions so the 500 response is - # returned rather than the RuntimeError propagating to the test. 
- router = make_router(registry=registry, api_keys=[]) - app = FastAPI() - app.include_router(router) - client = TestClient(app, raise_server_exceptions=False) - - recorded: list[str] = [] - real_record = metrics.record_predict - - def _capture(r, s, latency): - recorded.append(s) - real_record(r, s, latency) - - # Make the recommender raise a non-KeyError to exercise the generic error path - entry = registry.get(recipe_name) - assert entry is not None - entry.recommender.get_recommendation_for_known_user_id.side_effect = RuntimeError( - "unexpected internal failure" - ) - - with patch.object(metrics, "record_predict", side_effect=_capture): - client.post( - f"/predict/{recipe_name}", - json={"user_id": "user1", "cutoff": 5}, - ) - - # status label must be "error"; the HTTP response is 500 - assert recorded == ["error"], f"Expected status=['error'], got {recorded!r}" - - -def test_metrics_endpoint_exposes_documented_metrics(monkeypatch) -> None: - """RECOTEM_METRICS_ENABLED=true exposes /metrics with all six recotem_* metrics.""" - import pytest - - pytest.importorskip("prometheus_client") - monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "true") - - from recotem.serving import metrics - - client, _ = _make_test_client() - - # Drive the predict path so predict_total + predict_latency are populated. - response = client.post( - "/predict/test_recipe", - json={"user_id": "user1", "cutoff": 5}, - ) - assert response.status_code == 200 - - # The other metrics are populated by app startup / watcher in production; - # in this unit test we exercise the recorders directly so the - # gauges/counters appear in the exposition output. 
- metrics.set_model_loaded("test_recipe", True) - metrics.inc_artifact_load_failure("test_recipe") - metrics.set_active_recipes(1) - metrics.record_swap("test_recipe", ok=True) - metrics.inc_metadata_lookup_error("test_recipe") - metrics.inc_recipe_rescan_error("test_recipe") - - response = client.get("/metrics") - assert response.status_code == 200 - body = response.text - - # All metric series documented in operations.md (plus the two new ones) - # must be present in the /metrics output. - for name in ( - "recotem_predict_total", - "recotem_predict_latency_seconds", - "recotem_model_loaded", - "recotem_artifact_load_failures_total", - "recotem_active_recipes", - "recotem_swap_total", - "recotem_metadata_lookup_errors_total", - "recotem_recipe_rescan_errors_total", - ): - assert name in body, f"missing {name!r} in /metrics output" - - # Spot-check label cardinality on the metrics that carry the recipe - # label, so a future refactor that drops the label fails the test. - assert 'recotem_predict_total{recipe="test_recipe"' in body - assert 'recotem_model_loaded{recipe="test_recipe"' in body - assert 'recotem_swap_total{recipe="test_recipe"' in body - - -# --------------------------------------------------------------------------- -# CRITICAL: predict increments recotem_predict_total counter -# --------------------------------------------------------------------------- - - -def test_predict_increments_predict_total_metric(monkeypatch) -> None: - """After a POST /predict, recotem_predict_total must increment for the recipe. - - Uses a dedicated recipe name that is unique to this test to avoid - interference with the global Prometheus registry from other test runs. - Captures the counter value before and after the predict call. 
- """ - import pytest - - pytest.importorskip("prometheus_client") - monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "true") - - from recotem.serving import metrics - - recipe_name = "counter_increment_recipe" - registry = _make_registry_with_recipe(recipe_name) - client, _ = _make_test_client(registry=registry) - - # Ensure metrics are initialized (idempotent). - metrics._ensure_initialized() - - # Read the current counter value before the predict call. - before = 0.0 - if metrics._PREDICT_TOTAL is not None: - try: - before = metrics._PREDICT_TOTAL.labels( - recipe=recipe_name, status="ok" - )._value.get() - except Exception: - before = 0.0 - - response = client.post( - f"/predict/{recipe_name}", - json={"user_id": "user1", "cutoff": 5}, - ) - assert response.status_code == 200 - - # The counter must have incremented. - after = 0.0 - if metrics._PREDICT_TOTAL is not None: - try: - after = metrics._PREDICT_TOTAL.labels( - recipe=recipe_name, status="ok" - )._value.get() - except Exception: - after = 0.0 - - assert after > before, ( - f"recotem_predict_total must increment after /predict; " - f"before={before}, after={after}" - ) - - # Also confirm /metrics output contains the counter. - metrics_response = client.get("/metrics") - assert metrics_response.status_code == 200 - assert "recotem_predict_total" in metrics_response.text - - -# --------------------------------------------------------------------------- -# CRITICAL: load failure increments recotem_artifact_load_failures_total -# --------------------------------------------------------------------------- - - -def test_load_failure_increments_artifact_load_failures_total(monkeypatch) -> None: - """inc_artifact_load_failure must be visible in /metrics output. - - Calls the metric recorder directly (simulating the watcher's failure - path) and confirms the counter appears in the Prometheus exposition. 
- """ - import pytest - - pytest.importorskip("prometheus_client") - monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "true") - - from recotem.serving import metrics - - recipe_name = "fail_load_recipe_unique" - metrics._ensure_initialized() - - before = 0.0 - if metrics._ARTIFACT_LOAD_FAILURES is not None: - try: - before = metrics._ARTIFACT_LOAD_FAILURES.labels( - recipe=recipe_name - )._value.get() - except Exception: - before = 0.0 - - metrics.inc_artifact_load_failure(recipe_name) - - after = 0.0 - if metrics._ARTIFACT_LOAD_FAILURES is not None: - try: - after = metrics._ARTIFACT_LOAD_FAILURES.labels( - recipe=recipe_name - )._value.get() - except Exception: - after = 0.0 - - assert after == before + 1, ( - f"recotem_artifact_load_failures_total must increment by 1; " - f"before={before}, after={after}" - ) - - # Also confirm /metrics exposition includes the counter. - client, _ = _make_test_client() - metrics_response = client.get("/metrics") - assert metrics_response.status_code == 200 - assert "recotem_artifact_load_failures_total" in metrics_response.text - - -# --------------------------------------------------------------------------- -# C11 — predict returns 503 after recipe removed from registry -# --------------------------------------------------------------------------- - - -def test_yaml_deleted_then_predict_returns_503() -> None: - """After a recipe entry is removed from the registry, POST /predict/<name> - must return HTTP 503 with code 'recipe_unavailable'. - - This simulates the watcher removing a recipe when its YAML is deleted. - The route checks ``registry.get(name)`` — None means the recipe is gone - and the route raises HTTPException(503, code='recipe_unavailable'). - """ - registry = _make_registry_with_recipe("will_be_deleted") - client, _ = _make_test_client(registry=registry) - - # Confirm the recipe is serving before removal. 
- response_before = client.post("/predict/will_be_deleted", json={"user_id": "user1"}) - assert response_before.status_code == 200, ( - f"Predict must succeed before removal; got {response_before.status_code}" - ) - - # Simulate YAML deletion by removing the entry from the registry. - registry.remove("will_be_deleted") - - # After removal, the route must return 503. - response_after = client.post("/predict/will_be_deleted", json={"user_id": "user1"}) - assert response_after.status_code == 503, ( - f"Predict must return 503 after recipe removed from registry; " - f"got {response_after.status_code}" - ) - detail = response_after.json() - assert detail.get("detail", {}).get("code") == "recipe_unavailable", ( - f"503 response must include code='recipe_unavailable'; got: {detail!r}" - ) - - -# --------------------------------------------------------------------------- -# C-2 + M-13: _lookup_metadata error handling -# --------------------------------------------------------------------------- - - -def test_lookup_metadata_returns_empty_on_keyerror() -> None: - """_lookup_metadata returns {} when item_id is absent from the index.""" - import pandas as pd - - from recotem.serving.routes import _lookup_metadata - - df = pd.DataFrame({"item_id": ["i1", "i2"], "title": ["A", "B"]}).set_index( - "item_id" - ) - - result = _lookup_metadata(df, "not_in_index", frozenset(), "test_recipe") - assert result == {} - - -def test_lookup_metadata_swallows_attribute_error_increments_metric( - monkeypatch, -) -> None: - """AttributeError during row.to_dict() returns empty dict, logs, and increments - recotem_metadata_lookup_errors_total. - - A non-unique index makes .loc[] return a DataFrame (not a Series), whose - .to_dict() returns a dict-of-lists rather than a flat dict — iterating it - with (.items() → .lower()) raises AttributeError on the list values. - We simulate this by using a mock that raises AttributeError on to_dict(). 
- - OBS-4: _lookup_metadata now performs an ``item_id not in meta_df.index`` - pre-check before calling .loc[]. We therefore configure the mock's - ``__contains__`` to return True so the pre-check passes and the code - reaches the AttributeError path. - """ - import structlog.testing - - from recotem.serving import metrics - from recotem.serving.routes import _lookup_metadata - - # Build a mock row object that raises AttributeError on to_dict() - class _BadRow: - def to_dict(self): - raise AttributeError("simulated: non-unique index returned DataFrame") - - # Patch .loc as a property that returns _BadRow for any key. - # Also configure the index so __contains__ returns True — after OBS-4 the - # pre-check ``item_id not in meta_df.index`` would otherwise short-circuit - # and the AttributeError path would never be reached. - bad_df = MagicMock() - bad_df.index.__contains__ = MagicMock(return_value=True) - bad_df.loc.__getitem__ = MagicMock(return_value=_BadRow()) - - metrics._ensure_initialized() - before = 0.0 - if metrics._METADATA_LOOKUP_ERRORS is not None: - try: - before = metrics._METADATA_LOOKUP_ERRORS.labels( - recipe="attr_err_recipe" - )._value.get() - except Exception: - before = 0.0 - - with structlog.testing.capture_logs() as cap: - result = _lookup_metadata(bad_df, "some_item", frozenset(), "attr_err_recipe") - - assert result == {}, "AttributeError must result in empty dict return" - - after = 0.0 - if metrics._METADATA_LOOKUP_ERRORS is not None: - try: - after = metrics._METADATA_LOOKUP_ERRORS.labels( - recipe="attr_err_recipe" - )._value.get() - except Exception: - after = 0.0 - - import pytest - - pytest.importorskip("prometheus_client") - assert after == before + 1, ( - f"recotem_metadata_lookup_errors_total must increment by 1 on AttributeError; " - f"before={before}, after={after}" - ) - - warn_events = [e for e in cap if e.get("event") == "metadata_lookup_failed"] - assert warn_events, "metadata_lookup_failed WARN must be emitted on 
AttributeError" - evt = warn_events[0] - assert evt.get("recipe") == "attr_err_recipe" - assert evt.get("error_class") == "AttributeError" - - -def test_lookup_metadata_skips_non_string_columns() -> None: - """DataFrame with int column names must not crash — int columns are skipped.""" - import pandas as pd - - from recotem.serving.routes import _lookup_metadata - - # Construct a DataFrame with an int column name and a string column name. - df = pd.DataFrame([[1, "hello"]], columns=[42, "title"]) - df.index = pd.Index(["item_x"], name="item_id") - - result = _lookup_metadata(df, "item_x", frozenset(), "int_col_recipe") - - # int column 42 must be silently skipped, string column 'title' kept. - assert 42 not in result, "int column name must be omitted from the output" - assert "title" in result, "'title' string column must be present" - assert result["title"] == "hello" - - -# --------------------------------------------------------------------------- -# M-14: {name} path param regex validation -# --------------------------------------------------------------------------- - - -def test_predict_with_invalid_name_returns_422_not_503() -> None: - """Arbitrary strings in the recipe name path param must return 422. - - FastAPI should validate the ``name`` path parameter against the pattern - ``^[A-Za-z0-9_-]{1,64}$`` before reaching any business logic. Sending - characters outside that set (e.g. slashes, unicode, shell metacharacters) - must produce a 422 Unprocessable Entity, not a 503 that reflects the - arbitrary string into the response body. 
- """ - client, _ = _make_test_client() - - for bad_name in [ - "recipe with spaces", # space not in [A-Za-z0-9_-] - "recipe!@#meta", # special characters - "a" * 65, # over 64 chars - "recipe.dotted", # dot not in character class - ]: - response = client.post( - "/predict/" + bad_name, - json={"user_id": "user1"}, - ) - assert response.status_code == 422, ( - f"Expected 422 for name={bad_name!r}, got {response.status_code}" - ) - - -# --------------------------------------------------------------------------- -# P-2: JSONResponse hot-path optimization (R-2) -# --------------------------------------------------------------------------- - - -def test_predict_response_is_json_response_bypassing_pydantic_serialization() -> None: - """The /predict handler must return JSONResponse (plain dict) — not a pydantic model. - - R-2 optimization: returning JSONResponse(content=dict) bypasses the second - pydantic serialization pass that FastAPI performs when a route returns a - model instance. The route keeps ``response_model=PredictResponse`` for - OpenAPI schema generation but FastAPI skips validation when the return - value is a Response subclass. - - This test verifies: - 1. The response status code is 200. - 2. The JSON wire format matches the documented PredictResponse schema. - 3. Pydantic model_construct is NOT called on the hot path (plain dicts used - instead of model instances), confirming the optimization is in place. 
- """ - from unittest.mock import call, patch - - from recotem.serving import routes as _routes - - registry = _make_registry_with_recipe() - client, _ = _make_test_client(registry=registry) - - predict_response_construct_calls: list[call] = [] - original_pr = _routes.PredictResponse.model_construct - - def _spy_pr(*args, **kwargs): - predict_response_construct_calls.append(call(*args, **kwargs)) - return original_pr(*args, **kwargs) - - with patch.object(_routes.PredictResponse, "model_construct", side_effect=_spy_pr): - response = client.post( - "/predict/test_recipe", json={"user_id": "user1", "cutoff": 5} - ) - - assert response.status_code == 200, ( - f"Predict must succeed; got {response.status_code}" - ) - # R-2: hot path must NOT call PredictResponse.model_construct — it returns - # a plain dict via JSONResponse, bypassing the second pydantic serialization. - assert len(predict_response_construct_calls) == 0, ( - "R-2: predict hot path must not call PredictResponse.model_construct; " - "it should return JSONResponse(content=dict) directly. " - f"Got {len(predict_response_construct_calls)} call(s)." - ) - - # Wire format must match the documented PredictResponse schema. - data = response.json() - assert "items" in data, "Response must contain 'items'" - assert "model" in data, "Response must contain 'model'" - assert "request_id" in data, "Response must contain 'request_id'" - assert len(data["items"]) > 0, "items list must not be empty" - item = data["items"][0] - assert "item_id" in item, "Each item must have 'item_id'" - assert "score" in item, "Each item must have 'score'" - model_block = data["model"] - assert "recipe" in model_block, "model block must have 'recipe'" - assert "kid" in model_block, "model block must have 'kid'" - - -def test_predict_response_still_serializes_correctly_after_model_construct() -> None: - """model_construct path must produce identical JSON wire format to the validated path. 
- - Builds a PredictResponse via model_construct (the new hot path) and via - normal __init__ (the validated path) and asserts that FastAPI's jsonable_encoder - produces the same output for both, confirming the optimization doesn't break - the wire format. - """ - import json - - from fastapi.encoders import jsonable_encoder - - from recotem.serving.routes import ModelInfo, PredictResponse, RecommendationItem - - # Construct via normal __init__ (fully validated path). - validated = PredictResponse( - items=[ - RecommendationItem(item_id="item1", score=0.9), - RecommendationItem(item_id="item2", score=0.8), - ], - model=ModelInfo( - recipe="test_recipe", - trained_at="2026-01-01T00:00:00Z", - best_class="TopPopRecommender", - kid="active", - ), - request_id="test-request-id-123", - ) - - # Construct via model_construct (the optimized hot path). - optimized = PredictResponse.model_construct( - items=[ - # scores cast to float, item_id is str from IDMappedRecommender - RecommendationItem.model_construct(item_id="item1", score=0.9), - RecommendationItem.model_construct(item_id="item2", score=0.8), - ], - # name is FastAPI-validated, trained_at/best_class/kid from artifact header - model=ModelInfo.model_construct( - recipe="test_recipe", - trained_at="2026-01-01T00:00:00Z", - best_class="TopPopRecommender", - kid="active", - ), - request_id="test-request-id-123", - ) - - validated_json = json.dumps(jsonable_encoder(validated), sort_keys=True) - optimized_json = json.dumps(jsonable_encoder(optimized), sort_keys=True) - - assert validated_json == optimized_json, ( - f"model_construct and __init__ must produce identical JSON wire format.\n" - f"validated: {validated_json}\n" - f"optimized: {optimized_json}" - ) - - -# --------------------------------------------------------------------------- -# P-1: /predict uses metadata_index for O(1) lookup -# --------------------------------------------------------------------------- - - -def 
test_predict_uses_metadata_index_O1_lookup() -> None: - """When metadata_index is populated, /predict uses dict.get rather than - iterating the DataFrame. - - This test verifies the fast path at the integration level: - - Build a ModelEntry with metadata_index set and metadata_df=None. - - POST /predict and confirm metadata fields appear in the response. - - The DataFrame path is unreachable (metadata_df is None), proving the - response must have come from the dict index. - """ - from recotem.serving.registry import ModelEntry, ModelRegistry - - metadata_index = { - "item1": {"title": "Widget Alpha", "category": "tools"}, - "item2": {"title": "Widget Beta", "category": "garden"}, - } - - recommender = MagicMock() - recommender.get_recommendation_for_known_user_id.return_value = [ - ("item1", 0.9), - ("item2", 0.8), - ] - - entry = ModelEntry( - name="index_recipe", - recommender=recommender, - header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, - kid="k1", - metadata_df=None, # DataFrame path explicitly unavailable. - metadata_index=metadata_index, # Only the index is set. - ) - registry = ModelRegistry() - registry.replace("index_recipe", entry) - - router = make_router(registry=registry, api_keys=[]) - from fastapi import FastAPI - from fastapi.testclient import TestClient - - app = FastAPI() - app.include_router(router) - client = TestClient(app) - - response = client.post( - "/predict/index_recipe", - json={"user_id": "user1", "cutoff": 2}, - ) - assert response.status_code == 200, ( - f"Expected 200; got {response.status_code}: {response.text}" - ) - items = response.json()["items"] - assert len(items) == 2 - - # item1 must carry its metadata fields from the pre-flattened index. 
- item1 = next(it for it in items if it["item_id"] == "item1") - assert item1.get("title") == "Widget Alpha", ( - f"'title' from metadata_index must appear in response; got {item1!r}" - ) - assert item1.get("category") == "tools", ( - f"'category' from metadata_index must appear in response; got {item1!r}" - ) - - # item2 must also carry its fields. - item2 = next(it for it in items if it["item_id"] == "item2") - assert item2.get("title") == "Widget Beta" - assert item2.get("category") == "garden" - - -def test_predict_metadata_index_missing_item_returns_empty_fields() -> None: - """When item_id is absent from the metadata_index, no extra fields are added. - - dict.get(item_id, {}) returns an empty dict for unknown items — the - response item contains only item_id and score, never crashes. - """ - from recotem.serving.registry import ModelEntry, ModelRegistry - - metadata_index = {"known_item": {"title": "Known"}} - - recommender = MagicMock() - recommender.get_recommendation_for_known_user_id.return_value = [ - ("known_item", 0.9), - ("unknown_item", 0.5), # not in index - ] - - entry = ModelEntry( - name="partial_index_recipe", - recommender=recommender, - header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, - kid="k1", - metadata_df=None, - metadata_index=metadata_index, - ) - registry = ModelRegistry() - registry.replace("partial_index_recipe", entry) - - router = make_router(registry=registry, api_keys=[]) - from fastapi import FastAPI - from fastapi.testclient import TestClient - - app = FastAPI() - app.include_router(router) - client = TestClient(app) - - response = client.post( - "/predict/partial_index_recipe", - json={"user_id": "user1", "cutoff": 2}, - ) - assert response.status_code == 200 - items = response.json()["items"] - assert len(items) == 2 - - known = next(it for it in items if it["item_id"] == "known_item") - assert known.get("title") == "Known" - - unknown = next(it for it in items if it["item_id"] == "unknown_item") - # No 
title or extra fields -- only item_id and score. - assert "title" not in unknown, "Unknown item must not have metadata fields" - assert "item_id" in unknown and "score" in unknown - - -# --------------------------------------------------------------------------- -# Fix 2: unbind_contextvars must not wipe upstream middleware bindings -# --------------------------------------------------------------------------- - - -def test_predict_unbinds_only_handler_keys_not_upstream_context() -> None: - """After predict() returns, only the keys it bound (recipe, request_id, kid) - must be removed from the structlog context. Upstream keys set by middleware - (e.g. trace_id) must remain intact. - - Pre-fix: `clear_contextvars()` wiped the entire context including upstream - bindings. Fix replaces it with `unbind_contextvars("recipe", "request_id", - "kid")` so only handler-owned keys are removed. - """ - import structlog.contextvars - - from recotem.serving.registry import ModelEntry, ModelRegistry - from recotem.serving.routes import make_router - - recommender = MagicMock() - recommender.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] - - entry = ModelEntry( - name="ctx_recipe", - recommender=recommender, - header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, - kid="active", - ) - registry = ModelRegistry() - registry.replace("ctx_recipe", entry) - - router = make_router(registry=registry, api_keys=[]) - from fastapi import FastAPI - from fastapi.testclient import TestClient - - upstream_key_preserved: list[bool] = [] - - # Middleware that binds an upstream context key before the route handler runs - # and checks it is still present after the handler returns. 
- from starlette.middleware.base import BaseHTTPMiddleware - from starlette.requests import Request - - class _UpstreamMiddleware(BaseHTTPMiddleware): - async def dispatch(self, request: Request, call_next): - structlog.contextvars.bind_contextvars(trace_id="upstream-trace-123") - response = await call_next(request) - # After the route handler finishes, trace_id must still be in context - ctx = structlog.contextvars.get_contextvars() - upstream_key_preserved.append("trace_id" in ctx) - structlog.contextvars.clear_contextvars() # cleanup after ourselves - return response - - app = FastAPI() - app.add_middleware(_UpstreamMiddleware) - app.include_router(router) - client = TestClient(app) - - response = client.post("/predict/ctx_recipe", json={"user_id": "user1"}) - assert response.status_code == 200 - - assert upstream_key_preserved, "Middleware dispatch must have run" - assert upstream_key_preserved[0], ( - "predict() must NOT call clear_contextvars() — it must only unbind its " - "own keys (recipe, request_id, kid), leaving upstream bindings intact" - ) - - -# --------------------------------------------------------------------------- -# R-3: metadata column named 'item_id' or 'score' cannot shadow recommender values -# --------------------------------------------------------------------------- - - -def test_predict_metadata_score_column_does_not_shadow_recommender_score() -> None: - """A metadata column named 'score' must not override the recommender's score. - - R-3 fix: item_id and score are re-written AFTER fields.update(metadata) - so that trusted recommender values always win regardless of what column - names the metadata file contains. - """ - from recotem.serving.registry import ModelEntry, ModelRegistry - from recotem.serving.routes import make_router - - # Metadata index contains a 'score' column with a different value. 
- metadata_index = { - "item1": {"title": "Widget A", "score": 999.0}, # rogue column - "item2": {"title": "Widget B", "score": -1.0}, # rogue column - } - - recommender = MagicMock() - recommender.get_recommendation_for_known_user_id.return_value = [ - ("item1", 0.9), - ("item2", 0.8), - ] - - entry = ModelEntry( - name="score_shadow_recipe", - recommender=recommender, - header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, - kid="k1", - metadata_index=metadata_index, - ) - registry = ModelRegistry() - registry.replace("score_shadow_recipe", entry) - - router = make_router(registry=registry, api_keys=[]) - from fastapi import FastAPI - from fastapi.testclient import TestClient - - app = FastAPI() - app.include_router(router) - client = TestClient(app) - - response = client.post( - "/predict/score_shadow_recipe", - json={"user_id": "user1", "cutoff": 2}, - ) - assert response.status_code == 200 - items = response.json()["items"] - assert len(items) == 2 - - item1 = next(it for it in items if it["item_id"] == "item1") - assert item1["score"] == pytest.approx(0.9), ( - f"item1 score must be 0.9 (recommender value), not 999.0 (metadata column); " - f"got {item1['score']}" - ) - item2 = next(it for it in items if it["item_id"] == "item2") - assert item2["score"] == pytest.approx(0.8), ( - f"item2 score must be 0.8 (recommender value), not -1.0 (metadata column); " - f"got {item2['score']}" - ) - # The 'title' metadata column must still be present. - assert item1.get("title") == "Widget A", "'title' from metadata must appear" - - -def test_predict_metadata_item_id_column_does_not_shadow_recommender_item_id() -> None: - """A metadata column named 'item_id' must not override the recommender's item_id. - - R-3 fix: item_id is re-written AFTER fields.update(metadata) so that a - rogue metadata column cannot inject a different item_id into the response. 
- """ - from recotem.serving.registry import ModelEntry, ModelRegistry - from recotem.serving.routes import make_router - - # Metadata index where 'item_id' column value differs from the key. - metadata_index = { - "real_item": {"item_id": "injected_item", "title": "Some Product"}, - } - - recommender = MagicMock() - recommender.get_recommendation_for_known_user_id.return_value = [ - ("real_item", 0.7), - ] - - entry = ModelEntry( - name="itemid_shadow_recipe", - recommender=recommender, - header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, - kid="k1", - metadata_index=metadata_index, - ) - registry = ModelRegistry() - registry.replace("itemid_shadow_recipe", entry) - - router = make_router(registry=registry, api_keys=[]) - from fastapi import FastAPI - from fastapi.testclient import TestClient - - app = FastAPI() - app.include_router(router) - client = TestClient(app) - - response = client.post( - "/predict/itemid_shadow_recipe", - json={"user_id": "user1", "cutoff": 1}, - ) - assert response.status_code == 200 - items = response.json()["items"] - assert len(items) == 1 - - item = items[0] - assert item["item_id"] == "real_item", ( - f"item_id must be the recommender's value 'real_item', " - f"not the metadata column value 'injected_item'; got {item['item_id']!r}" - ) - - -# --------------------------------------------------------------------------- -# R-2: /predict returns JSONResponse (wire format matches documented schema) -# --------------------------------------------------------------------------- - - -def test_predict_response_wire_format_matches_predict_response_schema() -> None: - """JSONResponse path must produce the same wire format as PredictResponse. - - Constructs both a PredictResponse instance (validated path) and the plain - dict returned by the route (R-2 JSONResponse path) and asserts they encode - identically via jsonable_encoder. 
- - This confirms that switching from model_construct to plain dicts does not - alter the documented API contract. - """ - import json - - from fastapi.encoders import jsonable_encoder - - from recotem.serving.routes import ModelInfo, PredictResponse, RecommendationItem - - # Build the reference via normal PredictResponse __init__. - reference = PredictResponse( - items=[ - RecommendationItem(item_id="item1", score=0.9), - RecommendationItem(item_id="item2", score=0.8), - ], - model=ModelInfo( - recipe="test_recipe", - trained_at="2026-01-01T00:00:00Z", - best_class="TopPopRecommender", - kid="active", - ), - request_id="test-request-id-123", - ) - - # Build what the R-2 route now returns as the JSONResponse content dict. - route_dict: dict = { - "items": [ - {"item_id": "item1", "score": 0.9}, - {"item_id": "item2", "score": 0.8}, - ], - "model": { - "recipe": "test_recipe", - "trained_at": "2026-01-01T00:00:00Z", - "best_class": "TopPopRecommender", - "kid": "active", - }, - "request_id": "test-request-id-123", - } - - ref_json = json.dumps(jsonable_encoder(reference), sort_keys=True) - route_json = json.dumps(route_dict, sort_keys=True) - - assert ref_json == route_json, ( - f"R-2 plain-dict route output must match PredictResponse schema.\n" - f"reference: {ref_json}\n" - f"route: {route_json}" - ) - - -# --------------------------------------------------------------------------- -# CRIT-2: _lookup_metadata — unexpected KeyError → warning + metric -# --------------------------------------------------------------------------- - - -def test_lookup_metadata_keyerror_on_loc_emits_warning_and_increments_metric() -> None: - """CRIT-2: When item_id passes the index check but .loc[] raises KeyError - (non-unique index / corrupt state), a WARNING must be emitted and the - metadata_lookup_errors metric must be incremented. 
- """ - from unittest.mock import MagicMock, patch - - import pandas as pd - import structlog.testing - - from recotem.serving import metrics as _metrics - from recotem.serving.routes import _lookup_metadata - - # Build a DataFrame whose .loc[] raises KeyError even for a present key. - # We simulate this with a mock .loc that raises KeyError. - df = pd.DataFrame( - {"title": ["A", "B"]}, - index=pd.Index(["i1", "i1"]), # duplicate index → .loc raises MultipleValues - ) - - # pandas .loc on a non-unique index with a scalar key returns a DataFrame, - # which then has .to_dict() behaving unexpectedly — but we also want to test - # the pure KeyError branch, so we monkeypatch loc directly. - class _BrokenLocAccessor: - def __getitem__(self, key): - raise KeyError(key) - - df_mock = MagicMock() - df_mock.index = df.index - df_mock.loc = _BrokenLocAccessor() - # item_id is 'i1' which IS in the duplicate index - df_mock.index.__contains__ = lambda self, item: True - - inc_calls: list[str] = [] - original_inc = _metrics.inc_metadata_lookup_error - - def _counting_inc(name: str) -> None: - inc_calls.append(name) - original_inc(name) - - with structlog.testing.capture_logs() as cap: - with patch.object( - _metrics, "inc_metadata_lookup_error", side_effect=_counting_inc - ): - result = _lookup_metadata(df_mock, "i1", frozenset(), "my_recipe") - - assert result == {}, "_lookup_metadata must return {} on KeyError" - - warn_events = [ - e - for e in cap - if e.get("event") == "metadata_lookup_unexpected_keyerror" - and e.get("log_level") == "warning" - ] - assert warn_events, ( - "metadata_lookup_unexpected_keyerror must be logged at WARNING; " - f"got events: {[e for e in cap if 'metadata' in e.get('event', '')]!r}" - ) - assert warn_events[0].get("recipe") == "my_recipe" - - assert "my_recipe" in inc_calls, ( - "inc_metadata_lookup_error must be called for the unexpected KeyError path" - ) - - -# --------------------------------------------------------------------------- -# 
CRIT-3: predict handler — unexpected exception logging -# --------------------------------------------------------------------------- - - -def test_predict_handler_logs_exception_on_unexpected_error() -> None: - """CRIT-3: When get_recommendation_for_known_user_id raises an unexpected - exception (not KeyError), predict_handler_unexpected_error must be logged - and the response must be 500. - """ - import structlog.testing - - recommender = MagicMock() - recommender.get_recommendation_for_known_user_id.side_effect = ValueError( - "unexpected internal error" - ) - - entry = ModelEntry( - name="err_recipe", - recommender=recommender, - header={"best_class": "TopPop"}, - kid="active", - ) - registry = ModelRegistry() - registry.replace("err_recipe", entry) - - router = make_router(registry=registry, api_keys=[]) - from fastapi import FastAPI - from fastapi.testclient import TestClient - - app = FastAPI() - app.include_router(router) - client = TestClient(app, raise_server_exceptions=False) - - with structlog.testing.capture_logs() as cap: - resp = client.post("/predict/err_recipe", json={"user_id": "u1", "cutoff": 5}) - - assert resp.status_code == 500, ( - f"Unexpected ValueError must yield 500; got {resp.status_code}" - ) - - exc_events = [ - e for e in cap if e.get("event") == "predict_handler_unexpected_error" - ] - assert exc_events, ( - "predict_handler_unexpected_error must be logged when recommender raises unexpectedly" - ) - assert exc_events[0].get("error_class") == "ValueError" - assert exc_events[0].get("name") == "err_recipe" - - -def test_predict_503_no_entry_logs_reason_no_entry() -> None: - """CRIT-3: When the recipe does not exist, recipe_unavailable with - reason='no_entry' must be logged before returning 503. 
- """ - import structlog.testing - - registry = ModelRegistry() # empty - - router = make_router(registry=registry, api_keys=[]) - from fastapi import FastAPI - from fastapi.testclient import TestClient - - app = FastAPI() - app.include_router(router) - client = TestClient(app, raise_server_exceptions=False) - - with structlog.testing.capture_logs() as cap: - resp = client.post("/predict/no_such", json={"user_id": "u1", "cutoff": 5}) - - assert resp.status_code == 503 - - events = [ - e - for e in cap - if e.get("event") == "recipe_unavailable" and e.get("reason") == "no_entry" - ] - assert events, ( - "recipe_unavailable with reason='no_entry' must be logged when entry is missing; " - f"got: {[e for e in cap if 'recipe_unavailable' in str(e)]!r}" - ) - - -# --------------------------------------------------------------------------- -# I-3: /health returns only aggregate {status, total, loaded} — no per-recipe -# --------------------------------------------------------------------------- - - -def test_health_returns_status_total_loaded_only() -> None: - """I-3: /health must return only {status, total, loaded}. - - Per-recipe detail (kid, trained_at, best_class, error) is now - gated behind /health/details (authenticated). - """ - client, _ = _make_test_client() - response = client.get("/health") - assert response.status_code == 200 - body = response.json() - assert "status" in body - assert "total" in body - assert "loaded" in body - # Must NOT expose per-recipe detail without auth. - assert "recipes" not in body, ( - "/health must not expose per-recipe data (I-3: moved to /health/details)" - ) - - -def test_health_total_and_loaded_counts_match_registry() -> None: - """I-3: total and loaded counts in /health must reflect registry state.""" - registry = _make_registry_with_recipe("r1") - # Add a second, broken recipe. 
- broken = ModelEntry( - name="r2", - recommender=None, - header={}, - kid="", - loaded=False, - last_load_error="failed", - ) - registry.replace("r2", broken) - - client, _ = _make_test_client(registry=registry) - response = client.get("/health") - assert response.status_code == 503 # degraded - body = response.json() - assert body["status"] == "degraded" - assert body["total"] == 2 - assert body["loaded"] == 1 - - -def test_health_details_returns_per_recipe_data() -> None: - """I-3: /health/details must include the per-recipe breakdown.""" - client, _ = _make_test_client() - response = client.get("/health/details") - assert response.status_code == 200 - body = response.json() - assert "status" in body - assert "recipes" in body - assert "test_recipe" in body["recipes"] - - -def test_health_details_401_when_auth_configured() -> None: - """I-3: /health/details returns 401 when API keys are configured and - no key is provided. - """ - import hashlib - - from fastapi import FastAPI - from fastapi.testclient import TestClient - - from recotem.config import ApiKeyEntry - - registry = _make_registry_with_recipe() - plaintext = "api_key_32_bytes_exactly_here!!!" 
- sha256_hex = hashlib.scrypt( - plaintext.encode(), - salt=b"recotem.api-key.v1", - n=2, - r=8, - p=1, - dklen=32, - ).hex() - entry = ApiKeyEntry(kid="k1", sha256_hex=sha256_hex) - router = make_router(registry=registry, api_keys=[entry]) - app = FastAPI() - app.include_router(router) - client = TestClient(app, raise_server_exceptions=False) - - # /health is open (no auth) - response = client.get("/health") - assert response.status_code == 200 - - # /health/details requires auth - response_details = client.get("/health/details") - assert response_details.status_code == 401, ( - f"/health/details must return 401 without auth key; got {response_details.status_code}" - ) - - -# --------------------------------------------------------------------------- -# I-11: metadata degradation signals X-Recotem-Metadata-Degraded header -# --------------------------------------------------------------------------- - - -def test_metadata_degraded_header_set_when_lookup_fails() -> None: - """I-11: When metadata lookup fails for one or more items in the cutoff, - the response must include X-Recotem-Metadata-Degraded: 1 header. - """ - from unittest.mock import MagicMock - - import pandas as pd - from fastapi import FastAPI - from fastapi.testclient import TestClient - - from recotem.serving.registry import ModelEntry, ModelRegistry - from recotem.serving.routes import make_router - - # Build a DataFrame where .loc[] will raise AttributeError for one item. - # We mock the DataFrame so that item "bad_item" is in the index but - # row retrieval fails, causing _lookup_metadata to return {} with a failure. 
- real_df = pd.DataFrame( - {"item_id": ["good_item", "bad_item"], "title": ["Good", "Bad"]} - ).set_index("item_id") - - class _BrokenLocDF: - """DataFrame-like object where 'bad_item' causes AttributeError on to_dict().""" - - @property - def index(self): - return real_df.index - - def __getitem__(self, key): - return self - - @property - def loc(self): - return self - - def __getitem__(self, key): # noqa: F811 - if key == "bad_item": - - class _BadRow: - def to_dict(self): - raise AttributeError("simulated metadata failure") - - return _BadRow() - return real_df.loc[key] - - broken_df = MagicMock() - broken_df.index = real_df.index - # Make "bad_item" appear in the index but fail on to_dict() - broken_df.index.__contains__ = lambda self_, item: item in real_df.index - - class _BrokenLoc: - def __getitem__(self, key): - if key == "bad_item": - - class _BadRow: - def to_dict(self): - raise AttributeError("simulated metadata failure") - - return _BadRow() - return real_df.loc[key] - - broken_df.loc = _BrokenLoc() - - recommender = MagicMock() - recommender.get_recommendation_for_known_user_id.return_value = [ - ("good_item", 0.9), - ("bad_item", 0.5), - ] - - entry = ModelEntry( - name="meta_degrade", - recommender=recommender, - header={"best_class": "TopPop", "trained_at": "2026-01-01T00:00:00Z"}, - kid="k1", - metadata_df=broken_df, - metadata_index=None, # force DataFrame path - ) - registry = ModelRegistry() - registry.replace("meta_degrade", entry) - - router = make_router(registry=registry, api_keys=[]) - app = FastAPI() - app.include_router(router) - client = TestClient(app) - - response = client.post( - "/predict/meta_degrade", - json={"user_id": "user1", "cutoff": 2}, - ) - assert response.status_code == 200 - assert response.headers.get("X-Recotem-Metadata-Degraded") == "1", ( - "When metadata lookup fails, X-Recotem-Metadata-Degraded: 1 must be set; " - f"got headers: {dict(response.headers)!r}" - ) - - -def 
test_no_metadata_degraded_header_when_lookup_succeeds() -> None: - """I-11: When metadata lookup succeeds for all items, no - X-Recotem-Metadata-Degraded header must be set. - """ - client, _ = _make_test_client() - response = client.post( - "/predict/test_recipe", json={"user_id": "user1", "cutoff": 2} - ) - assert response.status_code == 200 - assert "X-Recotem-Metadata-Degraded" not in response.headers, ( - "X-Recotem-Metadata-Degraded must NOT be set when all metadata lookups succeed" - ) diff --git a/tests/unit/test_serving_schemas.py b/tests/unit/test_serving_schemas.py new file mode 100644 index 00000000..452b930d --- /dev/null +++ b/tests/unit/test_serving_schemas.py @@ -0,0 +1,590 @@ +# tests/unit/test_serving_schemas.py +"""Unit tests for recotem.serving.schemas (v1).""" + +from datetime import UTC + +import pytest +from pydantic import ValidationError + +from recotem.serving.schemas import ( + BatchRecommendRelatedRequest, + BatchRecommendRequest, + BatchResultErr, + BatchResultOk, + ErrorDetail, + RecipeDetailResponse, + RecipesListResponse, + RecipeSummary, + RecommendItem, + RecommendRelatedRequest, + RecommendRequest, + RecommendResponse, +) + + +def test_recommend_request_defaults_limit_10(): + req = RecommendRequest(user_id="u1") + assert req.limit == 10 + assert req.exclude_items is None + + +def test_recommend_request_rejects_empty_user_id(): + with pytest.raises(ValidationError): + RecommendRequest(user_id="") + + +def test_recommend_request_limit_bounds(): + with pytest.raises(ValidationError): + RecommendRequest(user_id="u1", limit=0) + with pytest.raises(ValidationError): + RecommendRequest(user_id="u1", limit=1001) + + +def test_recommend_related_request_requires_non_empty_seed(): + with pytest.raises(ValidationError): + RecommendRelatedRequest(seed_items=[]) + + +def test_recommend_related_request_caps_seed_at_100(): + RecommendRelatedRequest(seed_items=[f"i{i}" for i in range(100)]) + with pytest.raises(ValidationError): + 
RecommendRelatedRequest(seed_items=[f"i{i}" for i in range(101)]) + + +def test_recommend_item_allows_extra_metadata_fields(): + item = RecommendItem(item_id="i1", score=0.5, title="Hello") + dumped = item.model_dump() + assert dumped["title"] == "Hello" + assert dumped["item_id"] == "i1" + + +def test_batch_recommend_request_requires_at_least_one(): + with pytest.raises(ValidationError): + BatchRecommendRequest(requests=[]) + + +def test_batch_recommend_request_caps_at_256(): + BatchRecommendRequest(requests=[{"user_id": f"u{i}"} for i in range(256)]) + with pytest.raises(ValidationError): + BatchRecommendRequest(requests=[{"user_id": f"u{i}"} for i in range(257)]) + + +def test_batch_recommend_related_request_caps_at_256(): + seeds = [{"seed_items": [f"i{i}"]} for i in range(256)] + BatchRecommendRelatedRequest(requests=seeds) + with pytest.raises(ValidationError): + BatchRecommendRelatedRequest(requests=seeds + [seeds[0]]) + + +def test_batch_recommend_request_accepts_arbitrary_dict_for_runtime_validation(): + """Per-element schema validation is deferred to the handler: malformed + entries must NOT cause a whole-batch 422 at the wrapper level.""" + # Whole-request schema accepts a malformed sub-entry; the handler will + # surface it as status=error, code=VALIDATION_ERROR per-element. 
+ BatchRecommendRequest(requests=[{"user_id": "u1"}, {"limit": 9999}]) + + +def test_batch_result_entry_status_enum(): + """BatchResultOk and BatchResultErr are the two concrete discriminated variants.""" + BatchResultOk(index=0, status="ok", items=[]) + BatchResultErr( + index=0, + status="error", + error=ErrorDetail(code="VALIDATION_ERROR", message="m"), + ) + # Wrong status literal on concrete class + with pytest.raises(ValidationError): + BatchResultOk(index=0, status="error", items=[]) # type: ignore[arg-type] + with pytest.raises(ValidationError): + BatchResultErr( + index=0, + status="ok", + error=ErrorDetail(code="INTERNAL_ERROR", message="m"), # type: ignore[arg-type] + ) + + +def test_batch_result_entry_rejects_unknown_error_code(): + with pytest.raises(ValidationError): + ErrorDetail(code="NOT_A_REAL_CODE", message="m") # type: ignore[arg-type] + + +def test_batch_result_ok_requires_items(): + """BatchResultOk must carry items; it has no error field.""" + # items is a required field + with pytest.raises((ValidationError, TypeError)): + BatchResultOk(index=0, status="ok") # type: ignore[call-arg] + + +def test_batch_result_err_requires_error(): + """BatchResultErr must carry an error; it has no items field.""" + with pytest.raises((ValidationError, TypeError)): + BatchResultErr(index=0, status="error") # type: ignore[call-arg] + + +def test_batch_result_entry_rejects_negative_index(): + with pytest.raises(ValidationError): + BatchResultOk(index=-1, status="ok", items=[]) + + +_VALID_SHA256 = "sha256:" + "a" * 64 +_VALID_HEX64 = "a" * 64 + + +def test_recommend_response_round_trip(): + r = RecommendResponse( + request_id="req_1", + recipe="r", + model_version=_VALID_SHA256, + items=[RecommendItem(item_id="i1", score=0.9)], + ) + assert r.model_dump()["items"][0]["item_id"] == "i1" + + +def test_recipe_summary_supports_verb_list(): + s = RecipeSummary( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + 
supported_verbs=["recommend", "recommend-related"], + kind="user-item", + ) + assert "recommend" in s.supported_verbs + + +def test_recipes_list_response_is_serialisable(): + s = RecipeSummary( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + ) + payload = RecipesListResponse(recipes=[s]).model_dump() + assert payload["recipes"][0]["name"] == "r" + + +def test_recipe_summary_allows_none_model_version(): + """Stub entries emit model_version=None (not loaded yet).""" + s = RecipeSummary( + name="r", + model_version=None, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + ) + assert s.model_version is None + + +def test_recipe_summary_rejects_empty_supported_verbs(): + """A loaded recipe must advertise at least one verb.""" + with pytest.raises(ValidationError): + RecipeSummary( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=[], + kind="user-item", + ) + + +def test_recipe_detail_response_includes_config_digest(): + d = RecipeDetailResponse( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + config_digest=_VALID_SHA256, + algorithms=["TopPop"], + best_algorithm="TopPop", + ) + assert d.config_digest == _VALID_SHA256 + + +def test_recipe_detail_config_digest_accepts_sha256_format(): + """config_digest: must be Sha256Hex or None.""" + # Valid Sha256Hex + d = RecipeDetailResponse( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + config_digest=_VALID_SHA256, + algorithms=["TopPop"], + best_algorithm="TopPop", + ) + assert d.config_digest == _VALID_SHA256 + + # None is also valid (stub / no digest available) + d2 = RecipeDetailResponse( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + 
supported_verbs=["recommend"], + kind="user-item", + config_digest=None, + algorithms=["TopPop"], + best_algorithm="TopPop", + ) + assert d2.config_digest is None + + # Empty string must be rejected (not a valid Sha256Hex) + with pytest.raises(ValidationError): + RecipeDetailResponse( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + config_digest="", + algorithms=["TopPop"], + best_algorithm="TopPop", + ) + + +def test_recipe_detail_recipe_hash_accepts_hexhash_format(): + """recipe_hash: must be HexHash (64 lowercase hex chars) or None.""" + d = RecipeDetailResponse( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + algorithms=["TopPop"], + best_algorithm="TopPop", + recipe_hash=_VALID_HEX64, + ) + assert d.recipe_hash == _VALID_HEX64 + + # None is also valid + d2 = RecipeDetailResponse( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + algorithms=["TopPop"], + best_algorithm="TopPop", + recipe_hash=None, + ) + assert d2.recipe_hash is None + + # Wrong length must be rejected + with pytest.raises(ValidationError): + RecipeDetailResponse( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + algorithms=["TopPop"], + best_algorithm="TopPop", + recipe_hash="a" * 32, # too short + ) + + +# --------------------------------------------------------------------------- +# Task C — under-tested field round-trips +# --------------------------------------------------------------------------- + + +def test_recommend_request_accepts_exclude_items() -> None: + """exclude_items: list[str] | None, max_length=1000 (from schema Field).""" + # None by default + req = RecommendRequest(user_id="u1") + assert req.exclude_items is None + + # Accepts a list up to 
the cap + items_at_cap = [f"i{n}" for n in range(1000)] + req2 = RecommendRequest(user_id="u1", exclude_items=items_at_cap) + assert len(req2.exclude_items) == 1000 + + # Rejects lists that exceed the cap + with pytest.raises(ValidationError): + RecommendRequest(user_id="u1", exclude_items=[f"i{n}" for n in range(1001)]) + + # Rejects non-string entries (Pydantic strict-ish: int will not coerce to str + # in a list[str] field in v2 when the value is obviously wrong type) + # Note: pydantic v2 coerces ints to str in lax mode for list[str], so we + # check that the correct Python type is accepted and None is accepted too. + req3 = RecommendRequest(user_id="u1", exclude_items=None) + assert req3.exclude_items is None + + +def test_recommend_request_extra_fields_rejected() -> None: + """RecommendRequest has extra=forbid: unknown fields must be rejected.""" + with pytest.raises(ValidationError): + RecommendRequest(user_id="u1", context={"a": 1}) # type: ignore[call-arg] + + +# --------------------------------------------------------------------------- +# Finding 6: Discriminated union extra-field enforcement +# --------------------------------------------------------------------------- + + +def test_batch_result_ok_rejects_error_field() -> None: + """BatchResultOk has extra='forbid'; passing an 'error' field must raise.""" + with pytest.raises(ValidationError): + BatchResultOk( + index=0, + status="ok", + items=[], + error={"code": "INTERNAL_ERROR", "message": "bad"}, # type: ignore[call-arg] + ) + + +def test_batch_result_err_rejects_items_field() -> None: + """BatchResultErr has extra='forbid'; passing an 'items' field must raise.""" + with pytest.raises(ValidationError): + BatchResultErr( + index=0, + status="error", + error=ErrorDetail(code="INTERNAL_ERROR", message="m"), + items=[], # type: ignore[call-arg] + ) + + +def test_batch_result_entry_deserializes_ok_shape_via_discriminator() -> None: + """BatchResultEntry deserializes from a dict with status='ok'.""" + 
import pydantic + + from recotem.serving.schemas import BatchResultEntry + + class _Wrapper(pydantic.BaseModel): + entry: BatchResultEntry + + w = _Wrapper.model_validate( + { + "entry": { + "index": 0, + "status": "ok", + "items": [{"item_id": "i1", "score": 0.9}], + } + } + ) + assert isinstance(w.entry, BatchResultOk) + assert w.entry.status == "ok" + assert w.entry.items[0].item_id == "i1" + + +def test_batch_result_entry_deserializes_error_shape_via_discriminator() -> None: + """BatchResultEntry deserializes from a dict with status='error'.""" + import pydantic + + from recotem.serving.schemas import BatchResultEntry + + class _Wrapper(pydantic.BaseModel): + entry: BatchResultEntry + + w = _Wrapper.model_validate( + { + "entry": { + "index": 1, + "status": "error", + "error": {"code": "UNKNOWN_USER", "message": "not found"}, + } + } + ) + assert isinstance(w.entry, BatchResultErr) + assert w.entry.status == "error" + assert w.entry.error.code == "UNKNOWN_USER" + + +def test_batch_result_entry_openapi_contains_discriminator() -> None: + """BatchRecommendResponse's OpenAPI schema must expose the discriminator.""" + from recotem.serving.schemas import BatchRecommendResponse + + schema = BatchRecommendResponse.model_json_schema() + schema_str = str(schema) + # The discriminator field should appear somewhere in the schema + assert "status" in schema_str, ( + "Discriminator field 'status' must appear in BatchRecommendResponse schema" + ) + # oneOf should appear in the schema for the union + assert "anyOf" in schema_str or "oneOf" in schema_str or "$defs" in schema_str, ( + "Schema for discriminated union must contain anyOf/oneOf or $defs references" + ) + + +# --------------------------------------------------------------------------- +# Finding 7: Sha256Hex / HexHash validation +# --------------------------------------------------------------------------- + + +def test_sha256hex_valid_prefix_and_length() -> None: + """sha256:<64 hex chars> is a valid 
model_version.""" + valid = "sha256:" + "a" * 64 + r = RecommendResponse( + request_id="r1", + recipe="demo", + model_version=valid, + items=[], + ) + assert r.model_version == valid + + +def test_sha256hex_rejects_missing_prefix() -> None: + """model_version is now Sha256Hex — strings without 'sha256:' prefix must + be rejected by RecommendResponse at validation time.""" + with pytest.raises(ValidationError): + RecommendResponse( + request_id="r1", + recipe="demo", + model_version="abc123", + items=[], + ) + + +def test_sha256hex_type_rejects_wrong_length() -> None: + """Sha256Hex must reject a string with wrong hex length after the prefix.""" + from pydantic import TypeAdapter + + from recotem.serving.schemas import Sha256Hex + + ta = TypeAdapter(Sha256Hex) + with pytest.raises(ValidationError): + ta.validate_python("sha256:" + "a" * 63) # one char short + with pytest.raises(ValidationError): + ta.validate_python("sha256:" + "a" * 65) # one char over + + +def test_sha256hex_type_rejects_uppercase() -> None: + """Sha256Hex must reject uppercase hex characters.""" + from pydantic import TypeAdapter + + from recotem.serving.schemas import Sha256Hex + + ta = TypeAdapter(Sha256Hex) + with pytest.raises(ValidationError): + ta.validate_python("sha256:" + "A" * 64) + + +def test_sha256hex_type_rejects_non_hex() -> None: + """Sha256Hex must reject non-hex characters.""" + from pydantic import TypeAdapter + + from recotem.serving.schemas import Sha256Hex + + ta = TypeAdapter(Sha256Hex) + with pytest.raises(ValidationError): + ta.validate_python("sha256:" + "g" * 64) # 'g' not in [0-9a-f] + + +def test_hexhash_type_accepts_64_hex() -> None: + """HexHash accepts a 64-character lowercase hex string.""" + from pydantic import TypeAdapter + + from recotem.serving.schemas import HexHash + + ta = TypeAdapter(HexHash) + result = ta.validate_python("a" * 64) + assert result == "a" * 64 + + +def test_hexhash_type_rejects_32_hex() -> None: + """HexHash rejects a 32-character hex 
string (too short).""" + from pydantic import TypeAdapter + + from recotem.serving.schemas import HexHash + + ta = TypeAdapter(HexHash) + with pytest.raises(ValidationError): + ta.validate_python("a" * 32) + + +# --------------------------------------------------------------------------- +# Finding 8: loaded_at / trained_at AwareDatetime +# --------------------------------------------------------------------------- + + +def test_recipe_summary_rejects_naive_datetime() -> None: + """RecipeSummary.loaded_at must reject naive datetime strings (no timezone).""" + with pytest.raises(ValidationError): + RecipeSummary( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T12:34:56", # no Z or offset + supported_verbs=["recommend"], + kind="user-item", + ) + + +def test_recipe_summary_accepts_iso8601_z_suffix() -> None: + """RecipeSummary.loaded_at must accept ISO-8601 strings ending in 'Z'.""" + s = RecipeSummary( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T12:34:56Z", + supported_verbs=["recommend"], + kind="user-item", + ) + assert s.loaded_at is not None + + +def test_recipe_summary_model_dump_json_includes_offset() -> None: + """RecipeSummary.model_dump_json() must produce a loaded_at string that + includes timezone offset information (not naive).""" + import json + + s = RecipeSummary( + name="demo", + model_version="sha256:" + "a" * 64, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + ) + raw = json.loads(s.model_dump_json()) + loaded_at_str = raw["loaded_at"] + # Must include a timezone indicator (either Z, +00:00, or similar) + has_offset = ( + loaded_at_str.endswith("Z") + or "+" in loaded_at_str + or (loaded_at_str.count("-") > 2) # offset like -05:00 + ) + assert has_offset, ( + f"model_dump_json() must produce an offset-aware loaded_at; got {loaded_at_str!r}" + ) + + +def test_recipe_detail_trained_at_rejects_naive() -> None: + """RecipeDetailResponse.trained_at must reject naive 
datetime strings.""" + with pytest.raises(ValidationError): + RecipeDetailResponse( + name="r", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T00:00:00Z", + supported_verbs=["recommend"], + kind="user-item", + config_digest=_VALID_SHA256, + algorithms=["TopPop"], + best_algorithm="TopPop", + trained_at="2026-01-01T00:00:00", # naive — no tz + ) + + +def test_recipes_list_response_loaded_at_iso8601() -> None: + """RecipeSummary.loaded_at must round-trip through JSON and be UTC ISO-8601.""" + import json + from datetime import datetime + + summary = RecipeSummary( + name="demo", + model_version=_VALID_SHA256, + loaded_at="2026-05-21T12:34:56Z", + supported_verbs=["recommend"], + kind="user-item", + ) + resp = RecipesListResponse(recipes=[summary]) + + # Round-trip through JSON + raw_json = resp.model_dump_json() + decoded = json.loads(raw_json) + loaded_at_str: str = decoded["recipes"][0]["loaded_at"] + + # Python 3.12 fromisoformat accepts trailing Z as UTC + dt = datetime.fromisoformat(loaded_at_str) + assert dt.tzinfo is not None, "loaded_at must carry timezone info" + # Normalise to UTC and verify the offset is zero + dt_utc = dt.astimezone(UTC) + assert dt_utc.utcoffset().total_seconds() == 0 diff --git a/tests/unit/test_serving_watcher.py b/tests/unit/test_serving_watcher.py index f211cdfb..47a76cb5 100644 --- a/tests/unit/test_serving_watcher.py +++ b/tests/unit/test_serving_watcher.py @@ -607,10 +607,10 @@ def test_artifact_disappearance_sets_last_load_error_and_increments_metric( failure_count: list[int] = [0] original_inc = _metrics.inc_artifact_load_failure - def _counting_inc(name: str) -> None: + def _counting_inc(name: str, reason: str = "unexpected") -> None: if name == "vanishing": failure_count[0] += 1 - original_inc(name) + original_inc(name, reason=reason) with patch.object(_metrics, "inc_artifact_load_failure", side_effect=_counting_inc): watcher = ArtifactWatcher( @@ -1716,6 +1716,258 @@ def test_public_reexports_are_bound_and_callable() 
-> None: assert watcher_module.load_metadata is watcher_module._load_metadata +# --------------------------------------------------------------------------- +# Finding 13: Watcher dir_scan failure metric +# --------------------------------------------------------------------------- + + +def test_scan_dir_permission_error_bumps_per_recipe_dir_scan_metric( + tmp_path: Path, +) -> None: + """When iterdir() raises PermissionError, each known recipe must have + inc_artifact_load_failure(name, reason='dir_scan') called. + + Patches pathlib.Path.iterdir at the class level because PosixPath + instances are immutable C objects that cannot be patched directly. + """ + from unittest.mock import patch + + from recotem.serving.watcher import ArtifactWatcher, _RecipeWatchState + + recipes_dir = tmp_path / "recipes_dir_scan" + recipes_dir.mkdir() + artifact_path = tmp_path / "model.recotem" + + registry = ModelRegistry() + cfg = _make_serve_config() + kr = KeyRing(f"active:{ACTIVE_KEY_HEX}") + + recipe1 = MagicMock() + recipe1.name = "scan_recipe1" + state1 = _RecipeWatchState(recipe=recipe1, artifact_path=str(artifact_path)) + + recipe2 = MagicMock() + recipe2.name = "scan_recipe2" + state2 = _RecipeWatchState(recipe=recipe2, artifact_path=str(artifact_path)) + + # Register stubs so _scan_recipes_dir can call set_load_error on them + for name in ("scan_recipe1", "scan_recipe2"): + registry.replace( + name, + ModelEntry(name=name, recommender=None, header={}, kid="", loaded=False), + ) + + watcher = ArtifactWatcher( + registry=registry, + recipes_dir=recipes_dir, + serve_config=cfg, + key_ring=kr, + initial_states={"scan_recipe1": state1, "scan_recipe2": state2}, + ) + + dir_scan_calls: list[tuple[str, str]] = [] + original_inc = _metrics.inc_artifact_load_failure + + def _counting_inc(name: str, reason: str = "unexpected") -> None: + dir_scan_calls.append((name, reason)) + original_inc(name, reason=reason) + + def _raising_iterdir(self): # noqa: ANN001 + raise 
PermissionError("permission denied on recipes dir") + + # Patch at the class level — the only way to intercept Path.iterdir + # since PosixPath instances are immutable C objects. + with patch("pathlib.Path.iterdir", _raising_iterdir): + with patch.object( + _metrics, "inc_artifact_load_failure", side_effect=_counting_inc + ): + watcher._scan_recipes_dir() + + watcher._executor.shutdown(wait=False) + + # Each known recipe must have dir_scan failure recorded + dir_scan_names = {n for n, r in dir_scan_calls if r == "dir_scan"} + assert "scan_recipe1" in dir_scan_names, ( + "inc_artifact_load_failure(reason='dir_scan') must be called for scan_recipe1" + ) + assert "scan_recipe2" in dir_scan_names, ( + "inc_artifact_load_failure(reason='dir_scan') must be called for scan_recipe2" + ) + + +def test_scan_dir_failure_also_bumps_watcher_scan_failure_counter( + tmp_path: Path, +) -> None: + """PermissionError on iterdir() must increment the neutral scan-failure counter + via _inc_scan_failure (from recotem._metrics_watcher).""" + from unittest.mock import patch + + import recotem._metrics_watcher as mw + from recotem.serving.watcher import ArtifactWatcher + + recipes_dir = tmp_path / "recipes_scan_fail" + recipes_dir.mkdir() + + registry = ModelRegistry() + cfg = _make_serve_config() + kr = KeyRing(f"active:{ACTIVE_KEY_HEX}") + + watcher = ArtifactWatcher( + registry=registry, + recipes_dir=recipes_dir, + serve_config=cfg, + key_ring=kr, + ) + + scan_failure_calls: list[str] = [] + original_inc = mw.inc_recipes_dir_scan_failure + + def _counting_inc(label: str) -> None: + scan_failure_calls.append(label) + original_inc(label) + + def _raising_iterdir(self): # noqa: ANN001 + raise PermissionError("permission denied") + + # Patch the watcher module's local alias, not the source module attribute, + # because the function was imported by name at module load time. 
+ with patch("pathlib.Path.iterdir", _raising_iterdir): + with patch( + "recotem.serving.watcher._inc_scan_failure", side_effect=_counting_inc + ): + watcher._scan_recipes_dir() + + watcher._executor.shutdown(wait=False) + + assert any("PermissionError" in s for s in scan_failure_calls), ( + "Neutral scan-failure counter must be incremented on PermissionError; " + f"got calls: {scan_failure_calls!r}" + ) + + +# --------------------------------------------------------------------------- +# Finding 14: sidecar_disappeared warning emitted once per transition +# --------------------------------------------------------------------------- + + +def test_sidecar_disappeared_warning_emitted_once_on_first_enoent( + tmp_path: Path, +) -> None: + """When a sidecar exists on poll-1 but raises ENOENT on poll-2, + sidecar_disappeared must be emitted ONCE. + On poll-3 (still ENOENT) no additional warning fires.""" + from unittest.mock import patch + + import structlog.testing + + from recotem.serving.watcher import _check_sidecar_changed, _RecipeWatchState + + artifact_path = tmp_path / "model.recotem" + sidecar_path = tmp_path / "model.recotem.sha256" + + # Set up state with sidecar previously seen ("v1\n") + state = _RecipeWatchState( + recipe=MagicMock(), + artifact_path=str(artifact_path), + last_sidecar_contents="v1\n", # was seen on poll-1 + ) + + # Poll-2: sidecar.exists() returns True, but read_text raises ENOENT + # We patch Path.exists globally so the sidecar check triggers the read_text path, + # then patch Path.read_text to raise ENOENT (errno=2). 
+ enoent = OSError(2, "No such file or directory") + enoent.errno = 2 + + with patch("pathlib.Path.exists", return_value=True): + with patch("pathlib.Path.read_text", side_effect=enoent): + with structlog.testing.capture_logs() as cap: + _check_sidecar_changed(state) + + # sidecar_disappeared must fire exactly once on first ENOENT + disappeared_events = [e for e in cap if e.get("event") == "sidecar_disappeared"] + assert len(disappeared_events) == 1, ( + f"sidecar_disappeared must fire exactly once on first ENOENT; got {cap!r}" + ) + # state must be reset + assert state.last_sidecar_contents is None, ( + "last_sidecar_contents must be reset to None after sidecar_disappeared" + ) + + # Poll-3: sidecar still ENOENT — no additional warning (state.last_sidecar_contents is None) + with patch("pathlib.Path.exists", return_value=True): + with patch("pathlib.Path.read_text", side_effect=enoent): + with structlog.testing.capture_logs() as cap2: + _check_sidecar_changed(state) + + disappeared2 = [e for e in cap2 if e.get("event") == "sidecar_disappeared"] + assert len(disappeared2) == 0, ( + f"sidecar_disappeared must NOT fire again on repeated ENOENT; got {cap2!r}" + ) + + +# --------------------------------------------------------------------------- +# Finding 15: metadata_index_build_error counter wired via on_row_error callback +# --------------------------------------------------------------------------- + + +def test_metadata_index_build_error_counter_incremented( + tmp_path: Path, +) -> None: + """build_metadata_index must invoke the on_row_error callback once per + row whose processing raises an unexpected exception, and the callback is + wired to inc_metadata_index_build_error in the watcher's _build_entry. + + We simulate the error by injecting a ``row`` whose ``.items()`` raises + AttributeError (mimicking a non-unique index returning a DataFrame slice + rather than a Series — the documented on_row_error trigger scenario). 
+ We capture calls to inc_metadata_index_build_error via patch.object to + assert the actual counter function is invoked. + """ + from unittest.mock import patch + + import pandas as pd + + from recotem.metadata.loader import build_metadata_index + from recotem.serving import metrics as _metrics + + # Row whose items() raises — mimics the documented scenario where a non-unique + # index returns a DataFrame slice instead of a Series row dict. + class _ExplodingRow: + def items(self): + raise AttributeError("simulated non-unique index slice") + + df = pd.DataFrame( + {"title": ["Widget A", "Widget B"]}, + index=pd.Index(["i1", "i2"], name="item_id"), + ) + + # Patch to_dict to inject a bad row alongside the real rows. + original_to_dict = df.to_dict + + def _patched_to_dict(orient=None): + raw = original_to_dict(orient=orient) + # Inject a bad entry whose row.items() will raise + raw["bad-item"] = _ExplodingRow() + return raw + + with patch.object(df, "to_dict", side_effect=_patched_to_dict): + with patch.object(_metrics, "inc_metadata_index_build_error") as mock_inc: + + def _on_row_error() -> None: + mock_inc("demo") + + result = build_metadata_index(df, on_row_error=_on_row_error) + + # The bad-item row must have triggered on_row_error → inc_metadata_index_build_error + assert mock_inc.called, ( + "inc_metadata_index_build_error must be called for the malformed row" + ) + mock_inc.assert_called_with("demo") + # Good items must still be present + assert "i1" in result + assert "i2" in result + + def test_public_reexports_in_module_all() -> None: """``__all__`` must list the public surface so ``from watcher import *`` in downstream code does not pull private helpers.""" @@ -2719,10 +2971,10 @@ def test_hot_swap_corrupt_artifact_preserves_stale_entry_real_registry( failure_count: list[int] = [0] original_inc = _metrics.inc_artifact_load_failure - def _counting_inc(name: str) -> None: + def _counting_inc(name: str, reason: str = "unexpected") -> None: if name == 
"stale_real": failure_count[0] += 1 - original_inc(name) + original_inc(name, reason=reason) # Replace the artifact with corrupt content artifact_path.write_bytes(b"THIS IS CORRUPT AND WILL FAIL VERIFICATION") @@ -2848,8 +3100,7 @@ def test_extract_kid_safe_truncated_returns_sentinel_and_reason() -> None: """_extract_kid_safe must return a sentinel + 'too_short' when data is shorter than FIXED_PREFIX_SIZE (corrupt/truncated artifact header). - The sentinel must contain '\\x00' so it can never collide with a valid - UTF-8 kid string that an attacker could craft. + The sentinel must be a string that cannot collide with a valid kid. """ from recotem.serving.watcher import _extract_kid_safe @@ -2858,13 +3109,10 @@ def test_extract_kid_safe_truncated_returns_sentinel_and_reason() -> None: assert reason is not None, "Truncated data must return a non-None failure reason" assert reason == "too_short", f"Expected 'too_short', got {reason!r}" - # The sentinel must contain a raw \x00 byte so that any KeyRing.verify - # lookup using it will immediately fail (KeyRing kids are valid UTF-8). - # format_kid_for_log (called by log emitters) will later hex-escape it to - # '\\x00' in log output, but we verify the raw sentinel here. - assert "\x00" in kid_log, ( - "Sentinel must contain a \\x00 byte to prevent collision with valid kids " - "(KeyRing rejects non-UTF-8 kids; \\x00 is safe as a sentinel marker)" + # The sentinel is "<extract_failed>" — a string that cannot match any + # real kid produced by KeyRing (valid kids are [A-Za-z0-9_-]). 
+ assert kid_log == "<extract_failed>", ( + f"Expected sentinel '<extract_failed>', got {kid_log!r}" ) @@ -4682,3 +4930,126 @@ def test_sidecar_enoent_still_returns_false(tmp_path: Path) -> None: assert changed is False, ( "I-10: ENOENT sidecar read must still return False (file absent = no change)" ) + + +# --------------------------------------------------------------------------- +# C4: sidecar_unsupported resets when recipe YAML mtime changes +# --------------------------------------------------------------------------- + + +def test_sidecar_unsupported_clears_on_yaml_mtime_change( + tmp_path: Path, +) -> None: + """When sidecar_unsupported=True and the recipe YAML mtime changes, + _check_sidecar_changed must clear the flag and re-evaluate (C4).""" + from unittest.mock import MagicMock, patch + + from recotem.serving.watcher import _check_sidecar_changed, _RecipeWatchState + + yaml_path = tmp_path / "recipe.yaml" + yaml_path.write_text("name: test\n") + artifact_path = str(tmp_path / "model.recotem") + + recipe = MagicMock() + recipe.name = "c4_test" + recipe._yaml_path = yaml_path + + initial_mtime = yaml_path.stat().st_mtime + + state = _RecipeWatchState( + recipe=recipe, + artifact_path=artifact_path, + sidecar_unsupported=True, + sidecar_unsupported_at_mtime=initial_mtime, + ) + + # With same mtime, still unsupported — returns False immediately. + result = _check_sidecar_changed(state) + assert result is False, "No mtime change → sidecar_unsupported stays True" + assert state.sidecar_unsupported is True + + # Simulate mtime change by patching os.stat to return a newer mtime. + new_mtime = initial_mtime + 1.0 + + class _FakeStat: + st_mtime = new_mtime + + with patch("os.stat", return_value=_FakeStat()): + result2 = _check_sidecar_changed(state) + + # After mtime change, sidecar_unsupported must be cleared. 
+ assert state.sidecar_unsupported is False, ( + "sidecar_unsupported must be cleared when recipe YAML mtime changes" + ) + assert state.sidecar_unsupported_at_mtime is None + + +# --------------------------------------------------------------------------- +# M9: generic except clears post-hmac failure streak +# --------------------------------------------------------------------------- + + +def test_generic_except_resets_post_hmac_streak( + tmp_path: Path, +) -> None: + """When _load_recipe encounters a non-ArtifactError exception, the + post_hmac_failure_streak for that recipe must be reset (M9).""" + from unittest.mock import MagicMock, patch + + from recotem.serving.watcher import ArtifactWatcher, _RecipeWatchState + from tests.conftest import ACTIVE_KEY_HEX + + recipe = MagicMock() + recipe.name = "m9_test" + recipe.output.path = str(tmp_path / "model.recotem") + recipe.item_metadata = None + + cfg = _make_serve_config() + registry = ModelRegistry() + stub = ModelEntry( + name="m9_test", + recommender=None, + header={}, + kid="", + loaded=False, + ) + registry.replace("m9_test", stub) + + watcher = ArtifactWatcher( + registry=registry, + recipes_dir=tmp_path, + serve_config=cfg, + key_ring=KeyRing(f"active:{ACTIVE_KEY_HEX}"), + initial_states=None, + ) + + # Pre-set a streak for this recipe. + watcher._post_hmac_failure_streak["m9_test"] = 5 + + state = _RecipeWatchState( + recipe=recipe, + artifact_path=recipe.output.path, + ) + watcher._states["m9_test"] = state + + # Force _build_entry to raise a generic (non-ArtifactError) exception. + with patch.object( + watcher, + "_build_entry", + side_effect=RuntimeError("unexpected boom"), + ): + with patch( + "recotem.serving.watcher._read_artifact_bytes", + return_value=b"dummy", + ): + with patch( + "recotem.serving.watcher._sha256_bytes", + return_value="a" * 64, + ): + # Set last_sha256 to something different to trigger the build path. 
+ state.last_sha256 = "b" * 64 + watcher._load_recipe("m9_test", state, force=False) + + assert "m9_test" not in watcher._post_hmac_failure_streak, ( + "Generic except must reset the post-HMAC failure streak (M9)" + ) diff --git a/tests/unit/test_v1_auth_wrong_key.py b/tests/unit/test_v1_auth_wrong_key.py new file mode 100644 index 00000000..75a7b2d5 --- /dev/null +++ b/tests/unit/test_v1_auth_wrong_key.py @@ -0,0 +1,167 @@ +# tests/unit/test_v1_auth_wrong_key.py +"""T2 + T8: wrong-key (non-empty but invalid) 401 and KeyRing rotation across +all 4 recommend verbs. + +T2: Parametrize over all 4 verbs, send a valid-length but wrong X-API-Key, + assert 401 with code=INVALID_API_KEY. + +T8: Configure two API keys (old + new). Assert that both key holders reach + :recommend with 200, and that a third key gets 401. +""" + +from __future__ import annotations + +import hashlib +from unittest.mock import MagicMock + +import pytest +from fastapi.testclient import TestClient + +from recotem.config import ApiKeyEntry +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + +_FAKE_SHA256_HEX = "c" * 64 # 64 lowercase hex chars + + +def _hash_key(plaintext: str) -> str: + return hashlib.scrypt( + plaintext.encode(), + salt=b"recotem.api-key.v1", + n=2, + r=8, + p=1, + dklen=32, + ).hex() + + +def _make_api_entry(plaintext: str, kid: str = "k1") -> ApiKeyEntry: + return ApiKeyEntry(kid=kid, sha256_hex=_hash_key(plaintext)) + + +def _make_loaded_entry(name: str = "demo") -> ModelEntry: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + rec._mapper = MagicMock() + rec._mapper.item_id_to_index = {"i1": 0} + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9)] + return ModelEntry( + name=name, + 
recommender=rec, + header={}, + kid="active", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def _build_client(api_entries: list[ApiKeyEntry]) -> TestClient: + registry = ModelRegistry() + registry.replace("demo", _make_loaded_entry("demo")) + return TestClient(build_v1_app(registry, api_keys=api_entries)) + + +# --------------------------------------------------------------------------- +# Minimal valid request bodies for each verb +# --------------------------------------------------------------------------- + +_VERB_BODIES: dict[str, dict] = { + "recommend": {"user_id": "u1", "limit": 1}, + "recommend-related": {"seed_items": ["i1"], "limit": 1}, + "batch-recommend": {"requests": [{"user_id": "u1", "limit": 1}]}, + "batch-recommend-related": {"requests": [{"seed_items": ["i1"], "limit": 1}]}, +} + +_VALID_PLAINTEXT = "valid_api_key_for_test_32_bytes!" +_WRONG_PLAINTEXT = "wrong_api_key_for_test_32_bytes!" 
+
+
+# ---------------------------------------------------------------------------
+# T2: every verb answers a non-empty but wrong key with 401
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("verb", list(_VERB_BODIES))
+def test_wrong_key_returns_401_with_invalid_api_key_code(verb: str) -> None:
+    """Only the valid key is registered; posting the wrong one must give 401 INVALID_API_KEY."""
+    only_valid_entry = _make_api_entry(_VALID_PLAINTEXT, kid="k1")
+    client = _build_client([only_valid_entry])
+
+    wrong_key_headers = {"X-API-Key": _WRONG_PLAINTEXT}
+    r = client.post(
+        f"/v1/recipes/demo:{verb}", json=_VERB_BODIES[verb], headers=wrong_key_headers
+    )
+
+    assert r.status_code == 401, (
+        f"Expected 401 for verb {verb!r} with wrong key; got {r.status_code}: {r.text}"
+    )
+    body = r.json()
+    assert body.get("code") == "INVALID_API_KEY", (
+        f"Expected code=INVALID_API_KEY for verb {verb!r}; got {body!r}"
+    )
+    assert "detail" in body, f"Response must include 'detail' field; got {body!r}"
+
+
+# ---------------------------------------------------------------------------
+# T8: rotation with two configured API keys — either passes, any other fails
+# ---------------------------------------------------------------------------
+
+_KEY_OLD = "old_api_key_for_rotation_test!!X"  # 32 characters long
+_KEY_NEW = "new_api_key_for_rotation_test!!Y"  # 32 characters long
+_KEY_NEITHER = "neither_key_for_rotation_test!!Z"  # 32 characters long
+
+
+def test_keyring_old_key_accepted_on_recommend() -> None:
+    """With old and new entries configured, the old key's plaintext gets 200 from :recommend."""
+    rotation = [("old", _KEY_OLD), ("new", _KEY_NEW)]
+    entries = [_make_api_entry(secret, kid=kid) for kid, secret in rotation]
+    client = _build_client(entries)
+
+    r = client.post(
+        "/v1/recipes/demo:recommend",
+        headers={"X-API-Key": _KEY_OLD},
+        json=_VERB_BODIES["recommend"],
+    )
+    assert r.status_code == 200, (
+        f"Old key must be accepted; got {r.status_code}: {r.text}"
+    )
+
+
+def test_keyring_new_key_accepted_on_recommend() -> None:
+    """With old and new entries configured, the new key's plaintext gets 200 from :recommend."""
+    rotation = [("old", _KEY_OLD), ("new", _KEY_NEW)]
+    entries = [_make_api_entry(secret, kid=kid) for kid, secret in rotation]
+    client = _build_client(entries)
+
+    r = client.post(
+        "/v1/recipes/demo:recommend",
+        headers={"X-API-Key": _KEY_NEW},
+        json=_VERB_BODIES["recommend"],
+    )
+    assert r.status_code == 200, (
+        f"New key must be accepted; got {r.status_code}: {r.text}"
+    )
+
+
+def test_keyring_neither_key_rejected_401() -> None:
+    """With old and new entries configured, a third key gets 401 with code INVALID_API_KEY."""
+    rotation = [("old", _KEY_OLD), ("new", _KEY_NEW)]
+    entries = [_make_api_entry(secret, kid=kid) for kid, secret in rotation]
+    client = _build_client(entries)
+
+    r = client.post(
+        "/v1/recipes/demo:recommend",
+        headers={"X-API-Key": _KEY_NEITHER},
+        json=_VERB_BODIES["recommend"],
+    )
+    assert r.status_code == 401, (
+        f"Unrecognised key must be rejected; got {r.status_code}: {r.text}"
+    )
+    assert r.json().get("code") == "INVALID_API_KEY"
diff --git a/tests/unit/test_v1_batch_recommend.py b/tests/unit/test_v1_batch_recommend.py
new file mode 100644
index 00000000..3103d93c
--- /dev/null
+++ b/tests/unit/test_v1_batch_recommend.py
@@ -0,0 +1,436 @@
+# tests/unit/test_v1_batch_recommend.py
+"""POST /v1/recipes/{name}:batch-recommend — multi-user bulk."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+from fastapi.testclient import TestClient
+
+from recotem.serving.registry import ModelEntry, ModelRegistry
+from tests.conftest import build_v1_app
+
+_FAKE_SHA256_HEX = "b" * 64  # 64 lowercase hex chars for a valid Sha256Hex marker
+
+
+def _client(rec) -> TestClient:  # registers *rec* as the loaded "demo" entry
+    demo_entry = ModelEntry(
+        name="demo",
+        recommender=rec,
+        header={},
+        kid="t",
+        metadata_df=None,
+        metadata_index=None,
+        loaded=True,
+        _loaded_marker=(None, _FAKE_SHA256_HEX),
+        loaded_at_unix=1.0,
+    )
+    model_registry = ModelRegistry()
+    model_registry.replace("demo", demo_entry)
+    return TestClient(build_v1_app(model_registry))
+
+
+def test_batch_recommend_mixed_success_and_failure(): + rec = MagicMock() + rec.get_recommendation_for_known_user_id.side_effect = [ + [("i1", 0.9)], + KeyError("u2"), + [("i3", 0.5)], + ] + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={ + "requests": [ + {"user_id": "u1"}, + {"user_id": "u2"}, + {"user_id": "u3"}, + ] + }, + ) + assert r.status_code == 200, r.text + body = r.json() + assert body["recipe"] == "demo" + assert len(body["results"]) == 3 + # Under the discriminated-union schema, BatchResultOk has extra="forbid" + # and does NOT carry an "error" field. Assert field by field rather than + # doing an equality check against a literal dict that includes "error": None. + ok_result = body["results"][0] + assert ok_result["index"] == 0 + assert ok_result["status"] == "ok" + assert ok_result["items"] == [{"item_id": "i1", "score": 0.9}] + assert "error" not in ok_result, ( + "BatchResultOk (discriminated union) must not carry an 'error' key" + ) + assert body["results"][1]["status"] == "error" + assert body["results"][1]["error"]["code"] == "UNKNOWN_USER" + assert body["results"][2]["status"] == "ok" + + +def test_batch_recommend_503_when_recipe_unavailable(): + stub = ModelEntry( + name="demo", + recommender=None, + header={}, + kid="", + loaded=False, + ) + registry = ModelRegistry() + registry.replace("demo", stub) + client = TestClient(build_v1_app(registry)) + r = client.post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1"}]}, + ) + assert r.status_code == 503 + body = r.json() + assert body["code"] == "RECIPE_UNAVAILABLE" + assert isinstance(body["detail"], str) + + +def test_batch_recommend_404_when_recipe_missing_from_registry(): + rec = MagicMock() + r = _client(rec).post( + "/v1/recipes/unknown:batch-recommend", + json={"requests": [{"user_id": "u1"}]}, + ) + assert r.status_code == 404 + body = r.json() + assert body["code"] == "RECIPE_NOT_FOUND" + assert isinstance(body["detail"], str) + + +def 
test_batch_recommend_422_on_too_many_requests(): + rec = MagicMock() + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": f"u{i}"} for i in range(257)]}, + ) + assert r.status_code == 422 + + +def test_batch_recommend_sets_model_version_response_header(): + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1"}]}, + ) + assert r.status_code == 200, r.text + header_val = r.headers.get("x-recotem-model-version") + assert header_val, "X-Recotem-Model-Version header must be present and non-empty" + assert header_val == r.json()["model_version"] + + +# --------------------------------------------------------------------------- +# F. Aggregate cap, non-KeyError handling, extra field +# --------------------------------------------------------------------------- + + +def test_batch_aggregate_limit_cap_exceeded() -> None: + """Aggregate limit cap is now enforced per-element rather than at the + schema level: 10 × 501 = 5010 > 5000, so the LAST element (and only the + last) is rejected with VALIDATION_ERROR. Earlier elements still execute.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [] + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": f"u{i}", "limit": 501} for i in range(10)]}, + ) + assert r.status_code == 200, r.text + results = r.json()["results"] + # First 9 elements (sum = 9*501 = 4509) succeed; the 10th would push + # the running aggregate to 5010, so it is rejected with + # VALIDATION_ERROR. 
+ assert results[0]["status"] == "ok" + assert results[-1]["status"] == "error" + assert results[-1]["error"]["code"] == "VALIDATION_ERROR" + + +def test_batch_aggregate_limit_cap_boundary() -> None: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [] + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": f"u{i}", "limit": 500} for i in range(10)]}, + ) + assert r.status_code == 200, r.text + + +def test_batch_element_runtime_error_yields_internal_error() -> None: + rec = MagicMock() + + def _side_effect(user_id, limit): + if user_id == "bad-user": + raise RuntimeError("exploded") + return [("i1", 0.9)] + + rec.get_recommendation_for_known_user_id.side_effect = _side_effect + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={ + "requests": [ + {"user_id": "ok-user"}, + {"user_id": "bad-user"}, + {"user_id": "ok-user2"}, + ] + }, + ) + assert r.status_code == 200, r.text + results = r.json()["results"] + assert results[0]["status"] == "ok" + assert results[1]["status"] == "error" + assert results[1]["error"]["code"] == "INTERNAL_ERROR" + assert "bad-user" not in results[1]["error"].get("message", "") + assert results[2]["status"] == "ok" + + +def test_batch_rejects_extra_field_on_request_element() -> None: + """A bad sub-element now becomes status=error, code=VALIDATION_ERROR + rather than 422'ing the whole batch — that's the contract documented + for ``:batch-recommend``.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [] + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1", "extra_field": "boom"}]}, + ) + assert r.status_code == 200, r.text + results = r.json()["results"] + assert results[0]["status"] == "error" + assert results[0]["error"]["code"] == "VALIDATION_ERROR" + + +# --------------------------------------------------------------------------- +# Finding 3: model_version header on 
partial-failure batch response +# --------------------------------------------------------------------------- + + +def _client_with_metadata(rec, meta_index: dict | None = None) -> TestClient: + """Build a client whose entry has a metadata_index pre-populated.""" + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=meta_index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + return TestClient(build_v1_app(registry)) + + +def test_batch_recommend_sets_model_version_on_partial_failure(): + """When a batch has one ok and one error element, the 200 response must + carry X-Recotem-Model-Version and the body model_version must match.""" + rec = MagicMock() + + def _side(user_id, limit): + if user_id == "known-user": + return [("i1", 0.9)] + raise KeyError(user_id) + + rec.get_recommendation_for_known_user_id.side_effect = _side + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={ + "requests": [ + {"user_id": "known-user"}, + {"user_id": "unknown-user"}, + ] + }, + ) + assert r.status_code == 200, r.text + body = r.json() + # One ok, one error + statuses = [e["status"] for e in body["results"]] + assert "ok" in statuses + assert "error" in statuses + # Header must be set + header_val = r.headers.get("x-recotem-model-version") + assert header_val, ( + "X-Recotem-Model-Version must be present on partial-failure batch" + ) + assert header_val == body["model_version"], ( + "Header value must match body model_version" + ) + + +# --------------------------------------------------------------------------- +# Finding 9: include_metadata opt-in on batch-recommend +# --------------------------------------------------------------------------- + + +def test_batch_recommend_include_metadata_false_no_extra_fields(): + """Default include_metadata=False: items carry only item_id and score.""" + rec = 
MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + meta_index = {"i1": {"title": "Widget A", "category": "tools"}} + r = _client_with_metadata(rec, meta_index).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1"}]}, + # include_metadata defaults to False — no key in JSON + ) + assert r.status_code == 200, r.text + results = r.json()["results"] + assert results[0]["status"] == "ok" + items = results[0]["items"] + assert len(items) == 1 + item = items[0] + assert set(item.keys()) == {"item_id", "score"}, ( + f"With include_metadata=False, items must have only item_id+score; got {set(item.keys())!r}" + ) + + +def test_batch_recommend_include_metadata_false_explicit(): + """Explicit include_metadata=False must also omit metadata fields.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + meta_index = {"i1": {"title": "Widget A"}} + r = _client_with_metadata(rec, meta_index).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1"}], "include_metadata": False}, + ) + assert r.status_code == 200, r.text + item = r.json()["results"][0]["items"][0] + assert "title" not in item, ( + "include_metadata=False must not include metadata fields" + ) + + +def test_batch_recommend_include_metadata_true_adds_fields(): + """include_metadata=True: items carry the same metadata as single :recommend.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + meta_index = {"i1": {"title": "Widget A", "category": "tools"}} + r = _client_with_metadata(rec, meta_index).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1"}], "include_metadata": True}, + ) + assert r.status_code == 200, r.text + item = r.json()["results"][0]["items"][0] + assert item["item_id"] == "i1" + assert item["score"] == 0.9 + assert item.get("title") == "Widget A", ( + "include_metadata=True must include metadata fields 
in batch items" + ) + assert item.get("category") == "tools" + + +def test_batch_recommend_per_request_limit_validation(): + """Per-element schema violations surface as VALIDATION_ERROR in the + individual result entry; valid siblings continue to be processed.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [] + + r_zero = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={ + "requests": [ + {"user_id": "u-good"}, + {"user_id": "u1", "limit": 0}, # below the floor + ] + }, + ) + assert r_zero.status_code == 200, r_zero.text + rs = r_zero.json()["results"] + assert rs[0]["status"] == "ok" + assert rs[1]["status"] == "error" + assert rs[1]["error"]["code"] == "VALIDATION_ERROR" + # The message must mention the violating field name so callers can diagnose + # which sub-field failed without re-parsing the full schema error. + assert "limit" in rs[1]["error"]["message"], ( + f"VALIDATION_ERROR message should mention 'limit'; got {rs[1]['error']['message']!r}" + ) + + r_over = _client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1", "limit": 1001}]}, # above ceiling + ) + assert r_over.status_code == 200, r_over.text + assert r_over.json()["results"][0]["status"] == "error" + assert r_over.json()["results"][0]["error"]["code"] == "VALIDATION_ERROR" + assert "limit" in r_over.json()["results"][0]["error"]["message"], ( + "VALIDATION_ERROR message should mention 'limit'" + ) + + +# --------------------------------------------------------------------------- +# T2: exclude_items in batch-recommend +# --------------------------------------------------------------------------- + + +def test_batch_recommend_exclude_items_removes_item() -> None: + """When a batch element specifies exclude_items, those items must not + appear in the result for that element.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [ + ("i1", 0.9), + ("i2", 0.5), + ("i3", 0.3), + ] + r = 
_client(rec).post( + "/v1/recipes/demo:batch-recommend", + json={ + "requests": [ + {"user_id": "u1", "exclude_items": ["i2"]}, + ] + }, + ) + assert r.status_code == 200, r.text + items = r.json()["results"][0]["items"] + item_ids = [item["item_id"] for item in items] + assert "i2" not in item_ids, ( + f"exclude_items=['i2'] must remove i2; got {item_ids!r}" + ) + assert "i1" in item_ids + assert "i3" in item_ids + + +# --------------------------------------------------------------------------- +# F4: X-Recotem-Items-Degraded must NOT be set on batch endpoints +# --------------------------------------------------------------------------- + + +def test_batch_recommend_no_items_degraded_header_even_when_metadata_degrades() -> None: + """Even when metadata serialization produces a fallback (score=nan triggers + ValidationError on the full path), :batch-recommend must NOT set + X-Recotem-Items-Degraded. The header is reserved for single endpoints. + + We use ``include_metadata=True`` with a metadata_index entry whose NaN + score field triggers the fallback path in _build_items; the batch handler + code does not call _apply_build_items_degraded, so the header is never set. 
+ """ + import math + + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + + meta_index = {"i1": {"score": math.nan, "title": "Widget"}} + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=meta_index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + r = client.post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1"}], "include_metadata": True}, + ) + assert r.status_code == 200, r.text + assert "x-recotem-items-degraded" not in r.headers, ( + ":batch-recommend must NOT set X-Recotem-Items-Degraded even when " + "metadata serialization degrades" + ) diff --git a/tests/unit/test_v1_batch_recommend_related.py b/tests/unit/test_v1_batch_recommend_related.py new file mode 100644 index 00000000..f74b7e30 --- /dev/null +++ b/tests/unit/test_v1_batch_recommend_related.py @@ -0,0 +1,379 @@ +# tests/unit/test_v1_batch_recommend_related.py +"""POST /v1/recipes/{name}:batch-recommend-related — multi-seed bulk.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +from fastapi.testclient import TestClient + +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + +_FAKE_SHA256_HEX = "4" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _client(rec, known_items: list[str] | None = None) -> TestClient: + """Wrap *rec* in a ModelEntry whose id-map advertises *known_items*. + + The router pre-checks ``entry.recommender._mapper.item_id_to_index`` + to distinguish ``UNKNOWN_SEED_ITEMS`` from ``NO_CANDIDATES``; tests + that exercise the happy path need at least one seed in the map. 
+ """ + rec._mapper.item_id_to_index = {iid: i for i, iid in enumerate(known_items or [])} + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + return TestClient(build_v1_app(registry)) + + +def test_batch_related_mixed_success_and_failure(): + """Mix known + unknown seeds: known seeds → ok, fully-unknown → UNKNOWN_SEED_ITEMS.""" + rec = MagicMock() + + def _side_effect(seed_items, limit): + return [("i9", 0.7)] if "7203" in seed_items or "9984" in seed_items else [] + + rec.get_recommendation_for_new_user.side_effect = _side_effect + r = _client(rec, known_items=["7203", "9984"]).post( + "/v1/recipes/demo:batch-recommend-related", + json={ + "requests": [ + {"seed_items": ["7203"]}, + {"seed_items": ["zzz"]}, # unknown — UNKNOWN_SEED_ITEMS + {"seed_items": ["9984"]}, + ] + }, + ) + assert r.status_code == 200, r.text + body = r.json() + assert [e["status"] for e in body["results"]] == ["ok", "error", "ok"] + assert body["results"][1]["error"]["code"] == "UNKNOWN_SEED_ITEMS" + + +def test_batch_related_404_when_recipe_missing_from_registry(): + rec = MagicMock() + r = _client(rec).post( + "/v1/recipes/unknown:batch-recommend-related", + json={"requests": [{"seed_items": ["i1"]}]}, + ) + assert r.status_code == 404 + body = r.json() + assert body["code"] == "RECIPE_NOT_FOUND" + assert isinstance(body["detail"], str) + + +def test_batch_related_503_when_recipe_stub_not_loaded(): + stub = ModelEntry( + name="demo", + recommender=None, + header={}, + kid="", + loaded=False, + ) + registry = ModelRegistry() + registry.replace("demo", stub) + r = TestClient(build_v1_app(registry)).post( + "/v1/recipes/demo:batch-recommend-related", + json={"requests": [{"seed_items": ["i1"]}]}, + ) + assert r.status_code == 503 + body = r.json() + assert body["code"] == 
"RECIPE_UNAVAILABLE" + assert isinstance(body["detail"], str) + + +def test_batch_related_empty_seed_in_one_entry_is_per_element_error(): + """Empty seed list fails the sub-schema; under per-element validation + this surfaces as ``status=error, code=VALIDATION_ERROR`` rather than + a whole-batch 422 (was 422 in the all-or-nothing mode).""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [] + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend-related", + json={ + "requests": [{"seed_items": []}], + }, + ) + assert r.status_code == 200, r.text + results = r.json()["results"] + assert results[0]["status"] == "error" + assert results[0]["error"]["code"] == "VALIDATION_ERROR" + # The message should mention the violating field so callers can diagnose + # which sub-field failed without re-parsing the full schema error. + assert "seed_items" in results[0]["error"]["message"], ( + f"VALIDATION_ERROR message should mention 'seed_items'; " + f"got {results[0]['error']['message']!r}" + ) + + +# --------------------------------------------------------------------------- +# G. 
Partial failure parity (I1) +# --------------------------------------------------------------------------- + + +def test_batch_related_element_unknown_seeds_yields_error() -> None: + """A seed_items list with no known id-map members → UNKNOWN_SEED_ITEMS.""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9)] + + r = _client(rec, known_items=["good-seed", "good-seed2"]).post( + "/v1/recipes/demo:batch-recommend-related", + json={ + "requests": [ + {"seed_items": ["good-seed"]}, + {"seed_items": ["unknown-seed"]}, + {"seed_items": ["good-seed2"]}, + ] + }, + ) + assert r.status_code == 200, r.text + results = r.json()["results"] + assert results[0]["status"] == "ok" + assert results[1]["status"] == "error" + assert results[1]["error"]["code"] == "UNKNOWN_SEED_ITEMS" + assert results[2]["status"] == "ok" + + +def test_batch_related_element_runtime_error_yields_internal_error() -> None: + rec = MagicMock() + + def _side_effect(seed_items, limit): + if seed_items == ["bad-seed"]: + raise RuntimeError("exploded") + return [("i1", 0.9)] + + rec.get_recommendation_for_new_user.side_effect = _side_effect + r = _client(rec, known_items=["ok-seed", "bad-seed"]).post( + "/v1/recipes/demo:batch-recommend-related", + json={ + "requests": [ + {"seed_items": ["ok-seed"]}, + {"seed_items": ["bad-seed"]}, + ] + }, + ) + assert r.status_code == 200, r.text + results = r.json()["results"] + assert results[0]["status"] == "ok" + assert results[1]["status"] == "error" + assert results[1]["error"]["code"] == "INTERNAL_ERROR" + + +def test_batch_related_aggregate_limit_cap_exceeded() -> None: + """Aggregate cap is enforced per-element; element 10 (running sum 5010 > 5000) + surfaces as VALIDATION_ERROR while earlier elements succeed.""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9)] + r = _client(rec, known_items=["s1"]).post( + "/v1/recipes/demo:batch-recommend-related", + json={"requests": [{"seed_items": ["s1"], 
"limit": 501} for _ in range(10)]}, + ) + assert r.status_code == 200, r.text + results = r.json()["results"] + assert results[0]["status"] == "ok" + assert results[-1]["status"] == "error" + assert results[-1]["error"]["code"] == "VALIDATION_ERROR" + + +def test_batch_recommend_related_sets_model_version_response_header(): + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i9", 0.7)] + r = _client(rec, known_items=["seed1"]).post( + "/v1/recipes/demo:batch-recommend-related", + json={"requests": [{"seed_items": ["seed1"]}]}, + ) + assert r.status_code == 200, r.text + header_val = r.headers.get("x-recotem-model-version") + assert header_val, "X-Recotem-Model-Version header must be present and non-empty" + assert header_val == r.json()["model_version"] + + +# --------------------------------------------------------------------------- +# Finding 1: empty outer requests list → 422 +# --------------------------------------------------------------------------- + + +def test_batch_recommend_related_rejects_empty_outer_requests_list(): + """POST :batch-recommend-related with {"requests": []} must return 422. + + The schema enforces min_length=1 on the outer list; an empty list must + fail at the schema level (HTTP 422), not reach the handler. 
+ """ + rec = MagicMock() + r = _client(rec).post( + "/v1/recipes/demo:batch-recommend-related", + json={"requests": []}, + ) + assert r.status_code == 422, ( + f"Empty requests list must produce 422; got {r.status_code}: {r.text}" + ) + + +# --------------------------------------------------------------------------- +# T1: include_metadata flag for :batch-recommend-related +# --------------------------------------------------------------------------- + + +def _client_with_metadata(rec, meta_index: dict | None = None) -> TestClient: + """Build a client whose entry has a metadata_index pre-populated.""" + known_items = list(meta_index.keys()) if meta_index else [] + rec._mapper.item_id_to_index = {iid: i for i, iid in enumerate(known_items)} + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=meta_index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + return TestClient(build_v1_app(registry)) + + +def test_batch_recommend_related_include_metadata_default_false() -> None: + """Default include_metadata=False: items in :batch-recommend-related carry + only item_id and score even when metadata_index is populated.""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9)] + meta_index = {"i1": {"title": "Widget A", "category": "tools"}} + r = _client_with_metadata(rec, meta_index).post( + "/v1/recipes/demo:batch-recommend-related", + json={"requests": [{"seed_items": ["i1"]}]}, + ) + assert r.status_code == 200, r.text + item = r.json()["results"][0]["items"][0] + assert set(item.keys()) == {"item_id", "score"}, ( + f"With include_metadata=False (default), items must have only item_id+score; " + f"got {set(item.keys())!r}" + ) + + +def test_batch_recommend_related_include_metadata_explicit_false() -> None: + """Explicit include_metadata=False must not include metadata 
fields.""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9)] + meta_index = {"i1": {"title": "Widget A"}} + r = _client_with_metadata(rec, meta_index).post( + "/v1/recipes/demo:batch-recommend-related", + json={"requests": [{"seed_items": ["i1"]}], "include_metadata": False}, + ) + assert r.status_code == 200, r.text + item = r.json()["results"][0]["items"][0] + assert "title" not in item, ( + "include_metadata=False must not include metadata fields" + ) + + +def test_batch_recommend_related_include_metadata_true_adds_fields() -> None: + """include_metadata=True: items carry the same metadata as single :recommend-related.""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9)] + meta_index = {"i1": {"title": "Widget A", "category": "tools"}} + r = _client_with_metadata(rec, meta_index).post( + "/v1/recipes/demo:batch-recommend-related", + json={"requests": [{"seed_items": ["i1"]}], "include_metadata": True}, + ) + assert r.status_code == 200, r.text + item = r.json()["results"][0]["items"][0] + assert item["item_id"] == "i1" + assert item["score"] == 0.9 + assert item.get("title") == "Widget A", ( + "include_metadata=True must include metadata fields" + ) + assert item.get("category") == "tools" + + +# --------------------------------------------------------------------------- +# T2: exclude_items in :batch-recommend-related +# --------------------------------------------------------------------------- + + +def test_batch_recommend_related_exclude_items_removes_item() -> None: + """When a batch element specifies exclude_items, those items must not + appear in the result for that element.""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [ + ("i1", 0.9), + ("i2", 0.5), + ("i3", 0.3), + ] + r = _client(rec, known_items=["seed1"]).post( + "/v1/recipes/demo:batch-recommend-related", + json={ + "requests": [ + {"seed_items": ["seed1"], "exclude_items": ["i2"]}, + ] + }, + ) 
+ assert r.status_code == 200, r.text + items = r.json()["results"][0]["items"] + item_ids = [item["item_id"] for item in items] + assert "i2" not in item_ids, ( + f"exclude_items=['i2'] must remove i2; got {item_ids!r}" + ) + assert "i1" in item_ids + assert "i3" in item_ids + + +# --------------------------------------------------------------------------- +# F4: X-Recotem-Items-Degraded must NOT be set on batch endpoints +# --------------------------------------------------------------------------- + + +def test_batch_recommend_related_no_items_degraded_header_even_when_metadata_degrades() -> ( + None +): + """Even when metadata serialization produces a fallback, :batch-recommend-related + must NOT set X-Recotem-Items-Degraded. The header is reserved for single + endpoints only. + + We use ``include_metadata=True`` with a metadata_index entry whose NaN + score field triggers the fallback path in _build_items; the batch handler + does not call _apply_build_items_degraded, so the header is never set. 
+ """ + import math + + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("seed1", 0.9)] + + meta_index = {"seed1": {"score": math.nan, "title": "Widget"}} + rec._mapper.item_id_to_index = {"seed1": 0} + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=meta_index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + r = client.post( + "/v1/recipes/demo:batch-recommend-related", + json={"requests": [{"seed_items": ["seed1"]}], "include_metadata": True}, + ) + assert r.status_code == 200, r.text + assert "x-recotem-items-degraded" not in r.headers, ( + ":batch-recommend-related must NOT set X-Recotem-Items-Degraded even when " + "metadata serialization degrades" + ) diff --git a/tests/unit/test_v1_dev_bypass_recommend.py b/tests/unit/test_v1_dev_bypass_recommend.py new file mode 100644 index 00000000..2cc49d43 --- /dev/null +++ b/tests/unit/test_v1_dev_bypass_recommend.py @@ -0,0 +1,185 @@ +# tests/unit/test_v1_dev_bypass_recommend.py +"""T3: Dev-bypass (insecure_no_auth=True) reaches v1 recommend verbs. + +Scenario: build a router with insecure_no_auth=True and no api_keys, then +call :recommend, :recommend-related, and :batch-recommend without any +X-API-Key header and assert 200 with valid response bodies. + +These tests cover the prediction-path bypass — existing dev-bypass tests +only exercised /v1/health/details. 
+""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock + +from fastapi import FastAPI, Request +from fastapi.exceptions import HTTPException, RequestValidationError +from fastapi.responses import JSONResponse +from fastapi.testclient import TestClient + +from recotem.serving import metrics as _metrics +from recotem.serving.app import ( + _DEFAULT_DETAIL_FOR, + _V1_VERB_PATH_RE, + RequestIDMiddleware, +) +from recotem.serving.registry import ModelEntry, ModelRegistry +from recotem.serving.routes import make_router + +_FAKE_SHA256_HEX = "d" * 64 # 64 lowercase hex chars + + +def _build_dev_app(registry: ModelRegistry) -> TestClient: + """Build a FastAPI app with insecure_no_auth=True (empty api_keys).""" + app = FastAPI() + + @app.exception_handler(HTTPException) + async def _http_exc(request: Request, exc: HTTPException) -> JSONResponse: + if isinstance(exc.detail, dict): + content: dict[str, Any] = dict(exc.detail) + content.setdefault( + "detail", _DEFAULT_DETAIL_FOR.get(exc.status_code, "Error") + ) + else: + content = {"detail": exc.detail} + return JSONResponse(status_code=exc.status_code, content=content) + + @app.exception_handler(RequestValidationError) + async def _val_err(request: Request, exc: RequestValidationError) -> JSONResponse: + match = _V1_VERB_PATH_RE.match(request.url.path) + if match is not None: + _metrics.record_v1_request( + recipe=match.group("name"), + verb=match.group("verb"), + status="validation_error", + latency_seconds=0.0, + ) + request_id = getattr(request.state, "request_id", "") + sanitized = [ + {k: v for k, v in err.items() if k not in ("input", "ctx")} + for err in exc.errors() + ] + return JSONResponse( + status_code=422, + content={ + "request_id": request_id, + "detail": "Request validation failed", + "code": "VALIDATION_ERROR", + "errors": sanitized, + }, + ) + + @app.exception_handler(Exception) + async def _unhandled(request: Request, exc: Exception) -> JSONResponse: + 
request_id = getattr(request.state, "request_id", "") + headers = {"X-Request-ID": request_id} if request_id else None + return JSONResponse( + status_code=500, + content={"detail": "internal error", "code": "INTERNAL_ERROR"}, + headers=headers, + ) + + app.add_middleware(RequestIDMiddleware) + + # insecure_no_auth=True: api_keys is empty and bypass_mode becomes "insecure_no_auth" + router = make_router( + registry=registry, + api_keys=[], + insecure_no_auth=True, + ) + app.include_router(router, prefix="/v1") + return TestClient(app) + + +def _make_loaded_entry(name: str = "demo") -> ModelEntry: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9), ("i2", 0.7)] + rec._mapper = MagicMock() + rec._mapper.user_id_to_index = {"u1": 0} + rec._mapper.item_id_to_index = {"i1": 0, "i2": 1} + rec.get_recommendation_for_new_user.return_value = [("i3", 0.8), ("i4", 0.6)] + return ModelEntry( + name=name, + recommender=rec, + header={}, + kid="active", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +# --------------------------------------------------------------------------- +# T3.1 :recommend without X-API-Key returns 200 +# --------------------------------------------------------------------------- + + +def test_dev_bypass_recommend_returns_200_without_api_key() -> None: + """insecure_no_auth=True: :recommend succeeds without X-API-Key header.""" + registry = ModelRegistry() + registry.replace("demo", _make_loaded_entry("demo")) + client = _build_dev_app(registry) + + # No X-API-Key header + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 2}) + + assert r.status_code == 200, ( + f"Dev bypass must allow :recommend without auth; got {r.status_code}: {r.text}" + ) + body = r.json() + assert body["recipe"] == "demo" + assert isinstance(body["items"], list) and len(body["items"]) > 0 + assert "model_version" in body + 
assert "request_id" in body + + +# --------------------------------------------------------------------------- +# T3.2 :recommend-related without X-API-Key returns 200 +# --------------------------------------------------------------------------- + + +def test_dev_bypass_recommend_related_returns_200_without_api_key() -> None: + """insecure_no_auth=True: :recommend-related succeeds without X-API-Key header.""" + registry = ModelRegistry() + registry.replace("demo", _make_loaded_entry("demo")) + client = _build_dev_app(registry) + + r = client.post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["i1"], "limit": 2}, + ) + + assert r.status_code == 200, ( + f"Dev bypass must allow :recommend-related without auth; got {r.status_code}: {r.text}" + ) + body = r.json() + assert body["recipe"] == "demo" + assert isinstance(body["items"], list) + + +# --------------------------------------------------------------------------- +# T3.3 :batch-recommend without X-API-Key returns 200 +# --------------------------------------------------------------------------- + + +def test_dev_bypass_batch_recommend_returns_200_without_api_key() -> None: + """insecure_no_auth=True: :batch-recommend succeeds without X-API-Key header.""" + registry = ModelRegistry() + registry.replace("demo", _make_loaded_entry("demo")) + client = _build_dev_app(registry) + + r = client.post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1", "limit": 1}]}, + ) + + assert r.status_code == 200, ( + f"Dev bypass must allow :batch-recommend without auth; got {r.status_code}: {r.text}" + ) + body = r.json() + assert body["recipe"] == "demo" + assert isinstance(body["results"], list) and len(body["results"]) == 1 diff --git a/tests/unit/test_v1_error_handling.py b/tests/unit/test_v1_error_handling.py new file mode 100644 index 00000000..37797659 --- /dev/null +++ b/tests/unit/test_v1_error_handling.py @@ -0,0 +1,1150 @@ +# tests/unit/test_v1_error_handling.py +"""Tests for 
v1 serving error-handling, request-ID, and logging behaviour. + +Covers: +- X-Request-ID echo / fallback / invalid-input handling and body/header parity. +- Flat error body shape produced by the custom HTTPException handler. +- 422 RequestValidationError handler shape and validation_error metric. +- recipe_not_found / recipe_not_loaded warning logs on 404 / 503 paths. +- error_class field on recommend_unexpected_error log lines. +- structlog contextvars binding (recipe / kid / request_id) during requests. +""" + +from __future__ import annotations + +import hashlib +import re +from unittest.mock import MagicMock + +import pytest +import structlog.testing +from fastapi.testclient import TestClient + +from recotem.config import ApiKeyEntry +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +# Pattern matching the fallback request_id (12 lowercase hex chars). 
+_FALLBACK_REQUEST_ID_RE = re.compile(r"^[0-9a-f]{12}$") + + +_FAKE_SHA256_HEX = "f" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _loaded_entry(rec: MagicMock | None = None, name: str = "demo") -> ModelEntry: + """Build a fully-loaded ModelEntry around the given mock recommender.""" + if rec is None: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + return ModelEntry( + name=name, + recommender=rec, + header={}, + kid="test-kid", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def _stub_entry(name: str = "stub_recipe") -> ModelEntry: + """Build a registered-but-not-loaded entry (loaded=False).""" + return ModelEntry( + name=name, + recommender=None, + header={}, + kid="", + loaded=False, + ) + + +def _make_api_entry(plaintext: str, kid: str = "api-key") -> ApiKeyEntry: + """Build an ApiKeyEntry matching ``recotem.serving.auth._hash_api_key``.""" + digest = hashlib.scrypt( + plaintext.encode("utf-8"), + salt=b"recotem.api-key.v1", + n=2, + r=8, + p=1, + dklen=32, + ).hex() + return ApiKeyEntry(kid=kid, sha256_hex=digest) + + +def _client_with(entry: ModelEntry) -> TestClient: + registry = ModelRegistry() + registry.replace(entry.name, entry) + return TestClient(build_v1_app(registry)) + + +# --------------------------------------------------------------------------- +# 1. 
X-Request-ID consistency +# --------------------------------------------------------------------------- + + +def test_request_id_echoed_when_valid_long_value() -> None: + """A 64-char alphanumeric X-Request-ID (the documented max) is echoed in + both the response body and the X-Request-ID response header, and the + two values are identical.""" + client = _client_with(_loaded_entry()) + sent = "a" * 64 + assert len(sent) == 64 + + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 1}, + headers={"X-Request-ID": sent}, + ) + assert r.status_code == 200, r.text + body = r.json() + assert body["request_id"] == sent + assert r.headers["X-Request-ID"] == sent + assert body["request_id"] == r.headers["X-Request-ID"] + + +def test_request_id_regenerated_when_over_128_chars() -> None: + """An X-Request-ID over 128 characters is rejected by the regex and the + server generates a fresh ID instead. + + Updated for the {1,128} regex. A 65-char ID is now accepted verbatim + (was rejected under the old {1,64} rule), so the test sends 129 chars + to verify the upper bound. 
+ """ + client = _client_with(_loaded_entry()) + sent = "a" * 129 + + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 1}, + headers={"X-Request-ID": sent}, + ) + assert r.status_code == 200, r.text + body = r.json() + assert body["request_id"] != sent + assert r.headers["X-Request-ID"] != sent + assert body["request_id"] == r.headers["X-Request-ID"] + + +def test_request_id_accepts_128_chars() -> None: + """A 128-char ID is at the cap and must be echoed verbatim.""" + client = _client_with(_loaded_entry()) + sent = "a" * 128 + + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 1}, + headers={"X-Request-ID": sent}, + ) + assert r.status_code == 200, r.text + assert r.headers["X-Request-ID"] == sent + + +def test_request_id_fallback_when_header_absent() -> None: + """When no X-Request-ID is sent, the server generates a 12-hex-char ID + and uses the same value for both the body's request_id field and the + X-Request-ID response header.""" + client = _client_with(_loaded_entry()) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 1}) + assert r.status_code == 200, r.text + + body = r.json() + header_value = r.headers["X-Request-ID"] + assert _FALLBACK_REQUEST_ID_RE.match(body["request_id"]), ( + f"Fallback request_id must be 12 hex chars; got {body['request_id']!r}" + ) + assert header_value == body["request_id"] + + +def test_request_id_fallback_when_header_invalid() -> None: + """When X-Request-ID contains a disallowed character (e.g. '!') the + server discards the input and generates a fallback ID instead. 
The + fallback is reflected in both the header and the body, and they match.""" + client = _client_with(_loaded_entry()) + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 1}, + headers={"X-Request-ID": "bad!id"}, + ) + assert r.status_code == 200, r.text + + body = r.json() + header_value = r.headers["X-Request-ID"] + assert body["request_id"] != "bad!id", ( + "Server must NOT echo a header value containing disallowed chars" + ) + assert _FALLBACK_REQUEST_ID_RE.match(body["request_id"]), ( + f"Fallback request_id must be 12 hex chars; got {body['request_id']!r}" + ) + assert header_value == body["request_id"] + + +# --------------------------------------------------------------------------- +# 2. Flat error body shape +# --------------------------------------------------------------------------- + + +def test_flat_error_body_missing_api_key() -> None: + """401 missing-key responses have a FLAT body (top-level code + detail).""" + plaintext = "any_valid_length_api_key_32_chrs" + api_entry = _make_api_entry(plaintext) + registry = ModelRegistry() + registry.replace("demo", _loaded_entry()) + client = TestClient(build_v1_app(registry, api_keys=[api_entry])) + + # No X-API-Key header at all. + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 401 + body = r.json() + assert body["code"] == "MISSING_API_KEY" + assert isinstance(body["detail"], str) + assert body["detail"] + # No legacy nested shape. 
+ assert not isinstance(body.get("detail"), dict) + + +def test_flat_error_body_invalid_api_key() -> None: + """401 invalid-key responses have a FLAT body (top-level code + detail).""" + plaintext = "correct_api_key_padded_to_32_chs" + api_entry = _make_api_entry(plaintext) + registry = ModelRegistry() + registry.replace("demo", _loaded_entry()) + client = TestClient(build_v1_app(registry, api_keys=[api_entry])) + + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1"}, + headers={"X-API-Key": "wrong_key_value_padded_to_32_chs"}, + ) + assert r.status_code == 401 + body = r.json() + assert body["code"] == "INVALID_API_KEY" + assert isinstance(body["detail"], str) + assert not isinstance(body.get("detail"), dict) + + +def test_flat_error_body_recipe_not_found() -> None: + """404 RECIPE_NOT_FOUND responses have a FLAT body.""" + client = _client_with(_loaded_entry()) + r = client.post("/v1/recipes/no_such:recommend", json={"user_id": "u1"}) + assert r.status_code == 404 + body = r.json() + assert body["code"] == "RECIPE_NOT_FOUND" + assert isinstance(body["detail"], str) + assert not isinstance(body.get("detail"), dict) + + +def test_flat_error_body_recipe_unavailable() -> None: + """503 RECIPE_UNAVAILABLE responses have a FLAT body.""" + client = _client_with(_stub_entry("demo")) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 503 + body = r.json() + assert body["code"] == "RECIPE_UNAVAILABLE" + assert isinstance(body["detail"], str) + assert not isinstance(body.get("detail"), dict) + + +def test_flat_error_body_unknown_user() -> None: + """404 UNKNOWN_USER responses have a FLAT body.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.side_effect = KeyError("u1") + client = _client_with(_loaded_entry(rec)) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 404 + body = r.json() + assert body["code"] == "UNKNOWN_USER" + assert 
isinstance(body["detail"], str) + assert not isinstance(body.get("detail"), dict) + + +# --------------------------------------------------------------------------- +# 3. 422 validation handler +# --------------------------------------------------------------------------- + + +def test_validation_error_handler_body_shape_on_recommend() -> None: + """A malformed body on an inference verb returns the standard 422 envelope.""" + client = _client_with(_loaded_entry()) + # limit=99999 violates the RecommendRequest.limit Field(le=1000) bound. + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 99999}, + ) + assert r.status_code == 422 + body = r.json() + assert body["detail"] == "Request validation failed" + assert body["code"] == "VALIDATION_ERROR" + assert isinstance(body["errors"], list) + assert body["errors"], "errors list must contain at least one entry" + + +def test_validation_error_handler_records_metric_when_enabled( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """When metrics are enabled, a 422 on a v1 inference verb path records a + ``recotem_v1_requests_total{status="validation_error"}`` counter sample + for the (recipe, verb) tuple parsed from the URL. + """ + pytest.importorskip("prometheus_client") + monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "1") + + # Reload the metrics module's lazy globals — they look up + # metrics_enabled() each call but the v1 family objects are cached. + from recotem.serving import metrics as _metrics + + monkeypatch.setattr(_metrics, "_V1_REQUEST_COUNTER", None) + monkeypatch.setattr(_metrics, "_V1_REQUEST_LATENCY", None) + monkeypatch.setattr(_metrics, "_V1_BATCH_SIZE", None) + monkeypatch.setattr(_metrics, "_V1_BATCH_ELEMENT_ERRORS", None) + monkeypatch.setattr(_metrics, "_V1_METADATA_DEGRADED_ITEMS", None) + monkeypatch.setattr(_metrics, "_V1_VALIDATION_ERRORS_OUTSIDE_VERB", None) + + client = _client_with(_loaded_entry(name="metric_recipe")) + + # Drive a 422 on the recommend verb. 
+ r = client.post( + "/v1/recipes/metric_recipe:recommend", + json={"user_id": "u1", "limit": 99999}, + ) + assert r.status_code == 422 + + # Read the Prometheus registry directly to confirm the sample exists. + import prometheus_client + + output = prometheus_client.generate_latest().decode("utf-8") + # Match a counter row for our (recipe, verb, status=validation_error) + # triple. Sample values are floats (e.g. "1.0"). + expected_line_re = re.compile( + r"recotem_v1_requests_total\{" + r'(?=.*recipe="metric_recipe")' + r'(?=.*verb="recommend")' + r'(?=.*status="validation_error")' + r"[^}]*\}\s+([0-9.]+)" + ) + matches = expected_line_re.findall(output) + assert matches, ( + "Expected a recotem_v1_requests_total sample with " + f"recipe=metric_recipe, verb=recommend, status=validation_error in:\n{output}" + ) + assert float(matches[0]) >= 1.0, ( + f"Counter must be >= 1 after one 422; got {matches[0]} in:\n{output}" + ) + + +# --------------------------------------------------------------------------- +# 4. 
# recipe_not_found / recipe_not_loaded warning logs
# ---------------------------------------------------------------------------


def test_recipe_not_found_event_on_404() -> None:
    """POSTing ``:recommend`` for an unregistered recipe yields a 404 and a
    captured ``recipe_not_found`` event whose ``name`` is the requested recipe."""
    api = _client_with(_loaded_entry())

    with structlog.testing.capture_logs() as cap:
        response = api.post(
            "/v1/recipes/no_such:recommend",
            json={"user_id": "u1"},
        )

    assert response.status_code == 404
    # Narrow the captured entries down to the event under test.
    matching = [entry for entry in cap if entry.get("event") == "recipe_not_found"]
    assert matching, f"Expected at least one recipe_not_found event; got: {cap!r}"
    assert matching[0]["name"] == "no_such"


def test_recipe_not_loaded_event_on_503() -> None:
    """POSTing ``:recommend`` for a recipe registered only as a stub yields a
    503 and a captured ``recipe_not_loaded`` event naming that recipe."""
    api = _client_with(_stub_entry("stubby"))

    with structlog.testing.capture_logs() as cap:
        response = api.post(
            "/v1/recipes/stubby:recommend",
            json={"user_id": "u1"},
        )

    assert response.status_code == 503
    matching = [entry for entry in cap if entry.get("event") == "recipe_not_loaded"]
    assert matching, f"Expected at least one recipe_not_loaded event; got: {cap!r}"
    assert matching[0]["name"] == "stubby"


def test_recipe_not_found_event_on_get_recipe_detail_404() -> None:
    """``GET /v1/recipes/{name}`` with an unknown name yields a 404 and at
    least one captured ``recipe_not_found`` event."""
    api = _client_with(_loaded_entry())

    with structlog.testing.capture_logs() as cap:
        response = api.get("/v1/recipes/no_such")

    assert response.status_code == 404
    matching = [entry for entry in cap if entry.get("event") == "recipe_not_found"]
    assert matching, f"Expected recipe_not_found from recipe_detail; got: {cap!r}"


# ---------------------------------------------------------------------------
# 5.
# error_class in unexpected-error log
# ---------------------------------------------------------------------------


def test_exc_type_in_recommend_unexpected_error_log() -> None:
    """A ``RuntimeError`` raised by the recommender surfaces as a 500 with the
    standard envelope, and no route-level ``recommend_unexpected_error`` event
    is captured (M7).

    Only the HTTP response and the absence of that route-level event are
    checked; this test makes no assertion about an ``unhandled_500`` event.
    """
    failing = MagicMock()
    failing.get_recommendation_for_known_user_id.side_effect = RuntimeError("boom")
    models = ModelRegistry()
    models.replace("demo", _loaded_entry(failing))
    api = TestClient(build_v1_app(models), raise_server_exceptions=False)

    with structlog.testing.capture_logs() as cap:
        response = api.post("/v1/recipes/demo:recommend", json={"user_id": "u1"})

    assert response.status_code == 500

    # M7: the route itself must not log recommend_unexpected_error.
    route_events = [
        entry for entry in cap if entry.get("event") == "recommend_unexpected_error"
    ]
    assert not route_events, (
        f"Route-level recommend_unexpected_error must not be logged (M7); "
        f"got: {route_events!r}"
    )

    # Standard 500 envelope: INTERNAL_ERROR code plus a string detail.
    payload = response.json()
    assert payload.get("code") == "INTERNAL_ERROR"
    assert isinstance(payload.get("detail"), str)


# ---------------------------------------------------------------------------
# 6. structlog contextvars binding (recipe / kid)
# ---------------------------------------------------------------------------


def test_structlog_context_binds_recipe_and_kid_during_request() -> None:
    """Send an authenticated ``ctx_recipe`` request against a failing and then
    a working recommender, checking ``recipe`` / ``kid`` on captured events.

    Phase 1: the recommender raises ``RuntimeError``; the response must be a
    500 whose body carries ``code == "INTERNAL_ERROR"``.

    Phase 2: the recommender succeeds; the response must be 200, and every
    captured event that has a ``recipe`` or ``kid`` key must hold
    ``"ctx_recipe"`` / ``"auth-kid"`` respectively. Events without those keys
    are not constrained, so this phase also passes when no captured event
    carries them.
    """
    plaintext = "ctx_recipe_test_api_key_padding!"
    api_entry = _make_api_entry(plaintext, kid="auth-kid")

    def _entry_for(recommender: MagicMock) -> ModelEntry:
        # Both phases use an identical entry; only the recommender differs.
        return ModelEntry(
            name="ctx_recipe",
            recommender=recommender,
            header={},
            kid="model-kid",
            metadata_df=None,
            metadata_index=None,
            loaded=True,
            _loaded_marker=(None, _FAKE_SHA256_HEX),
            loaded_at_unix=1747800000.0,
        )

    def _recommend(api: TestClient):
        # The same authenticated request is issued in both phases.
        return api.post(
            "/v1/recipes/ctx_recipe:recommend",
            json={"user_id": "u1", "limit": 1},
            headers={"X-API-Key": plaintext},
        )

    # Phase 1: failing recommender -> 500 with the standard error code.
    failing = MagicMock()
    failing.get_recommendation_for_known_user_id.side_effect = RuntimeError("force")
    failing_models = ModelRegistry()
    failing_models.replace("ctx_recipe", _entry_for(failing))
    failing_api = TestClient(
        build_v1_app(failing_models, api_keys=[api_entry]),
        raise_server_exceptions=False,
    )

    failed = _recommend(failing_api)
    assert failed.status_code == 500, failed.text
    assert failed.json().get("code") == "INTERNAL_ERROR"

    # Phase 2: working recommender -> 200, then inspect captured events.
    working = MagicMock()
    working.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)]
    working_models = ModelRegistry()
    working_models.replace("ctx_recipe", _entry_for(working))
    working_api = TestClient(build_v1_app(working_models, api_keys=[api_entry]))

    with structlog.testing.capture_logs() as cap:
        succeeded = _recommend(working_api)
    assert succeeded.status_code == 200

    # Any event exposing recipe= / kid= must carry the request's values.
    for event in cap:
        if "recipe" in event:
            assert event["recipe"] == "ctx_recipe"
        if "kid" in event:
            assert event["kid"] == "auth-kid"


# ---------------------------------------------------------------------------
# 7.
# 500 body shape produced by the Exception handler
# ---------------------------------------------------------------------------


def test_500_body_shape_on_unhandled_runtime_error() -> None:
    """A ``RuntimeError`` raised by the recommender is rendered as the JSON
    ``INTERNAL_ERROR`` envelope, served with a JSON content type."""
    broken = MagicMock()
    broken.get_recommendation_for_known_user_id.side_effect = RuntimeError("boom")
    models = ModelRegistry()
    models.replace("demo", _loaded_entry(broken))
    api = TestClient(build_v1_app(models), raise_server_exceptions=False)

    response = api.post("/v1/recipes/demo:recommend", json={"user_id": "u1"})

    assert response.status_code == 500
    assert response.json() == {"detail": "internal error", "code": "INTERNAL_ERROR"}
    assert response.headers["Content-Type"].startswith("application/json")


# ---------------------------------------------------------------------------
# 8. X-Request-ID header coverage across every error status code
# ---------------------------------------------------------------------------


def test_x_request_id_present_on_every_error_status_code() -> None:
    """Walk through 401, 404 (unknown recipe and unknown user), 422, 503 and
    500 responses and require a non-empty ``X-Request-ID`` header on each.

    For the 422 case the body's ``request_id`` must equal the header value.
    """
    recommend_url = "/v1/recipes/demo:recommend"
    known_user = {"user_id": "u1"}

    # 401 - API keys are configured but the request sends none.
    plaintext = "any_valid_length_api_key_32_chrs"
    api_entry = _make_api_entry(plaintext)
    secured_models = ModelRegistry()
    secured_models.replace("demo", _loaded_entry())
    secured_api = TestClient(build_v1_app(secured_models, api_keys=[api_entry]))

    unauthorized = secured_api.post(recommend_url, json=known_user)
    assert unauthorized.status_code == 401
    assert unauthorized.headers.get("X-Request-ID"), "401 must carry X-Request-ID"

    # 404 RECIPE_NOT_FOUND - the recipe name is not registered.
    open_api = _client_with(_loaded_entry())
    missing_recipe = open_api.post("/v1/recipes/no_such:recommend", json=known_user)
    assert missing_recipe.status_code == 404
    assert missing_recipe.json()["code"] == "RECIPE_NOT_FOUND"
    assert missing_recipe.headers.get("X-Request-ID")

    # 404 UNKNOWN_USER - the recommender raises KeyError for the user.
    unknown_user_rec = MagicMock()
    unknown_user_rec.get_recommendation_for_known_user_id.side_effect = KeyError("u1")
    unknown_user_api = _client_with(_loaded_entry(unknown_user_rec))
    missing_user = unknown_user_api.post(recommend_url, json=known_user)
    assert missing_user.status_code == 404
    assert missing_user.json()["code"] == "UNKNOWN_USER"
    assert missing_user.headers.get("X-Request-ID")

    # 422 VALIDATION_ERROR - the body repeats the request id from the header.
    invalid = open_api.post(recommend_url, json={"user_id": "u1", "limit": 99999})
    assert invalid.status_code == 422
    assert invalid.headers.get("X-Request-ID")
    assert invalid.json()["request_id"] == invalid.headers["X-Request-ID"]

    # 503 RECIPE_UNAVAILABLE - the recipe exists only as a stub entry.
    stub_api = _client_with(_stub_entry("demo"))
    unavailable = stub_api.post(recommend_url, json=known_user)
    assert unavailable.status_code == 503
    assert unavailable.json()["code"] == "RECIPE_UNAVAILABLE"
    assert unavailable.headers.get("X-Request-ID")

    # 500 INTERNAL_ERROR - the recommender raises an unexpected exception.
    crashing_rec = MagicMock()
    crashing_rec.get_recommendation_for_known_user_id.side_effect = RuntimeError("boom")
    crashing_models = ModelRegistry()
    crashing_models.replace("demo", _loaded_entry(crashing_rec))
    crashing_api = TestClient(
        build_v1_app(crashing_models), raise_server_exceptions=False
    )

    crashed = crashing_api.post(recommend_url, json=known_user)
    assert crashed.status_code == 500
    assert crashed.headers.get("X-Request-ID"), "500 must carry X-Request-ID"


def test_x_request_id_echoed_from_client_on_error_responses() -> None:
    """A client-supplied ``X-Request-ID`` is echoed back on the response header
    for both a 503 and a 500; if the 503 body happens to contain a
    ``request_id`` field it must equal the client's value too."""
    recommend_url = "/v1/recipes/demo:recommend"
    traced = {"X-Request-ID": "client-abc"}

    # 503 path: the recipe is registered as a stub only.
    stub_api = _client_with(_stub_entry("demo"))
    unavailable = stub_api.post(recommend_url, json={"user_id": "u1"}, headers=traced)
    assert unavailable.status_code == 503
    assert unavailable.headers["X-Request-ID"] == "client-abc"
    # The body check is conditional: it only applies when the field is present.
    body = unavailable.json()
    if "request_id" in body:
        assert body["request_id"] == "client-abc"

    # 500 path: the id must also survive the catch-all Exception handler.
    crashing_rec = MagicMock()
    crashing_rec.get_recommendation_for_known_user_id.side_effect = RuntimeError("boom")
    models = ModelRegistry()
    models.replace("demo", _loaded_entry(crashing_rec))
    crashing_api = TestClient(build_v1_app(models), raise_server_exceptions=False)
    crashed = crashing_api.post(recommend_url, json={"user_id": "u1"}, headers=traced)
    assert crashed.status_code == 500
    assert crashed.headers["X-Request-ID"] == "client-abc"


# ---------------------------------------------------------------------------
# 9.
# 422 request_id presence in body
# ---------------------------------------------------------------------------


def test_422_body_includes_request_id_matching_header() -> None:
    """A 422 body carries a non-empty ``request_id`` equal to the
    ``X-Request-ID`` response header, so the two can be correlated."""
    api = _client_with(_loaded_entry())
    out_of_range = {"user_id": "u1", "limit": 99999}

    response = api.post("/v1/recipes/demo:recommend", json=out_of_range)

    assert response.status_code == 422
    body = response.json()
    assert "request_id" in body, f"422 body missing request_id; got {body!r}"
    assert body["request_id"] == response.headers["X-Request-ID"]
    assert body["request_id"], "request_id must not be empty when middleware ran"


# ---------------------------------------------------------------------------
# 10. 405 Method Not Allowed
# ---------------------------------------------------------------------------


def test_405_method_not_allowed_on_get_only_endpoint() -> None:
    """POSTing to ``/v1/health`` returns 405 with the flat
    ``{"detail": "Method Not Allowed"}`` body and an ``X-Request-ID`` header."""
    api = _client_with(_loaded_entry())

    response = api.post("/v1/health")

    assert response.status_code == 405
    assert response.json() == {"detail": "Method Not Allowed"}
    assert response.headers.get("X-Request-ID")


# ---------------------------------------------------------------------------
# 11.
# 404 from unknown route
# ---------------------------------------------------------------------------


def test_404_unknown_route_has_detail_and_request_id() -> None:
    """``GET /v1/nonexistent`` returns 404 with a ``detail`` key in the body
    and an ``X-Request-ID`` response header."""
    api = _client_with(_loaded_entry())

    response = api.get("/v1/nonexistent")

    assert response.status_code == 404
    body = response.json()
    assert "detail" in body, f"404 body missing detail; got {body!r}"
    assert response.headers.get("X-Request-ID")


# ---------------------------------------------------------------------------
# 12. 422 on path-parameter validation
# ---------------------------------------------------------------------------


def test_422_on_path_parameter_validation_no_metric_recorded(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A recipe name containing a space is rejected with the standard 422
    envelope, and no ``recotem_v1_requests_total`` sample labelled
    ``recipe="has spaces"`` appears in the Prometheus output.

    NOTE(review): presumably the metric is skipped because the path does not
    match ``_V1_VERB_PATH_RE`` in app.py — confirm against that module.
    """
    pytest.importorskip("prometheus_client")
    monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "1")

    from recotem.serving import metrics as _metrics

    # Reset each module-level v1 metric global to None for this test.
    for cached_global in (
        "_V1_REQUEST_COUNTER",
        "_V1_REQUEST_LATENCY",
        "_V1_BATCH_SIZE",
        "_V1_BATCH_ELEMENT_ERRORS",
        "_V1_METADATA_DEGRADED_ITEMS",
        "_V1_VALIDATION_ERRORS_OUTSIDE_VERB",
    ):
        monkeypatch.setattr(_metrics, cached_global, None)

    api = _client_with(_loaded_entry())
    # The recipe segment 'has spaces' contains a space character.
    response = api.post("/v1/recipes/has spaces:recommend", json={"user_id": "u1"})
    assert response.status_code == 422
    envelope = response.json()
    assert envelope["code"] == "VALIDATION_ERROR"
    assert isinstance(envelope["errors"], list)
    assert envelope["errors"], "errors list must contain at least one entry"

    # Scrape the default registry and make sure nothing was attributed to
    # the invalid recipe name.
    import prometheus_client

    output = prometheus_client.generate_latest().decode("utf-8")
    unwanted_sample = re.compile(
        r"recotem_v1_requests_total\{"
        r'(?=.*recipe="has spaces")'
        r"[^}]*\}\s+[0-9.]+"
    )
    assert not unwanted_sample.search(output), (
        f"Expected no validation_error metric for invalid path; got:\n{output}"
    )


# ---------------------------------------------------------------------------
# 13. HTTPException with non-standard dict detail falls back via
#     _DEFAULT_DETAIL_FOR.
# ---------------------------------------------------------------------------


def test_http_exception_dict_detail_without_detail_key_uses_fallback() -> None:
    """An ``HTTPException`` whose ``detail`` is a dict lacking a ``detail`` key
    still yields a body with a string ``detail``: the ``_DEFAULT_DETAIL_FOR``
    entry for the status code, or ``"Error"`` when the code is unmapped.

    The handler exercised here is defined inside this test on a throwaway
    ``FastAPI`` app; only ``_DEFAULT_DETAIL_FOR`` comes from
    ``recotem.serving.app``.
    """
    from fastapi import FastAPI, Request
    from fastapi.exceptions import HTTPException
    from fastapi.responses import JSONResponse

    from recotem.serving.app import _DEFAULT_DETAIL_FOR

    app = FastAPI()

    @app.exception_handler(HTTPException)
    async def _http_exc(request: Request, exc: HTTPException) -> JSONResponse:
        detail = exc.detail
        if not isinstance(detail, dict):
            content = {"detail": detail}
        else:
            # Copy so the exception's own dict is never mutated.
            content = dict(detail)
            fallback = _DEFAULT_DETAIL_FOR.get(exc.status_code, "Error")
            content.setdefault("detail", fallback)
        return JSONResponse(status_code=exc.status_code, content=content)

    @app.get("/raises_418")
    def raises_418() -> None:
        raise HTTPException(status_code=418, detail={"foo": "bar"})

    @app.get("/raises_400")
    def raises_400() -> None:
        raise HTTPException(status_code=400, detail={"code": "BAD"})

    api = TestClient(app)

    # 418: the expected body shows the "Error" fallback being used.
    teapot = api.get("/raises_418")
    assert teapot.status_code == 418
    assert teapot.json() == {"foo": "bar", "detail": "Error"}

    # 400: the expected body shows "Bad Request" filled in for the missing key.
    bad_request = api.get("/raises_400")
    assert bad_request.status_code == 400
    assert bad_request.json() == {"code": "BAD", "detail": "Bad Request"}


# ---------------------------------------------------------------------------
# 14. Contextvars cleanup on early raise (no leakage between requests).
# ---------------------------------------------------------------------------


def test_structlog_contextvars_do_not_leak_between_requests() -> None:
    """After a 401 (no ``X-API-Key``) and again after an authenticated 200,
    the structlog contextvars visible to the test must contain neither
    ``recipe`` nor ``kid``."""
    import structlog.contextvars

    plaintext = "any_valid_length_api_key_32_chrs"
    api_entry = _make_api_entry(plaintext)
    models = ModelRegistry()
    models.replace("demo", _loaded_entry())
    api = TestClient(build_v1_app(models, api_keys=[api_entry]))

    # 1) Request without credentials: rejected with 401.
    rejected = api.post("/v1/recipes/demo:recommend", json={"user_id": "u1"})
    assert rejected.status_code == 401

    ctx_after_401 = dict(structlog.contextvars.get_contextvars())
    assert "recipe" not in ctx_after_401, (
        f"recipe leaked after 401; got {ctx_after_401!r}"
    )
    assert "kid" not in ctx_after_401, f"kid leaked after 401; got {ctx_after_401!r}"

    # 2) Same request with a valid key: succeeds, and the context must be
    #    clean again once the response has been returned.
    accepted = api.post(
        "/v1/recipes/demo:recommend",
        json={"user_id": "u1"},
        headers={"X-API-Key": plaintext},
    )
    assert accepted.status_code == 200, accepted.text

    ctx_after_200 = dict(structlog.contextvars.get_contextvars())
    assert "recipe" not in ctx_after_200, (
        f"recipe leaked after 200; got {ctx_after_200!r}"
    )
    assert "kid" not in ctx_after_200, f"kid leaked after 200; got {ctx_after_200!r}"


# ---------------------------------------------------------------------------
# 15. build_v1_app parity with create_app — exception_handlers coverage.
#     (No test follows this banner directly; see
#     test_build_v1_app_registers_all_three_exception_handlers further down.)
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# C. Additional auth / error coverage (new)
# ---------------------------------------------------------------------------


def test_recommend_related_requires_auth() -> None:
    """``:recommend-related`` without an API key is rejected with 401 when
    keys are configured."""
    plaintext = "auth_test_api_key_32_bytes_pad!!"
    api_entry = _make_api_entry(plaintext)
    models = ModelRegistry()
    models.replace("demo", _loaded_entry())
    api = TestClient(build_v1_app(models, api_keys=[api_entry]))

    response = api.post(
        "/v1/recipes/demo:recommend-related",
        json={"seed_items": ["i1"]},
    )

    assert response.status_code == 401


def test_batch_recommend_requires_auth() -> None:
    """``:batch-recommend`` without an API key is rejected with 401 when keys
    are configured."""
    plaintext = "auth_test_api_key_32_bytes_pad!!"
    api_entry = _make_api_entry(plaintext)
    models = ModelRegistry()
    models.replace("demo", _loaded_entry())
    api = TestClient(build_v1_app(models, api_keys=[api_entry]))

    response = api.post(
        "/v1/recipes/demo:batch-recommend",
        json={"requests": [{"user_id": "u1"}]},
    )

    assert response.status_code == 401


def test_batch_recommend_related_requires_auth() -> None:
    """``:batch-recommend-related`` without an API key is rejected with 401
    when keys are configured."""
    plaintext = "auth_test_api_key_32_bytes_pad!!"
    api_entry = _make_api_entry(plaintext)
    models = ModelRegistry()
    models.replace("demo", _loaded_entry())
    api = TestClient(build_v1_app(models, api_keys=[api_entry]))

    response = api.post(
        "/v1/recipes/demo:batch-recommend-related",
        json={"requests": [{"seed_items": ["i1"]}]},
    )

    assert response.status_code == 401


def test_422_body_echoes_client_request_id() -> None:
    """An empty JSON body triggers a 422 whose ``request_id`` field equals the
    client-supplied ``X-Request-ID``."""
    api = _client_with(_loaded_entry())

    response = api.post(
        "/v1/recipes/demo:recommend",
        json={},
        headers={"X-Request-ID": "client-abc"},
    )

    assert response.status_code == 422
    assert response.json()["request_id"] == "client-abc"


def test_500_body_includes_request_id() -> None:
    """A 500 echoes the client-supplied ``X-Request-ID`` on the response
    header.

    Despite the test's name, only the header is asserted; the response body
    is not inspected here.
    """
    crashing_rec = MagicMock()
    crashing_rec.get_recommendation_for_known_user_id.side_effect = RuntimeError("boom")
    models = ModelRegistry()
    models.replace("demo", _loaded_entry(crashing_rec))
    api = TestClient(build_v1_app(models), raise_server_exceptions=False)

    response = api.post(
        "/v1/recipes/demo:recommend",
        json={"user_id": "u1"},
        headers={"X-Request-ID": "traceme-1"},
    )

    assert response.status_code == 500
    assert response.headers.get("X-Request-ID") == "traceme-1"


def test_422_body_strips_input_and_ctx() -> None:
    """Every entry in a 422 body's ``errors`` list has ``loc``, ``msg`` and
    ``type`` but neither ``input`` nor ``ctx``."""
    api = _client_with(_loaded_entry())

    response = api.post(
        "/v1/recipes/demo:recommend",
        json={"user_id": "u1", "limit": 999999},
    )

    assert response.status_code == 422
    for err in response.json()["errors"]:
        assert "input" not in err, f"'input' must not appear in error dict; got {err!r}"
        assert "ctx" not in err, f"'ctx' must not appear in error dict; got {err!r}"
        assert "loc" in err
        assert "msg" in err
        assert "type" in err


def test_error_message_does_not_echo_user_id() -> None:
    """The ``UNKNOWN_USER`` 404 body must not repeat the requested user id in
    its ``message`` or ``detail`` fields."""
    unknown_user_rec = MagicMock()
    unknown_user_rec.get_recommendation_for_known_user_id.side_effect = KeyError(
        "secret-user-id"
    )
    api = _client_with(_loaded_entry(unknown_user_rec))

    response = api.post(
        "/v1/recipes/demo:recommend", json={"user_id": "secret-user-id"}
    )

    assert response.status_code == 404
    envelope = response.json()
    assert envelope["code"] == "UNKNOWN_USER"
    assert "secret-user-id" not in envelope.get("message", "")
    assert "secret-user-id" not in envelope.get("detail", "")


def test_error_message_does_not_echo_seed_items() -> None:
    """The ``UNKNOWN_SEED_ITEMS`` 404 body must not repeat the submitted seed
    item in its ``message`` or ``detail`` fields."""
    empty_rec = MagicMock()
    empty_rec.get_recommendation_for_new_user.return_value = []
    api = _client_with(_loaded_entry(empty_rec))
    secret_seed = "super-secret-seed-item"

    response = api.post(
        "/v1/recipes/demo:recommend-related",
        json={"seed_items": [secret_seed]},
    )

    assert response.status_code == 404
    envelope = response.json()
    assert envelope["code"] == "UNKNOWN_SEED_ITEMS"
    assert secret_seed not in envelope.get("message", "")
    assert secret_seed not in envelope.get("detail", "")


# ---------------------------------------------------------------------------
# Finding 16: unhandled_500 log emitted with request_id, path, exc_type
# ---------------------------------------------------------------------------


def _build_production_app(entry: ModelEntry) -> TestClient:
    """Return a ``TestClient`` around a FastAPI app that registers logging
    handlers for ``HTTPException``, ``RequestValidationError`` and ``Exception``.

    The handlers are written out in this helper; ``RequestIDMiddleware``,
    ``_DEFAULT_DETAIL_FOR``, ``_V1_VERB_PATH_RE`` and ``make_router`` are
    imported from the serving package. The client is created with
    ``raise_server_exceptions=False`` so a 500 comes back as a response.

    NOTE(review): these handlers are intended to mirror the ones in app.py,
    and conftest's ``build_v1_app`` is said to use a simpler handler that does
    not log — neither is verifiable from this module; confirm they stay in sync.

    Args:
        entry: Model entry registered under ``entry.name`` in a new registry.

    Returns:
        A ``TestClient`` for the assembled app, with the router mounted under
        ``/v1`` and no API keys configured.
    """
    import structlog
    from fastapi import FastAPI, Request
    from fastapi.exceptions import HTTPException, RequestValidationError
    from fastapi.responses import JSONResponse

    from recotem.serving import metrics as _metrics
    from recotem.serving.app import (
        _DEFAULT_DETAIL_FOR,
        _V1_VERB_PATH_RE,
        RequestIDMiddleware,
    )
    from recotem.serving.routes import make_router

    models = ModelRegistry()
    models.replace(entry.name, entry)

    app = FastAPI()

    @app.exception_handler(HTTPException)
    async def _http_handler(request: Request, exc: HTTPException) -> JSONResponse:
        detail = exc.detail
        if not isinstance(detail, dict):
            content = {"detail": detail}
        else:
            # Dict details pass through; a missing "detail" key is filled in.
            content = dict(detail)
            content.setdefault(
                "detail", _DEFAULT_DETAIL_FOR.get(exc.status_code, "Error")
            )
        return JSONResponse(
            status_code=exc.status_code,
            content=content,
            headers=getattr(exc, "headers", None),
        )

    @app.exception_handler(RequestValidationError)
    async def _validation_handler(
        request: Request, exc: RequestValidationError
    ) -> JSONResponse:
        path = request.url.path
        verb_match = _V1_VERB_PATH_RE.match(path)
        # A metric is recorded only when the path parses as (recipe, verb).
        if verb_match is not None:
            _metrics.record_v1_request(
                recipe=verb_match.group("name"),
                verb=verb_match.group("verb"),
                status="validation_error",
                latency_seconds=0.0,
            )
        request_id = getattr(request.state, "request_id", "")
        # Drop "input" / "ctx" from every error before logging or returning.
        sanitized_errors = []
        for raw_error in exc.errors():
            sanitized_errors.append(
                {
                    key: value
                    for key, value in raw_error.items()
                    if key not in ("input", "ctx")
                }
            )
        structlog.get_logger(__name__).warning(
            "validation_failed",
            path=path,
            method=request.method,
            request_id=request_id,
            error_count=len(sanitized_errors),
            matched_v1_verb=verb_match is not None,
            errors=sanitized_errors,
        )
        envelope = {
            "request_id": request_id,
            "detail": "Request validation failed",
            "code": "VALIDATION_ERROR",
            "errors": sanitized_errors,
        }
        return JSONResponse(status_code=422, content=envelope)

    @app.exception_handler(Exception)
    async def _unhandled_handler(request: Request, exc: Exception) -> JSONResponse:
        request_id = getattr(request.state, "request_id", "")
        structlog.get_logger(__name__).exception(
            "unhandled_500",
            path=str(request.url.path),
            request_id=request_id,
            exc_type=type(exc).__name__,
        )
        # The header is attached only when a request id is available.
        headers = None
        if request_id:
            headers = {"X-Request-ID": request_id}
        envelope = {
            "detail": "internal error",
            "code": "INTERNAL_ERROR",
            "request_id": request_id,
        }
        return JSONResponse(status_code=500, content=envelope, headers=headers)

    app.add_middleware(RequestIDMiddleware)
    app.include_router(make_router(registry=models, api_keys=[]), prefix="/v1")
    return TestClient(app, raise_server_exceptions=False)


def test_unhandled_500_log_emitted_with_required_fields() -> None:
    """A 500 caused by a ``RuntimeError`` produces exactly one
    ``unhandled_500`` event carrying ``request_id``, ``path`` and
    ``exc_type == "RuntimeError"``."""
    import structlog.testing

    crashing_rec = MagicMock()
    crashing_rec.get_recommendation_for_known_user_id.side_effect = RuntimeError("boom")
    api = _build_production_app(_loaded_entry(crashing_rec))

    with structlog.testing.capture_logs() as cap:
        response = api.post(
            "/v1/recipes/demo:recommend",
            json={"user_id": "u1"},
            headers={"X-Request-ID": "test-req-id-123"},
        )

    assert response.status_code == 500, response.text

    events = [entry for entry in cap if entry.get("event") == "unhandled_500"]
    assert len(events) == 1, (
        f"unhandled_500 must be emitted exactly once; got {len(events)}: "
        f"{[e.get('event') for e in cap]!r}"
    )
    (evt,) = events
    assert "request_id" in evt, f"unhandled_500 must carry request_id; got {evt!r}"
    assert "path" in evt, f"unhandled_500 must carry path; got {evt!r}"
    assert "exc_type" in evt, f"unhandled_500 must carry exc_type; got {evt!r}"
    assert evt["exc_type"] == "RuntimeError", (
        f"exc_type must be 'RuntimeError'; got {evt['exc_type']!r}"
    )
    # The logged path has to point at the recommend endpoint.
    assert "recommend" in evt["path"], (
        f"path must contain 'recommend'; got {evt['path']!r}"
    )
+ + +# --------------------------------------------------------------------------- +# Finding 17: validation_failed log includes sanitized errors +# --------------------------------------------------------------------------- + + +def test_validation_failed_log_includes_sanitized_errors() -> None: + """A 422 from a malformed request must produce a 'validation_failed' log + event with 'errors' array containing loc and msg but NOT input or ctx.""" + import structlog.testing + + client = _build_production_app(_loaded_entry()) + + with structlog.testing.capture_logs() as cap: + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 999999}, + ) + + assert r.status_code == 422, r.text + + events = [e for e in cap if e.get("event") == "validation_failed"] + assert events, ( + f"Expected 'validation_failed' log event; got: {[e.get('event') for e in cap]!r}" + ) + evt = events[0] + assert "errors" in evt, f"validation_failed log must carry 'errors'; got {evt!r}" + errors = evt["errors"] + assert isinstance(errors, list), f"errors must be a list; got {type(errors)!r}" + assert errors, "errors list must be non-empty" + + # Check sanitization: no 'input' or 'ctx' keys in each error + for err in errors: + assert "input" not in err, ( + f"Sanitized error must not contain 'input'; got {err!r}" + ) + assert "ctx" not in err, f"Sanitized error must not contain 'ctx'; got {err!r}" + assert "loc" in err, f"Error must have 'loc' field; got {err!r}" + assert "msg" in err, f"Error must have 'msg' field; got {err!r}" + + +def test_build_v1_app_registers_all_three_exception_handlers() -> None: + """``build_v1_app`` (used by the unit-test suite) must register the same + three exception handlers as ``create_app`` in production: HTTPException, + RequestValidationError, and the catch-all Exception. 
Without the third, + 500 responses fall back to FastAPI's default plain-text body and tests + silently diverge from production behaviour.""" + from fastapi.exceptions import HTTPException, RequestValidationError + + registry = ModelRegistry() + registry.replace("demo", _loaded_entry()) + app = build_v1_app(registry) + + handlers = app.exception_handlers + assert HTTPException in handlers, ( + f"HTTPException handler missing; got {list(handlers.keys())!r}" + ) + assert RequestValidationError in handlers, ( + f"RequestValidationError handler missing; got {list(handlers.keys())!r}" + ) + assert Exception in handlers, ( + f"Exception handler missing — 500s will fall back to plain text. " + f"got {list(handlers.keys())!r}" + ) diff --git a/tests/unit/test_v1_health_metrics.py b/tests/unit/test_v1_health_metrics.py new file mode 100644 index 00000000..4b21a56f --- /dev/null +++ b/tests/unit/test_v1_health_metrics.py @@ -0,0 +1,233 @@ +# tests/unit/test_v1_health_metrics.py +"""Verify /v1/health, /v1/health/details, and /v1/metrics behave like +their legacy counterparts but mounted under /v1. 
+""" + +from __future__ import annotations + +import hashlib +from unittest.mock import MagicMock + +import pytest +from fastapi.testclient import TestClient + +from recotem.config import ApiKeyEntry +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + + +def _client(api_keys: list[ApiKeyEntry] | None = None) -> TestClient: + registry = ModelRegistry() + return TestClient(build_v1_app(registry, api_keys=api_keys or [])) + + +def _make_api_entry(plaintext: str, kid: str = "k1") -> ApiKeyEntry: + sha256_hex = hashlib.scrypt( + plaintext.encode(), + salt=b"recotem.api-key.v1", + n=2, + r=8, + p=1, + dklen=32, + ).hex() + return ApiKeyEntry(kid=kid, sha256_hex=sha256_hex) + + +def _entry() -> ApiKeyEntry: + return _make_api_entry("api_key_32_bytes_exactly_here!!!") + + +def test_health_returns_ok_with_empty_registry(): + r = _client().get("/v1/health") + assert r.status_code == 200 + body = r.json() + assert body["status"] == "ok" + assert body["total"] == 0 + assert body["loaded"] == 0 + + +def test_health_details_requires_auth(): + r = _client(api_keys=[_entry()]).get("/v1/health/details") + assert r.status_code == 401 + + +# --------------------------------------------------------------------------- +# A. 
/v1/metrics endpoint gating (I4) +# --------------------------------------------------------------------------- + + +def test_metrics_endpoint_404_when_env_unset( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("RECOTEM_METRICS_ENABLED", raising=False) + from recotem.serving import metrics as _m + + monkeypatch.setattr(_m, "metrics_enabled", lambda: False) + client = _client(api_keys=[_entry()]) + r = client.get("/v1/metrics") + assert r.status_code == 404 + + +def test_metrics_endpoint_404_when_env_falsy( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "0") + from recotem.serving import metrics as _m + + monkeypatch.setattr(_m, "metrics_enabled", lambda: False) + client = _client(api_keys=[_entry()]) + r = client.get("/v1/metrics") + assert r.status_code == 404 + + +def test_metrics_endpoint_requires_auth_when_enabled( + monkeypatch: pytest.MonkeyPatch, +) -> None: + pytest.importorskip("prometheus_client") + monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "1") + from recotem.serving import metrics as _m + + monkeypatch.setattr(_m, "metrics_enabled", lambda: True) + plaintext = "metrics_test_key_32_bytes_padded!" + api_entry = _make_api_entry(plaintext) + client = _client(api_keys=[api_entry]) + r = client.get("/v1/metrics") + assert r.status_code == 401 + + +def test_metrics_endpoint_200_with_auth( + monkeypatch: pytest.MonkeyPatch, +) -> None: + pytest.importorskip("prometheus_client") + monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "1") + from recotem.serving import metrics as _m + + monkeypatch.setattr(_m, "metrics_enabled", lambda: True) + plaintext = "metrics_test_key_32_bytes_padded!" 
+ api_entry = _make_api_entry(plaintext) + client = _client(api_keys=[api_entry]) + r = client.get("/v1/metrics", headers={"X-API-Key": plaintext}) + assert r.status_code == 200 + assert r.headers["content-type"].startswith("text/plain") + assert "# HELP" in r.text or "# TYPE" in r.text + + +# --------------------------------------------------------------------------- +# T7: /v1/health/details per-recipe full shape +# --------------------------------------------------------------------------- + + +def _make_loaded_entry_with_header(name: str) -> ModelEntry: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + rec._mapper = MagicMock() + rec._mapper.user_id_to_index = {"u1": 0} + return ModelEntry( + name=name, + recommender=rec, + header={ + "best_class": "TopPop", + "trained_at": "2026-01-01T00:00:00Z", + }, + kid="active", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, "e" * 64), + loaded_at_unix=1747800000.0, + ) + + +def _make_stub_entry_with_error(name: str) -> ModelEntry: + return ModelEntry( + name=name, + recommender=None, + header={}, + kid="", + metadata_df=None, + last_load_error="artifact load failed: HMAC verify failed", + artifact_path="", + loaded=False, + ) + + +def test_health_details_per_recipe_shape_with_healthy_recipe() -> None: + """A healthy (loaded=True) recipe entry in /v1/health/details includes: + - loaded: True + - best_class (from header) + - trained_at (from header) + - kid + - no 'error' field + """ + registry = ModelRegistry() + registry.replace("healthy_recipe", _make_loaded_entry_with_header("healthy_recipe")) + client = TestClient(build_v1_app(registry)) + + r = client.get("/v1/health/details") + # No api_keys → accessible without auth; only loaded recipe → status ok. 
+ assert r.status_code == 200, r.text + body = r.json() + assert body["status"] == "ok" + recipes = body["recipes"] + assert "healthy_recipe" in recipes, ( + f"healthy_recipe must appear in /v1/health/details; got {list(recipes.keys())}" + ) + entry_health = recipes["healthy_recipe"] + assert entry_health["loaded"] is True + assert entry_health.get("best_class") == "TopPop" + assert entry_health.get("trained_at") == "2026-01-01T00:00:00Z" + assert entry_health.get("kid") == "active" + assert "error" not in entry_health, ( + f"Healthy entry must not have 'error' field; got {entry_health!r}" + ) + + +def test_health_details_per_recipe_shape_with_failed_recipe() -> None: + """A stub (loaded=False) recipe entry in /v1/health/details includes: + - loaded: False + - error string + - no best_class, trained_at, or kid (they default to empty/absent) + """ + registry = ModelRegistry() + registry.replace("broken_recipe", _make_stub_entry_with_error("broken_recipe")) + client = TestClient(build_v1_app(registry)) + + r = client.get("/v1/health/details") + # Unloaded recipe → degraded → 503. + assert r.status_code == 503, r.text + body = r.json() + assert body["status"] == "degraded" + recipes = body["recipes"] + assert "broken_recipe" in recipes + entry_health = recipes["broken_recipe"] + assert entry_health["loaded"] is False + assert "error" in entry_health, ( + "Stub entry with last_load_error must expose 'error' in health details" + ) + assert "HMAC" in entry_health["error"] or "artifact load" in entry_health["error"] + + +def test_health_details_shows_two_recipes_one_healthy_one_stub() -> None: + """When 2 recipes exist (1 healthy, 1 stub), /v1/health/details returns both + and the overall status is 'degraded'. 
+ """ + registry = ModelRegistry() + registry.replace("good", _make_loaded_entry_with_header("good")) + registry.replace("bad", _make_stub_entry_with_error("bad")) + client = TestClient(build_v1_app(registry)) + + r = client.get("/v1/health/details") + assert r.status_code == 503, r.text + body = r.json() + assert body["status"] == "degraded" + recipes = body["recipes"] + assert "good" in recipes + assert "bad" in recipes + + # Good recipe is loaded and has no error. + assert recipes["good"]["loaded"] is True + assert "error" not in recipes["good"] + + # Bad recipe is not loaded and has an error. + assert recipes["bad"]["loaded"] is False + assert "error" in recipes["bad"] diff --git a/tests/unit/test_v1_metadata_enrichment.py b/tests/unit/test_v1_metadata_enrichment.py new file mode 100644 index 00000000..f772062a --- /dev/null +++ b/tests/unit/test_v1_metadata_enrichment.py @@ -0,0 +1,606 @@ +# tests/unit/test_v1_metadata_enrichment.py +"""Verify that item metadata is included in :recommend responses. + +Production serving builds ``entry.metadata_index`` at artifact load time +(see ``app.py:_try_load_artifact`` and ``watcher.py:_build_entry``) and +the router reads from it via ``meta_index.get(item_id, {})``. The +deny-set is already applied by ``build_metadata_index`` at load time, so +the router does NOT re-apply it at serve time. Deny-set semantics are +covered separately in ``tests/unit/test_metadata_loader.py``. 
+""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pandas as pd +import pytest +from fastapi.testclient import TestClient + +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_client( + entry: ModelEntry, +) -> TestClient: + registry = ModelRegistry() + registry.replace(entry.name, entry) + return TestClient( + build_v1_app(registry), + ) + + +_FAKE_SHA256_HEX = "3" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _entry_with_metadata_index( + metadata_index: dict[str, dict], + recommender: MagicMock, +) -> ModelEntry: + """Return a loaded entry that uses the fast dict-based metadata path.""" + return ModelEntry( + name="demo", + recommender=recommender, + header={}, + kid="test", + metadata_df=None, + metadata_index=metadata_index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def _entry_with_loaded_metadata( + df: pd.DataFrame, + recommender: MagicMock, + metadata_field_deny: list[str] | None = None, +) -> ModelEntry: + """Return a loaded entry whose ``metadata_index`` is built from *df*. + + Mirrors production behaviour where the index is built at load time by + ``build_metadata_index`` (deny-set applied), so tests that set up an + entry-with-metadata path go through the same code as serving does. 
+ """ + from recotem.metadata.loader import build_metadata_index + + deny_set: frozenset[str] = frozenset(s.lower() for s in (metadata_field_deny or [])) + index = build_metadata_index(df, deny_set=deny_set) + return ModelEntry( + name="demo", + recommender=recommender, + header={}, + kid="test", + metadata_df=None, + metadata_index=index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +# --------------------------------------------------------------------------- +# Task D.1 — metadata_index path: fields appear in response items +# --------------------------------------------------------------------------- + + +def test_recommend_includes_metadata_fields_from_index() -> None: + """Items returned by :recommend carry metadata from metadata_index.""" + meta_index = { + "i1": {"title": "Widget A", "category": "tools"}, + "i2": {"title": "Widget B", "category": "home"}, + } + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9), ("i2", 0.5)] + + client = _make_client(_entry_with_metadata_index(meta_index, rec)) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 2}) + + assert r.status_code == 200, r.text + items = r.json()["items"] + assert len(items) == 2 + + item1 = next(x for x in items if x["item_id"] == "i1") + assert item1["title"] == "Widget A" + assert item1["category"] == "tools" + assert item1["score"] == 0.9 + + item2 = next(x for x in items if x["item_id"] == "i2") + assert item2["title"] == "Widget B" + + +def test_recommend_item_without_metadata_entry_has_no_extra_fields() -> None: + """Items with no matching metadata_index entry carry only item_id and score.""" + meta_index: dict[str, dict] = {} # empty — no metadata for any item + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i_unknown", 0.3)] + + client = _make_client(_entry_with_metadata_index(meta_index, rec)) + r = 
client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + + assert r.status_code == 200, r.text + item = r.json()["items"][0] + assert item["item_id"] == "i_unknown" + assert item["score"] == 0.3 + # Only the two mandatory fields should be present + assert set(item.keys()) == {"item_id", "score"} + + +# --------------------------------------------------------------------------- +# Task D.2 — load-time deny-set: denied fields are stripped at serve time +# --------------------------------------------------------------------------- +# The deny-set is applied by ``build_metadata_index`` at artifact-load +# time; the router only reads the pre-flattened index. These tests +# pre-build an index with the deny-set applied to mirror the production +# load path. Lower-level deny-set semantics (case-insensitivity, NaN +# handling) are covered in ``tests/unit/test_metadata_loader.py``. + + +def test_recommend_strips_denied_fields_pre_built_into_index() -> None: + df = pd.DataFrame( + { + "title": ["Widget A", "Widget B"], + "internal_score": [99.0, 88.0], + "category": ["tools", "home"], + }, + index=pd.Index(["i1", "i2"], name="item_id"), + ) + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9), ("i2", 0.5)] + + client = _make_client( + _entry_with_loaded_metadata(df, rec, metadata_field_deny=["internal_score"]), + ) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 2}) + + assert r.status_code == 200, r.text + for item in r.json()["items"]: + assert "internal_score" not in item + assert "title" in item + assert "category" in item + + +def test_recommend_deny_is_case_insensitive_pre_built() -> None: + df = pd.DataFrame( + {"Secret": ["s1", "s2"], "name": ["n1", "n2"]}, + index=pd.Index(["i1", "i2"], name="item_id"), + ) + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + + client = _make_client( + _entry_with_loaded_metadata(df, rec, 
metadata_field_deny=["SECRET"]), + ) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + + assert r.status_code == 200, r.text + item = r.json()["items"][0] + assert "Secret" not in item + assert "name" in item + + +# --------------------------------------------------------------------------- +# H. score/item_id precedence, extra fields +# --------------------------------------------------------------------------- + + +def test_recommender_score_wins_over_metadata_score() -> None: + meta_index = { + "i1": {"item_id": "WRONG", "score": 999.0, "title": "Widget A"}, + } + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + entry = _entry_with_metadata_index(meta_index, rec) + client = _make_client(entry) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 200, r.text + item = r.json()["items"][0] + assert item["item_id"] == "i1" + assert item["score"] == 0.9 + + +def test_response_preserves_extra_metadata_through_pydantic_roundtrip() -> None: + meta_index = { + "i1": {"foo bar": "extra-value", "title": "Widget"}, + } + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + entry = _entry_with_metadata_index(meta_index, rec) + client = _make_client(entry) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 200, r.text + item = r.json()["items"][0] + assert item.get("foo bar") == "extra-value" + + +# --------------------------------------------------------------------------- +# T5: :recommend-related enriches with metadata + respects RECOTEM_METADATA_FIELD_DENY +# --------------------------------------------------------------------------- + + +def _entry_related_with_metadata_index( + metadata_index: dict[str, dict], + recommender: MagicMock, +) -> ModelEntry: + """Return a loaded entry suitable for :recommend-related with a metadata_index.""" + # :recommend-related pre-checks 
_mapper.item_id_to_index for known seeds. + recommender._mapper.item_id_to_index = {"seed1": 0, "seed2": 1} + return ModelEntry( + name="demo", + recommender=recommender, + header={}, + kid="test", + metadata_df=None, + metadata_index=metadata_index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def _entry_related_with_loaded_metadata( + df: pd.DataFrame, + recommender: MagicMock, + metadata_field_deny: list[str] | None = None, +) -> ModelEntry: + """Return a loaded entry whose metadata_index is built from *df* (deny applied).""" + from recotem.metadata.loader import build_metadata_index + + recommender._mapper.item_id_to_index = {"seed1": 0} + deny_set: frozenset[str] = frozenset(s.lower() for s in (metadata_field_deny or [])) + index = build_metadata_index(df, deny_set=deny_set) + return ModelEntry( + name="demo", + recommender=recommender, + header={}, + kid="test", + metadata_df=None, + metadata_index=index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def test_recommend_related_includes_metadata_fields() -> None: + """Items returned by :recommend-related carry metadata from metadata_index.""" + meta_index = { + "i1": {"title": "Widget A", "category": "tools"}, + "i2": {"title": "Widget B", "category": "home"}, + } + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9), ("i2", 0.5)] + + entry = _entry_related_with_metadata_index(meta_index, rec) + client = _make_client(entry) + + r = client.post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["seed1"], "limit": 2}, + ) + + assert r.status_code == 200, r.text + items = r.json()["items"] + assert len(items) == 2 + + item1 = next(x for x in items if x["item_id"] == "i1") + assert item1["title"] == "Widget A" + assert item1["category"] == "tools" + assert item1["score"] == 0.9 + + item2 = next(x for x in items if x["item_id"] == "i2") + assert item2["title"] == 
"Widget B" + + +def test_recommend_related_strips_denied_fields() -> None: + """Denied fields (applied at load time) must not appear in :recommend-related items.""" + df = pd.DataFrame( + { + "title": ["Widget A", "Widget B"], + "internal_score": [99.0, 88.0], + "category": ["tools", "home"], + }, + index=pd.Index(["i1", "i2"], name="item_id"), + ) + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9), ("i2", 0.5)] + + entry = _entry_related_with_loaded_metadata( + df, rec, metadata_field_deny=["internal_score"] + ) + client = _make_client(entry) + + r = client.post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["seed1"], "limit": 2}, + ) + + assert r.status_code == 200, r.text + for item in r.json()["items"]: + assert "internal_score" not in item, ( + "Denied field 'internal_score' must not appear in :recommend-related items" + ) + assert "title" in item + assert "category" in item + + +# --------------------------------------------------------------------------- +# C1: _build_items fallback / drop / X-Recotem-Items-Degraded header +# --------------------------------------------------------------------------- + + +def test_build_items_fallback_path_via_monkeypatch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Scenario A (fallback): first model_validate (full fields) raises + ValidationError; second call (bare item_id/score) succeeds. + + Asserts: + - Response is 200. + - X-Recotem-Items-Degraded header equals the fallback count. + - recotem_v1_metadata_degraded_items_total{kind="fallback"} increments. + - metadata_serialization_failed log event is captured. + - recotem_metadata_serialization_errors_total{recipe,verb} increments. 
+ """ + import structlog.testing + from pydantic import ValidationError + + import recotem.serving.metrics as _metrics_mod + from recotem.serving.schemas import RecommendItem + + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [ + ("i1", 0.9), + ("i2", 0.5), + ] + meta_index = {"i1": {"title": "Widget A"}, "i2": {"title": "Widget B"}} + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="test", + metadata_df=None, + metadata_index=meta_index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + client = _make_client(entry) + + original_validate = RecommendItem.model_validate + call_state: dict[str, int] = {} + + def _failing_first_with_title(data, *args, **kwargs): + key = (data.get("item_id"), "has_title" if "title" in data else "bare") + if key[0] == "i1" and key[1] == "has_title": + call_state.setdefault("fail_count", 0) + call_state["fail_count"] += 1 + raise original_validate( + {"item_id": "i1", "score": float("inf")} # triggers allow_inf_nan=False + ).__class__ # won't reach here — need real ValidationError below + + del _failing_first_with_title + + first_call: dict[str, bool] = {} + + def _patched_validate(data, *args, **kwargs): + if isinstance(data, dict) and data.get("item_id") == "i1" and "title" in data: + first_call["seen"] = True + try: + return original_validate({"item_id": "i1", "score": float("inf")}) + except ValidationError as exc: + raise exc + return original_validate(data, *args, **kwargs) + + monkeypatch.setattr( + RecommendItem, "model_validate", staticmethod(_patched_validate) + ) + + degraded_calls: list[tuple[str, str, str, int]] = [] + real_inc = _metrics_mod.inc_metadata_degraded_items + + def _spy_degraded(recipe, verb, kind, count=1): + degraded_calls.append((recipe, verb, kind, count)) + + monkeypatch.setattr(_metrics_mod, "inc_metadata_degraded_items", _spy_degraded) + + serialization_calls: list[tuple[str, str]] = [] + real_serialization = 
_metrics_mod.inc_metadata_serialization_error + + def _spy_serialization(recipe, verb): + serialization_calls.append((recipe, verb)) + + monkeypatch.setattr( + _metrics_mod, "inc_metadata_serialization_error", _spy_serialization + ) + + with structlog.testing.capture_logs() as cap: + r = client.post( + "/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 2} + ) + + assert r.status_code == 200, r.text + degraded_val = r.headers.get("x-recotem-items-degraded") + assert degraded_val is not None, ( + "X-Recotem-Items-Degraded must be set when fallback occurs" + ) + assert int(degraded_val) >= 1, ( + f"X-Recotem-Items-Degraded must be >= 1; got {degraded_val!r}" + ) + + fallback_events = [e for e in degraded_calls if e[2] == "fallback"] + assert fallback_events, ( + f"inc_metadata_degraded_items must be called with kind='fallback'; got {degraded_calls!r}" + ) + + log_events = [e for e in cap if e.get("event") == "metadata_serialization_failed"] + assert log_events, ( + f"metadata_serialization_failed log event must be emitted; got {[e.get('event') for e in cap]!r}" + ) + + assert serialization_calls, ( + f"inc_metadata_serialization_error must be called; got {serialization_calls!r}" + ) + + +def test_build_items_dropped_path_via_monkeypatch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Scenario B (dropped): both model_validate calls (full and bare) raise + ValidationError for a specific item. + + Asserts: + - Response is 200 (other items served). + - X-Recotem-Items-Degraded header equals the dropped count. + - recotem_v1_metadata_degraded_items_total{kind="dropped"} increments. + - metadata_serialization_failed log event is captured. + - recotem_metadata_serialization_errors_total{recipe,verb} increments. 
+ """ + import structlog.testing + from pydantic import ValidationError + + import recotem.serving.metrics as _metrics_mod + from recotem.serving.schemas import RecommendItem + + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [ + ("bad-item", 0.9), + ("i2", 0.5), + ] + meta_index: dict[str, dict] = {} + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="test", + metadata_df=None, + metadata_index=meta_index, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + client = _make_client(entry) + + original_validate = RecommendItem.model_validate + + def _patched_validate(data, *args, **kwargs): + if isinstance(data, dict) and data.get("item_id") == "bad-item": + try: + return original_validate({"item_id": "bad-item", "score": float("inf")}) + except ValidationError as exc: + raise exc + return original_validate(data, *args, **kwargs) + + monkeypatch.setattr( + RecommendItem, "model_validate", staticmethod(_patched_validate) + ) + + degraded_calls: list[tuple[str, str, str, int]] = [] + + def _spy_degraded(recipe, verb, kind, count=1): + degraded_calls.append((recipe, verb, kind, count)) + + monkeypatch.setattr(_metrics_mod, "inc_metadata_degraded_items", _spy_degraded) + + serialization_calls: list[tuple[str, str]] = [] + + def _spy_serialization(recipe, verb): + serialization_calls.append((recipe, verb)) + + monkeypatch.setattr( + _metrics_mod, "inc_metadata_serialization_error", _spy_serialization + ) + + with structlog.testing.capture_logs() as cap: + r = client.post( + "/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 2} + ) + + assert r.status_code == 200, r.text + items = r.json()["items"] + item_ids = [it["item_id"] for it in items] + assert "bad-item" not in item_ids, "Dropped item must not appear in response" + assert "i2" in item_ids, "Valid item must still be served" + + degraded_val = r.headers.get("x-recotem-items-degraded") + assert degraded_val is not 
None, ( + "X-Recotem-Items-Degraded must be set when drop occurs" + ) + assert int(degraded_val) >= 1 + + dropped_events = [e for e in degraded_calls if e[2] == "dropped"] + assert dropped_events, ( + f"inc_metadata_degraded_items must be called with kind='dropped'; got {degraded_calls!r}" + ) + + log_events = [e for e in cap if e.get("event") == "metadata_serialization_failed"] + assert log_events, ( + f"metadata_serialization_failed log event must be emitted; got {[e.get('event') for e in cap]!r}" + ) + + assert serialization_calls, ( + f"inc_metadata_serialization_error must be called; got {serialization_calls!r}" + ) + + +def test_build_items_fallback_and_degraded_header() -> None: + """_build_items with no degradation must not set X-Recotem-Items-Degraded. + + This confirms the "all items OK" baseline is clean before the degradation + scenarios in the monkeypatch tests above. + """ + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [ + ("i1", 0.9), + ("i2", 0.5), + ] + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="test", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + client = _make_client(entry) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 2}) + + assert r.status_code == 200, r.text + assert "x-recotem-items-degraded" not in r.headers, ( + "No degradation must not set X-Recotem-Items-Degraded" + ) + assert len(r.json()["items"]) == 2 + + +def test_build_items_no_degraded_header_when_all_items_ok() -> None: + """When all items serialize cleanly, X-Recotem-Items-Degraded must not + be set on the response.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + meta_index = {"i1": {"title": "OK Item"}} + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="test", + metadata_df=None, + metadata_index=meta_index, + 
loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + client = _make_client(entry) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + + assert r.status_code == 200, r.text + assert "x-recotem-items-degraded" not in r.headers, ( + "X-Recotem-Items-Degraded must NOT be present when all items are OK" + ) diff --git a/tests/unit/test_v1_metrics_cardinality.py b/tests/unit/test_v1_metrics_cardinality.py new file mode 100644 index 00000000..16e2a67b --- /dev/null +++ b/tests/unit/test_v1_metrics_cardinality.py @@ -0,0 +1,179 @@ +# tests/unit/test_v1_metrics_cardinality.py +"""T6: /v1/metrics bounded cardinality of recipe= labels in recotem_v1_requests_total. + +Design: + Send 10 requests to nonexistent recipes (recipe_0 … recipe_9), then scrape + /v1/metrics and count the distinct recipe= label values in + recotem_v1_requests_total. + + The implementation records the verbatim recipe name from the URL path even + for not-found requests. This test asserts the *observable contract*: + after N nonexistent-recipe requests the metric line count for + ``recotem_v1_requests_total`` is bounded — specifically, each unique + ``recipe=`` value appears in the Prometheus output AT MOST ONCE (no + duplicate label combinations), confirming the counter is not growing + without bound per *request* (each unique (recipe, verb, status) triple + is counted once, not once per request). + + This test is NOT asserting that cardinality is capped at a constant value — + it asserts only that the metric is consistent (one Prometheus line per + distinct label-set, not one line per request). For operators that need a + strict cardinality cap, the recommendation is to add a pre-routing name + allow-list; that is a separate concern documented in docs/operations.md. 
+""" + +from __future__ import annotations + +import pytest +from fastapi.testclient import TestClient + +from recotem.serving.registry import ModelRegistry +from tests.conftest import build_v1_app + + +def _make_client_with_metrics( + monkeypatch: pytest.MonkeyPatch, +) -> TestClient: + """Build a TestClient with metrics enabled.""" + pytest.importorskip("prometheus_client") + monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "1") + from recotem.serving import metrics as _m + + monkeypatch.setattr(_m, "metrics_enabled", lambda: True) + # Reset lazy-init state so metrics objects are re-created under the patch. + monkeypatch.setattr(_m, "_V1_REQUEST_COUNTER", None) + monkeypatch.setattr(_m, "_V1_REQUEST_LATENCY", None) + monkeypatch.setattr(_m, "_V1_BATCH_SIZE", None) + monkeypatch.setattr(_m, "_V1_BATCH_ELEMENT_ERRORS", None) + monkeypatch.setattr(_m, "_V1_METADATA_DEGRADED_ITEMS", None) + monkeypatch.setattr(_m, "_V1_VALIDATION_ERRORS_OUTSIDE_VERB", None) + + registry = ModelRegistry() + return TestClient(build_v1_app(registry)) + + +def test_metrics_label_cardinality_does_not_grow_per_request( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Each distinct (recipe, verb, status) triple appears exactly once in + recotem_v1_requests_total lines — not once per request. + + Sends 10 requests to 10 distinct nonexistent recipe names, then checks + the test counter to confirm that Prometheus counters accumulate value + (one line per label-set, not one line per request). + """ + pytest.importorskip("prometheus_client") + from unittest.mock import MagicMock + + import prometheus_client + + from recotem.serving import metrics as _m + + # Use a fresh CollectorRegistry for isolation. 
+ test_registry = prometheus_client.CollectorRegistry() + test_counter = prometheus_client.Counter( + "test_recotem_v1_requests_cardinality_main", + "Test counter for cardinality check", + ["recipe", "verb", "status"], + registry=test_registry, + ) + latency_mock = MagicMock() # also mock the latency histogram so it doesn't fail + + monkeypatch.setattr(_m, "_V1_REQUEST_COUNTER", test_counter) + monkeypatch.setattr(_m, "_V1_REQUEST_LATENCY", latency_mock) + + # record_v1_request 10 times with the same label-set (same recipe/verb/status). + # The counter must accumulate (value increments), not add new lines. + for i in range(10): + _m.record_v1_request( + recipe="repeated_recipe", + verb="recommend", + status="recipe_not_found", + latency_seconds=0.001, + ) + + samples = list(test_registry.collect()) + # Prometheus Counter produces both a _total sample and a _created sample. + # Only the _total sample carries the accumulated count; filter to those. + request_total_samples = [] + for metric_family in samples: + if "test_recotem_v1_requests_cardinality_main" in metric_family.name: + for s in metric_family.samples: + if s.name.endswith("_total"): + request_total_samples.append(s) + + # Each distinct label-set must appear exactly once in the _total samples. + label_sets = [tuple(sorted(s.labels.items())) for s in request_total_samples] + unique_label_sets = set(label_sets) + + assert len(label_sets) == len(unique_label_sets), ( + f"Duplicate label-sets detected in metrics: " + f"total_lines={len(label_sets)}, unique={len(unique_label_sets)}. " + "Each distinct (recipe, verb, status) combination must appear exactly once." + ) + + # The counter value for the repeated label-set must be 10 (accumulated, not reset). 
+ assert len(request_total_samples) == 1, ( + f"Expected 1 sample line for one unique label-set; " + f"got {len(request_total_samples)}: {request_total_samples!r}" + ) + assert request_total_samples[0].value == 10.0, ( + f"Counter must accumulate to 10 for 10 identical requests; " + f"got {request_total_samples[0].value}" + ) + + +def test_metrics_recipe_not_found_recorded_with_recipe_name( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A RECIPE_NOT_FOUND (404) request is recorded with status=recipe_not_found. + + This asserts the status label value used for not-found recipes so operators + know which status bucket covers nonexistent recipe requests. + """ + pytest.importorskip("prometheus_client") + from unittest.mock import MagicMock + + import prometheus_client + + test_registry = prometheus_client.CollectorRegistry() + from recotem.serving import metrics as _m + + counter = prometheus_client.Counter( + "test_recotem_v1_requests_cardinality_status", + "Status label check for not-found", + ["recipe", "verb", "status"], + registry=test_registry, + ) + + # record_v1_request calls both _V1_REQUEST_COUNTER and _V1_REQUEST_LATENCY + # unconditionally; mock both so _V1_REQUEST_LATENCY is not None. + latency_mock = MagicMock() + monkeypatch.setattr(_m, "_V1_REQUEST_COUNTER", counter) + monkeypatch.setattr(_m, "_V1_REQUEST_LATENCY", latency_mock) + + _m.record_v1_request( + recipe="nonexistent_recipe", + verb="recommend", + status="recipe_not_found", + latency_seconds=0.001, + ) + + # Verify the label value was recorded correctly. + # Filter to _total samples only (Prometheus Counter also emits _created samples). 
+ samples = list(test_registry.collect()) + recorded = [] + for mf in samples: + if "test_recotem_v1_requests_cardinality_status" in mf.name: + for s in mf.samples: + if s.name.endswith("_total"): + recorded.append(s) + + assert len(recorded) == 1, ( + f"Expected exactly 1 _total counter sample; got {len(recorded)}: {recorded!r}" + ) + assert recorded[0].labels["status"] == "recipe_not_found", ( + f"Status label must be 'recipe_not_found'; got {recorded[0].labels!r}" + ) + assert recorded[0].labels["recipe"] == "nonexistent_recipe" + assert recorded[0].labels["verb"] == "recommend" diff --git a/tests/unit/test_v1_recipes_discovery.py b/tests/unit/test_v1_recipes_discovery.py new file mode 100644 index 00000000..b630a42f --- /dev/null +++ b/tests/unit/test_v1_recipes_discovery.py @@ -0,0 +1,315 @@ +# tests/unit/test_v1_recipes_discovery.py +"""GET /v1/recipes and GET /v1/recipes/{name} discovery endpoints.""" + +from __future__ import annotations + +import hashlib + +from fastapi.testclient import TestClient + +from recotem.config import ApiKeyEntry +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + + +def _make_api_entry(plaintext: str, kid: str = "api-key") -> ApiKeyEntry: + digest = hashlib.scrypt( + plaintext.encode("utf-8"), + salt=b"recotem.api-key.v1", + n=2, + r=8, + p=1, + dklen=32, + ).hex() + return ApiKeyEntry(kid=kid, sha256_hex=digest) + + +def _client_with_entries( + entries: list[ModelEntry], + api_keys: list[ApiKeyEntry] | None = None, +) -> TestClient: + registry = ModelRegistry() + for e in entries: + registry.replace(e.name, e) + return TestClient(build_v1_app(registry, api_keys=api_keys or [])) + + +_FAKE_SHA256_HEX = "c" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _stub(name: str) -> ModelEntry: + return ModelEntry( + name=name, + recommender=object(), + header={}, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, 
_FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def test_recipes_list_returns_summaries(): + r = _client_with_entries([_stub("a"), _stub("b")]).get("/v1/recipes") + assert r.status_code == 200 + body = r.json() + names = {x["name"] for x in body["recipes"]} + assert names == {"a", "b"} + a = next(x for x in body["recipes"] if x["name"] == "a") + assert a["model_version"] == f"sha256:{_FAKE_SHA256_HEX}" + assert a["kind"] == "user-item" + assert "recommend" in a["supported_verbs"] + + +def test_recipe_detail_returns_404_for_unknown(): + r = _client_with_entries([_stub("a")]).get("/v1/recipes/unknown") + assert r.status_code == 404 + body = r.json() + assert body["code"] == "RECIPE_NOT_FOUND" + assert isinstance(body["detail"], str) + + +def test_recipe_detail_returns_503_for_stub_not_loaded(): + unloaded = ModelEntry( + name="broken", + recommender=None, + header={}, + kid="", + loaded=False, + ) + r = _client_with_entries([unloaded]).get("/v1/recipes/broken") + assert r.status_code == 503 + body = r.json() + assert body["code"] == "RECIPE_UNAVAILABLE" + assert isinstance(body["detail"], str) + + +def test_recipe_detail_returns_full_summary_for_known(): + r = _client_with_entries([_stub("a")]).get("/v1/recipes/a") + assert r.status_code == 200 + body = r.json() + assert body["name"] == "a" + assert body["model_version"] == f"sha256:{_FAKE_SHA256_HEX}" + # algorithms / best_algorithm / config_digest may be empty for the + # stub but the keys MUST exist (contract). + assert "algorithms" in body + assert "best_algorithm" in body + assert "config_digest" in body + + +# --------------------------------------------------------------------------- +# B. Discovery auth boundary (I3) +# --------------------------------------------------------------------------- + + +_VALID_PLAINTEXT = "discovery_test_api_key_32_bytes!" 
+ + +def test_list_recipes_requires_auth() -> None: + api_entry = _make_api_entry(_VALID_PLAINTEXT) + r = _client_with_entries([_stub("a")], api_keys=[api_entry]).get("/v1/recipes") + assert r.status_code == 401 + + +def test_list_recipes_rejects_wrong_key() -> None: + api_entry = _make_api_entry(_VALID_PLAINTEXT) + r = _client_with_entries([_stub("a")], api_keys=[api_entry]).get( + "/v1/recipes", + headers={"X-API-Key": "wrong_key_value_32_bytes_padding!"}, + ) + assert r.status_code == 401 + + +def test_list_recipes_accepts_valid_key() -> None: + api_entry = _make_api_entry(_VALID_PLAINTEXT) + r = _client_with_entries([_stub("a")], api_keys=[api_entry]).get( + "/v1/recipes", + headers={"X-API-Key": _VALID_PLAINTEXT}, + ) + assert r.status_code == 200 + + +def test_recipe_detail_requires_auth() -> None: + api_entry = _make_api_entry(_VALID_PLAINTEXT) + r = _client_with_entries([_stub("a")], api_keys=[api_entry]).get("/v1/recipes/a") + assert r.status_code == 401 + + +def test_recipe_detail_rejects_wrong_key() -> None: + api_entry = _make_api_entry(_VALID_PLAINTEXT) + r = _client_with_entries([_stub("a")], api_keys=[api_entry]).get( + "/v1/recipes/a", + headers={"X-API-Key": "wrong_key_value_32_bytes_padding!"}, + ) + assert r.status_code == 401 + + +def test_recipe_detail_accepts_valid_key() -> None: + api_entry = _make_api_entry(_VALID_PLAINTEXT) + r = _client_with_entries([_stub("a")], api_keys=[api_entry]).get( + "/v1/recipes/a", + headers={"X-API-Key": _VALID_PLAINTEXT}, + ) + assert r.status_code == 200 + + +# --------------------------------------------------------------------------- +# I. 
New detail fields from artifact header +# --------------------------------------------------------------------------- + +_FAKE_RECIPE_HASH = "a" * 64 # 64 lowercase hex chars — valid HexHash + +_HEADER_FIELDS = { + "trained_at": "2026-01-01T00:00:00Z", + "best_class": "TopPopRecommender", + "best_params": {"alpha": 0.1}, + "best_score": 0.42, + "metric": "ndcg", + "cutoff": 10, + "tuning": {"n_trials": 5}, + "data_stats": {"n_users": 100, "n_items": 50}, + "recotem_version": "1.0.0", + "irspack_version": "0.3.0", + "recipe_hash": _FAKE_RECIPE_HASH, +} + + +def _stub_with_header(name: str, header: dict) -> ModelEntry: + return ModelEntry( + name=name, + recommender=object(), + header=header, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def test_recipe_detail_exposes_artifact_header_fields() -> None: + entry = _stub_with_header("myrecipe", _HEADER_FIELDS) + r = _client_with_entries([entry]).get("/v1/recipes/myrecipe") + assert r.status_code == 200 + body = r.json() + for field_name, expected in _HEADER_FIELDS.items(): + assert body[field_name] == expected, ( + f"Field {field_name!r}: expected {expected!r}, got {body[field_name]!r}" + ) + + +def test_recipe_detail_tolerates_missing_header_fields() -> None: + entry = _stub_with_header("emptyheader", {}) + r = _client_with_entries([entry]).get("/v1/recipes/emptyheader") + assert r.status_code == 200 + body = r.json() + for field_name in _HEADER_FIELDS: + assert body[field_name] is None, ( + f"Field {field_name!r} should be null when header omits it; " + f"got {body[field_name]!r}" + ) + + +# --------------------------------------------------------------------------- +# M1/M2 (code-review): algorithms from tuning.tried_algorithms + config_digest normalize +# --------------------------------------------------------------------------- + + +def test_algorithms_from_tuning_tried_algorithms() -> None: + 
"""extract_algorithms falls back to tuning.tried_algorithms when the top-level + 'algorithms' key is absent or empty.""" + from recotem.serving._header_utils import extract_algorithms + + header_no_top_level = { + "trained_at": "2026-01-01T00:00:00Z", + "tuning": {"n_trials": 5, "tried_algorithms": ["TopPop", "IALS"]}, + } + assert extract_algorithms(header_no_top_level) == ["TopPop", "IALS"], ( + "extract_algorithms must fall back to tuning.tried_algorithms" + ) + + header_empty_top_level = { + "algorithms": [], + "tuning": {"tried_algorithms": ["TopPop", "IALS"]}, + } + assert extract_algorithms(header_empty_top_level) == ["TopPop", "IALS"], ( + "extract_algorithms must fall back to tried_algorithms when top-level is empty" + ) + + header_missing_tuning = {} + assert extract_algorithms(header_missing_tuning) == [], ( + "extract_algorithms must return [] when both keys are absent" + ) + + +def test_algorithms_top_level_takes_precedence() -> None: + """When 'algorithms' is present and non-empty at the top level, + extract_algorithms must return it rather than tuning.tried_algorithms.""" + from recotem.serving._header_utils import extract_algorithms + + header = { + "algorithms": ["BPRMFRecommender"], + "tuning": {"tried_algorithms": ["TopPop", "IALS"]}, + } + result = extract_algorithms(header) + assert result == ["BPRMFRecommender"], ( + f"Top-level algorithms must take precedence; got {result!r}" + ) + + +def test_algorithms_field_in_recipe_detail_from_modelentry() -> None: + """The algorithms field returned by GET /v1/recipes/{name} comes from + ModelEntry.algorithms (populated by extract_algorithms at load time).""" + from recotem.serving._header_utils import extract_algorithms + + header = { + "tuning": {"tried_algorithms": ["TopPop", "IALS"]}, + } + entry = ModelEntry( + name="algo_recipe", + recommender=object(), + header=header, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + 
loaded_at_unix=1747800000.0, + algorithms=extract_algorithms(header), + ) + r = _client_with_entries([entry]).get("/v1/recipes/algo_recipe") + assert r.status_code == 200 + body = r.json() + assert body["algorithms"] == ["TopPop", "IALS"], ( + f"algorithms from tuning.tried_algorithms must appear in detail; " + f"got {body['algorithms']!r}" + ) + + +def test_config_digest_normalized_with_sha256_prefix() -> None: + """When the artifact header carries a bare hex config_digest (no sha256: prefix), + normalize_config_digest must add the prefix before storing in ModelEntry. + + The resulting value must satisfy the Sha256Hex pattern (sha256:<64 hex chars>) + used by RecipeDetailResponse.config_digest. + """ + from pydantic import TypeAdapter + + from recotem.serving._header_utils import normalize_config_digest + from recotem.serving.schemas import Sha256Hex + + _hex64 = "a" * 64 + _prefixed = f"sha256:{_hex64}" + + assert normalize_config_digest(_hex64) == _prefixed + assert normalize_config_digest(_prefixed) == _prefixed + assert normalize_config_digest("") is None + assert normalize_config_digest(None) is None + + # Confirm the 64-hex prefixed result passes Sha256Hex pattern validation. 
+ ta = TypeAdapter(Sha256Hex) + validated = ta.validate_python(normalize_config_digest(_hex64)) + assert validated == _prefixed diff --git a/tests/unit/test_v1_recommend.py b/tests/unit/test_v1_recommend.py new file mode 100644 index 00000000..d24c1fae --- /dev/null +++ b/tests/unit/test_v1_recommend.py @@ -0,0 +1,435 @@ +# tests/unit/test_v1_recommend.py +"""POST /v1/recipes/{name}:recommend — single user→items.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +from fastapi.testclient import TestClient + +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + +_FAKE_SHA256_HEX = "a" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _entry_with_recommender(recommender) -> ModelEntry: + """Build a loaded ModelEntry around the given recommender mock. + + The artifact SHA-256 lives on `_loaded_marker[1]`; pass it through + that field rather than introducing a parallel attribute. + """ + return ModelEntry( + name="demo", + recommender=recommender, + header={}, + kid="test", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def _app_with_entry(entry: ModelEntry) -> TestClient: + registry = ModelRegistry() + registry.replace("demo", entry) + return TestClient(build_v1_app(registry)) + + +def test_recommend_returns_items_and_envelope(): + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9), ("i2", 0.5)] + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 2}) + assert r.status_code == 200, r.text + body = r.json() + assert body["recipe"] == "demo" + assert body["model_version"] == f"sha256:{_FAKE_SHA256_HEX}" + assert [i["item_id"] for i in body["items"]] == ["i1", "i2"] + assert "request_id" in body + 
rec.get_recommendation_for_known_user_id.assert_called_once_with("u1", 2) + + +def test_recommend_404_when_user_unknown(): + rec = MagicMock() + rec.get_recommendation_for_known_user_id.side_effect = KeyError("u1") + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 404 + body = r.json() + # Flat error body: top-level "code" and "detail" (string). + assert body["code"] == "UNKNOWN_USER" + assert isinstance(body["detail"], str) + + +def test_recommend_503_when_recipe_not_loaded(): + stub = ModelEntry( + name="demo", + recommender=None, + header={}, + kid="", + loaded=False, + ) + client = _app_with_entry(stub) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 503 + body = r.json() + assert body["code"] == "RECIPE_UNAVAILABLE" + assert isinstance(body["detail"], str) + + +def test_recommend_422_on_empty_user_id(): + rec = MagicMock() + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "", "limit": 5}) + assert r.status_code == 422 + + +def test_recommend_404_when_recipe_missing_from_registry(): + rec = MagicMock() + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post("/v1/recipes/unknown:recommend", json={"user_id": "u1"}) + assert r.status_code == 404 + body = r.json() + assert body["code"] == "RECIPE_NOT_FOUND" + assert isinstance(body["detail"], str) + + +# --------------------------------------------------------------------------- +# D. 
exclude_items + extra="forbid" +# --------------------------------------------------------------------------- + + +def test_recommend_excludes_items() -> None: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [ + ("i1", 0.9), + ("i2", 0.8), + ("i3", 0.7), + ("i4", 0.6), + ("i5", 0.5), + ] + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 5, "exclude_items": ["i2", "i4"]}, + ) + assert r.status_code == 200, r.text + items = r.json()["items"] + ids = [i["item_id"] for i in items] + assert "i2" not in ids + assert "i4" not in ids + assert len(ids) == 3 + + +def test_recommend_rejects_context_field() -> None: + """context field has been removed; sending it must produce 422 (extra=forbid).""" + rec = MagicMock() + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 1, "context": {"foo": "bar"}}, + ) + assert r.status_code == 422, r.text + + +def test_recommend_rejects_extra_field() -> None: + rec = MagicMock() + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "u1", "limit": 1, "unknown_field": "x"}, + ) + assert r.status_code == 422 + + +def test_recommend_rejects_oversized_user_id() -> None: + rec = MagicMock() + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post( + "/v1/recipes/demo:recommend", + json={"user_id": "a" * 257, "limit": 1}, + ) + assert r.status_code == 422 + + +# --------------------------------------------------------------------------- +# Finding 11: KeyError mis-attribution fix +# --------------------------------------------------------------------------- + + +def _entry_with_user_map(recommender, known_users: list[str]) -> ModelEntry: + """Build a loaded entry whose _mapper.user_id_to_index knows *known_users*.""" + 
recommender._mapper.user_id_to_index = {u: i for i, u in enumerate(known_users)} + return ModelEntry( + name="demo", + recommender=recommender, + header={}, + kid="test", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def test_unknown_user_not_in_map_yields_unknown_user_error() -> None: + """User NOT in _mapper.user_id_to_index must yield UNKNOWN_USER (404), + not INTERNAL_ERROR — membership is checked before irspack is called.""" + rec = MagicMock() + rec.get_recommendation_for_known_user_id.side_effect = KeyError("u-ghost") + # u-ghost is NOT in the known map + entry = _entry_with_user_map(rec, known_users=["u-known"]) + client = _app_with_entry(entry) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u-ghost"}) + assert r.status_code == 404 + assert r.json()["code"] == "UNKNOWN_USER", ( + "User absent from mapper must yield UNKNOWN_USER, not INTERNAL_ERROR" + ) + + +def test_known_user_unexpected_keyerror_yields_internal_error() -> None: + """When the user IS in _mapper.user_id_to_index but irspack raises + KeyError anyway, the response must be INTERNAL_ERROR (500).""" + import structlog.testing + + rec = MagicMock() + rec.get_recommendation_for_known_user_id.side_effect = KeyError( + "internal-irspack-bug" + ) + # u-known IS in the user map + entry = _entry_with_user_map(rec, known_users=["u-known"]) + client = _app_with_entry(entry) + + with structlog.testing.capture_logs() as cap: + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u-known"}) + + assert r.status_code == 500, r.text + body = r.json() + assert body.get("code") == "INTERNAL_ERROR", ( + "Unexpected KeyError from irspack for a known user must yield INTERNAL_ERROR" + ) + # The log event "recommender_unexpected_key_error" must be emitted + log_events = [e.get("event") for e in cap] + assert "recommender_unexpected_key_error" in log_events, ( + f"Expected 
recommender_unexpected_key_error log; got: {log_events!r}" + ) + + +def test_batch_known_user_unexpected_keyerror_yields_internal_error() -> None: + """In :batch-recommend, a user in the id-map that triggers KeyError in irspack + must produce status=error / code=INTERNAL_ERROR for that element (not UNKNOWN_USER). + """ + import structlog.testing + + from tests.conftest import build_v1_app + + rec = MagicMock() + # u-known is in the id-map, but irspack raises KeyError anyway + rec._mapper.user_id_to_index = {"u-known": 0} + rec.get_recommendation_for_known_user_id.side_effect = KeyError("irspack-internal") + + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="test", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + with structlog.testing.capture_logs() as cap: + r = client.post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u-known"}]}, + ) + + assert r.status_code == 200, r.text + result = r.json()["results"][0] + assert result["status"] == "error" + assert result["error"]["code"] == "INTERNAL_ERROR", ( + f"Unexpected KeyError for known user must yield INTERNAL_ERROR; got {result!r}" + ) + log_events = [e.get("event") for e in cap] + assert "recommender_unexpected_key_error" in log_events, ( + f"Expected recommender_unexpected_key_error log; got {log_events!r}" + ) + + +def test_recommend_sets_model_version_response_header(): + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + client = _app_with_entry(_entry_with_recommender(rec)) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 1}) + assert r.status_code == 200, r.text + header_val = r.headers.get("x-recotem-model-version") + assert header_val, "X-Recotem-Model-Version header must be present and 
non-empty" + assert header_val == r.json()["model_version"] + + +# --------------------------------------------------------------------------- +# F4: user_known AttributeError path — mirrors _any_seed_known sentinel +# --------------------------------------------------------------------------- + + +def _entry_with_broken_user_mapper() -> ModelEntry: + """Build a loaded entry whose recommender has no accessible _mapper + (spec=[] ensures accessing any attribute raises AttributeError). + This mimics an irspack API incompatibility for the user-id mapping path. + """ + + # Use a class with a descriptor _mapper that raises AttributeError on access, + # while get_recommendation_for_known_user_id returns normally. + class _BrokenMapper: + """Descriptor that raises AttributeError on __get__.""" + + def __get__(self, obj, objtype=None): + raise AttributeError("_mapper not available") + + class _BrokenRec: + _mapper = _BrokenMapper() + + def get_recommendation_for_known_user_id(self, user_id, limit): + return [("i1", 0.9)] + + return ModelEntry( + name="demo", + recommender=_BrokenRec(), + header={}, + kid="test", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def test_recommend_user_known_attribute_error_logs_warning_and_still_serves() -> None: + """When _mapper.user_id_to_index raises AttributeError (unexpected irspack layout), + :recommend must log recommender_layout_unexpected, increment the metric counter, + and still serve the result (irspack call succeeds despite broken mapper). 
+ """ + import structlog.testing + + entry = _entry_with_broken_user_mapper() + client = _app_with_entry(entry) + + with structlog.testing.capture_logs() as cap: + r = client.post( + "/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 1} + ) + + # irspack call succeeds — the response is 200 with items + assert r.status_code == 200, r.text + body = r.json() + assert body["items"][0]["item_id"] == "i1" + + # recommender_layout_unexpected must be logged at WARNING + log_events = [e.get("event") for e in cap] + assert "recommender_layout_unexpected" in log_events, ( + f"Expected recommender_layout_unexpected warning log; got: {log_events!r}" + ) + + +def test_recommend_user_known_attribute_error_then_key_error_yields_internal_error() -> ( + None +): + """When _mapper.user_id_to_index raises AttributeError (user_known=None) + AND irspack subsequently raises KeyError, the response must be INTERNAL_ERROR (500), + not UNKNOWN_USER — user membership is unknown, not confirmed-absent. + """ + import structlog.testing + + class _BrokenMapperRec: + @property + def _mapper(self): + raise AttributeError("_mapper not available") + + def get_recommendation_for_known_user_id(self, user_id, limit): + raise KeyError(user_id) + + entry = ModelEntry( + name="demo", + recommender=_BrokenMapperRec(), + header={}, + kid="test", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + client = _app_with_entry(entry) + + with structlog.testing.capture_logs() as cap: + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u-ghost"}) + + assert r.status_code == 500, ( + "AttributeError then KeyError must yield INTERNAL_ERROR (500), not UNKNOWN_USER" + ) + body = r.json() + assert body.get("code") == "INTERNAL_ERROR", ( + f"Expected INTERNAL_ERROR; got {body!r}" + ) + log_events = [e.get("event") for e in cap] + assert "recommender_layout_unexpected" in log_events, ( + 
f"recommender_layout_unexpected must be logged; got: {log_events!r}" + ) + + +def test_batch_recommend_user_known_attribute_error_logs_warning() -> None: + """In :batch-recommend, AttributeError on _mapper.user_id_to_index must log + recommender_layout_unexpected and yield INTERNAL_ERROR for that element when + irspack subsequently raises KeyError. + """ + import structlog.testing + + class _BrokenMapperRec: + @property + def _mapper(self): + raise AttributeError("_mapper not available") + + def get_recommendation_for_known_user_id(self, user_id, limit): + raise KeyError(user_id) + + entry = ModelEntry( + name="demo", + recommender=_BrokenMapperRec(), + header={}, + kid="test", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + with structlog.testing.capture_logs() as cap: + r = client.post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u-ghost"}]}, + ) + + assert r.status_code == 200, r.text # batch always returns 200 for element errors + result = r.json()["results"][0] + assert result["status"] == "error" + assert result["error"]["code"] == "INTERNAL_ERROR", ( + f"AttributeError then KeyError in batch must yield INTERNAL_ERROR; got {result!r}" + ) + log_events = [e.get("event") for e in cap] + assert "recommender_layout_unexpected" in log_events, ( + f"recommender_layout_unexpected must be logged in batch; got: {log_events!r}" + ) diff --git a/tests/unit/test_v1_recommend_related.py b/tests/unit/test_v1_recommend_related.py new file mode 100644 index 00000000..4a0287db --- /dev/null +++ b/tests/unit/test_v1_recommend_related.py @@ -0,0 +1,266 @@ +# tests/unit/test_v1_recommend_related.py +"""POST /v1/recipes/{name}:recommend-related — single items→items.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +from 
fastapi.testclient import TestClient + +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + +_FAKE_SHA256_HEX = "1" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _client_with_recommender(rec, known_items: list[str] | None = None) -> TestClient: + """Wrap *rec* in a ModelEntry whose id-map advertises *known_items*. + + The router pre-checks ``entry.recommender._mapper.item_id_to_index`` + to distinguish ``UNKNOWN_SEED_ITEMS`` from ``NO_CANDIDATES``; tests + that exercise the happy path need at least one seed in the map. + """ + # MagicMock auto-creates ``_mapper`` if not preset; explicitly set + # ``item_id_to_index`` so ``"in"`` works as a dict membership test. + rec._mapper.item_id_to_index = {iid: i for i, iid in enumerate(known_items or [])} + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="test", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + return TestClient(build_v1_app(registry)) + + +def test_related_returns_items(): + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i9", 0.7), ("i8", 0.6)] + r = _client_with_recommender(rec, known_items=["7203"]).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["7203"], "limit": 5}, + ) + assert r.status_code == 200, r.text + body = r.json() + assert [i["item_id"] for i in body["items"]] == ["i9", "i8"] + rec.get_recommendation_for_new_user.assert_called_once_with(["7203"], 5) + + +def test_related_422_on_empty_seed_items(): + rec = MagicMock() + r = _client_with_recommender(rec).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": []}, + ) + assert r.status_code == 422 + + +def test_related_404_when_all_seeds_unknown_returns_empty(): + """No seed in id_map → UNKNOWN_SEED_ITEMS (router pre-check, ranker + never 
called).""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [] + r = _client_with_recommender(rec, known_items=[]).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["zzz"]}, + ) + assert r.status_code == 404 + body = r.json() + assert body["code"] == "UNKNOWN_SEED_ITEMS" + assert isinstance(body["detail"], str) + + +def test_related_404_when_seeds_known_but_ranker_empty(): + """Seed in id_map but ranker returns [] → NO_CANDIDATES.""" + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [] + r = _client_with_recommender(rec, known_items=["i1"]).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["i1"]}, + ) + assert r.status_code == 404 + body = r.json() + assert body["code"] == "NO_CANDIDATES" + + +def test_related_404_when_recipe_missing_from_registry(): + rec = MagicMock() + r = _client_with_recommender(rec).post( + "/v1/recipes/unknown:recommend-related", + json={"seed_items": ["i1"]}, + ) + assert r.status_code == 404 + body = r.json() + assert body["code"] == "RECIPE_NOT_FOUND" + assert isinstance(body["detail"], str) + + +def test_related_503_when_recipe_stub_not_loaded(): + stub = ModelEntry( + name="demo", + recommender=None, + header={}, + kid="", + loaded=False, + ) + registry = ModelRegistry() + registry.replace("demo", stub) + r = TestClient(build_v1_app(registry)).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["i1"]}, + ) + assert r.status_code == 503 + body = r.json() + assert body["code"] == "RECIPE_UNAVAILABLE" + assert isinstance(body["detail"], str) + + +# --------------------------------------------------------------------------- +# E. 
exclude_items + length cap +# --------------------------------------------------------------------------- + + +def test_recommend_related_excludes_items() -> None: + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [ + ("i1", 0.9), + ("i2", 0.8), + ("i3", 0.7), + ("i4", 0.6), + ("i5", 0.5), + ] + r = _client_with_recommender(rec, known_items=["s1"]).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["s1"], "limit": 5, "exclude_items": ["i2", "i4"]}, + ) + assert r.status_code == 200, r.text + items = r.json()["items"] + ids = [i["item_id"] for i in items] + assert "i2" not in ids + assert "i4" not in ids + assert len(ids) == 3 + + +def test_recommend_related_rejects_oversized_seed_item() -> None: + rec = MagicMock() + r = _client_with_recommender(rec).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["a" * 257]}, + ) + assert r.status_code == 422 + + +# --------------------------------------------------------------------------- +# Finding 10: _any_seed_known AttributeError → INTERNAL_ERROR +# --------------------------------------------------------------------------- + + +def _client_with_broken_mapper(rec) -> TestClient: + """Wrap a recommender whose _mapper attribute raises AttributeError.""" + entry = ModelEntry( + name="demo", + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, + ) + registry = ModelRegistry() + registry.replace("demo", entry) + return TestClient(build_v1_app(registry), raise_server_exceptions=False) + + +def test_recommend_related_attribute_error_on_mapper_returns_500() -> None: + """When _mapper attribute access raises AttributeError, :recommend-related + must return 500 with code INTERNAL_ERROR (not UNKNOWN_SEED_ITEMS). 
+ + Uses spec=[] on the recommender so that any attribute access raises + AttributeError — this mimics an irspack API incompatibility where the + expected internal layout (_mapper) is absent. + """ + # spec=[] means NO attributes are defined — accessing _mapper raises AttributeError + rec = MagicMock(spec=[]) + + r = _client_with_broken_mapper(rec).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["s1"]}, + ) + assert r.status_code == 500, r.text + body = r.json() + assert body.get("code") == "INTERNAL_ERROR", ( + f"AttributeError on _mapper must yield INTERNAL_ERROR; got {body!r}" + ) + + +def test_batch_recommend_related_attribute_error_only_affects_element() -> None: + """In a batch, AttributeError on _mapper affects only the element that + triggered it; remaining elements with a valid mapper continue.""" + from unittest.mock import MagicMock + + from recotem.serving.registry import ModelEntry, ModelRegistry + + # Build a recommender whose _mapper raises AttributeError for the bad seed + # but responds normally for others. + rec = MagicMock() + + # Use a real dict for item_id_to_index — this is what the code actually accesses + rec._mapper.item_id_to_index = {"good-seed": 0} + rec.get_recommendation_for_new_user.return_value = [("i1", 0.9)] + + # Now for the broken entry: a separate entry with no _mapper + broken_rec = MagicMock( + spec=[] + ) # spec=[] means NO attributes allowed → AttributeError + + # We need ONE entry with two different requests. The handler calls + # _any_seed_known per-element, which calls entry.recommender._mapper.item_id_to_index + # Since entry.recommender is fixed, we can't simulate mixed per-element mapper failure. + # Instead, test that a wholly broken mapper yields all INTERNAL_ERROR in a batch. 
+ broken_entry = ModelEntry( + name="broken", + recommender=broken_rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1.0, + ) + registry = ModelRegistry() + registry.replace("broken", broken_entry) + client = TestClient(build_v1_app(registry), raise_server_exceptions=False) + + r = client.post( + "/v1/recipes/broken:batch-recommend-related", + json={"requests": [{"seed_items": ["s1"]}, {"seed_items": ["s2"]}]}, + ) + assert r.status_code == 200, r.text # batch always returns 200 on element errors + results = r.json()["results"] + for result in results: + assert result["status"] == "error" + assert result["error"]["code"] == "INTERNAL_ERROR", ( + f"AttributeError on _mapper must yield INTERNAL_ERROR per element; got {result!r}" + ) + + +def test_recommend_related_sets_model_version_response_header(): + rec = MagicMock() + rec.get_recommendation_for_new_user.return_value = [("i9", 0.7)] + r = _client_with_recommender(rec, known_items=["seed1"]).post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["seed1"], "limit": 1}, + ) + assert r.status_code == 200, r.text + header_val = r.headers.get("x-recotem-model-version") + assert header_val, "X-Recotem-Model-Version header must be present and non-empty" + assert header_val == r.json()["model_version"] diff --git a/tests/unit/test_v1_router_basics.py b/tests/unit/test_v1_router_basics.py new file mode 100644 index 00000000..11b19f27 --- /dev/null +++ b/tests/unit/test_v1_router_basics.py @@ -0,0 +1,170 @@ +# tests/unit/test_v1_router_basics.py +"""v1 router smoke tests. + +Confirms the factory wires auth and registry and that the router is +mounted at ``/v1``. Inference verbs and the discovery endpoints are +added incrementally across Tasks 6-11. 
+""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest +from fastapi.testclient import TestClient + +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + + +def _client_with_entry(entry: ModelEntry) -> TestClient: + registry = ModelRegistry() + registry.replace(entry.name, entry) + return TestClient(build_v1_app(registry)) + + +_FAKE_SHA256_HEX = "d" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _loaded_entry(name: str = "demo") -> ModelEntry: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [] + return ModelEntry( + name=name, + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def test_make_router_returns_routable_apiroute_factory(): + registry = ModelRegistry() + client = TestClient(build_v1_app(registry)) + # An entirely unknown verb on the recipes path must return 404, + # confirming the router rejects undefined verbs (rather than e.g. + # routing through a catch-all). A GET request is used so we are not + # confused with the POST-only colon-verb endpoints. 
+ r = client.get("/v1/totally-unknown-path") + assert r.status_code == 404 + + +# --------------------------------------------------------------------------- +# Recipe name path-constraint tests (^[A-Za-z0-9_-]{1,64}$) +# --------------------------------------------------------------------------- + +_INVALID_RECIPE_NAMES = [ + "my recipe", # space + "../etc/passwd", # slashes and dots + "recipe.yaml", # dot in name + "日本語", # non-ASCII +] + + +@pytest.mark.parametrize("bad_name", _INVALID_RECIPE_NAMES) +def test_recommend_rejects_invalid_recipe_name(bad_name: str) -> None: + """POST :recommend with a name that fails the path regex must return 404 or 422.""" + client = _client_with_entry(_loaded_entry()) + r = client.post( + f"/v1/recipes/{bad_name}:recommend", + json={"user_id": "u1"}, + ) + # FastAPI returns 422 when the Path regex match fails at the router level; + # it may return 404 when the URL is parsed differently (e.g. slashes split + # the path into segments that don't match any route). 
+ assert r.status_code in {404, 422}, ( + f"Expected 404 or 422 for invalid name {bad_name!r}, got {r.status_code}" + ) + + +@pytest.mark.parametrize("bad_name", _INVALID_RECIPE_NAMES) +def test_recommend_related_rejects_invalid_recipe_name(bad_name: str) -> None: + """POST :recommend-related with invalid name must return 404 or 422.""" + client = _client_with_entry(_loaded_entry()) + r = client.post( + f"/v1/recipes/{bad_name}:recommend-related", + json={"seed_items": ["i1"]}, + ) + assert r.status_code in {404, 422}, ( + f"Expected 404 or 422 for invalid name {bad_name!r}, got {r.status_code}" + ) + + +@pytest.mark.parametrize("bad_name", _INVALID_RECIPE_NAMES) +def test_recipe_detail_rejects_invalid_recipe_name(bad_name: str) -> None: + """GET /v1/recipes/{name} with invalid name must return 404 or 422.""" + client = _client_with_entry(_loaded_entry()) + r = client.get(f"/v1/recipes/{bad_name}") + assert r.status_code in {404, 422}, ( + f"Expected 404 or 422 for invalid name {bad_name!r}, got {r.status_code}" + ) + + +# --------------------------------------------------------------------------- +# J. Path regex — must accept every valid recipe name +# --------------------------------------------------------------------------- +# The router path regex must mirror ``Recipe.name`` (^[A-Za-z0-9_-]{1,64}$) +# so any recipe accepted at load time is also routable. Recipes with a +# leading "_" or "-" are valid per the recipe loader, so the router must +# NOT 422 on those — instead they get a normal 404 when the registry is +# empty (the name passes the path regex; the registry has no entry). + + +def test_recipe_path_accepts_leading_hyphen() -> None: + client = _client_with_entry(_loaded_entry("-bad")) + r = client.post("/v1/recipes/-bad:recommend", json={"user_id": "u1"}) + # 422 (regex rejection) would be a regression; 200/404 both indicate + # the regex accepted the name and the request reached the handler. 
+ assert r.status_code != 422 + + +def test_recipe_path_accepts_leading_underscore() -> None: + client = _client_with_entry(_loaded_entry("_bad")) + r = client.post("/v1/recipes/_bad:recommend", json={"user_id": "u1"}) + assert r.status_code != 422 + + +def test_recipe_path_accepts_alphanumeric_first_char() -> None: + client = _client_with_entry(_loaded_entry("abc")) + r = client.post("/v1/recipes/abc:recommend", json={"user_id": "u1"}) + assert r.status_code != 422 + + +# --------------------------------------------------------------------------- +# L. kid contextvar binding on recipe_detail +# --------------------------------------------------------------------------- + + +def test_recipe_detail_binds_kid_to_logs() -> None: + import structlog + import structlog.testing + + captured_kwargs: list[dict] = [] + + def _spy(logger, name, event_dict): + captured_kwargs.append(dict(event_dict)) + return event_dict + + structlog.configure( + processors=[ + structlog.contextvars.merge_contextvars, + _spy, + structlog.processors.KeyValueRenderer(), + ], + wrapper_class=structlog.make_filtering_bound_logger(0), + cache_logger_on_first_use=False, + ) + + entry = _loaded_entry("kidtest") + client = _client_with_entry(entry) + client.get("/v1/recipes/noexist") + + has_kid = any("kid" in e for e in captured_kwargs) + assert has_kid, ( + "Expected at least one log event with a 'kid' key bound via contextvars " + f"during /v1/recipes/{{name}}; captured events: {captured_kwargs!r}" + ) diff --git a/tests/unit/test_v1_stale_and_health.py b/tests/unit/test_v1_stale_and_health.py new file mode 100644 index 00000000..7730884e --- /dev/null +++ b/tests/unit/test_v1_stale_and_health.py @@ -0,0 +1,171 @@ +"""Availability-contract tests for the v1 surface. + +Two invariants that must not regress between releases: + +1. 
**Stale-but-loaded keeps serving.** An entry where the most recent + hot-swap failed (``last_load_error`` is set) but the previous model is + still in memory (``loaded=True``, ``recommender`` non-None) must keep + answering 200. Treating ``last_load_error`` as a 503 trigger would + silently take healthy traffic offline on a single bad artifact. + +2. **``/v1/health`` returns 503 when degraded.** K8s readiness probes + point at this endpoint. The body status mirrors HTTP status — 503 if + any registered recipe is unloaded. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +from fastapi.testclient import TestClient + +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + +_FAKE_SHA256_HEX = "2" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _loaded_entry(name: str = "demo") -> ModelEntry: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + rec._mapper = MagicMock() + rec._mapper.item_id_to_index = {"i1": 0} + return ModelEntry( + name=name, + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def _stub_entry(name: str) -> ModelEntry: + return ModelEntry( + name=name, + recommender=None, + header={}, + kid="", + metadata_df=None, + last_load_error="initial load failed", + artifact_path="", + loaded=False, + ) + + +# --------------------------------------------------------------------------- +# M-6: stale-but-loaded keeps serving +# --------------------------------------------------------------------------- + + +def test_stale_but_loaded_recipe_keeps_serving_recommend() -> None: + """``last_load_error`` set + ``loaded=True`` → ``:recommend`` returns 200. + + The watcher sets ``last_load_error`` via ``set_load_error()`` after a + hot-swap fails; this does NOT flip ``loaded`` to False. 
The 200 path + must remain reachable so a single bad artifact does not page oncall. + """ + entry = _loaded_entry() + entry.last_load_error = "hot-swap failed: HMAC verify failed" + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 200 + body = r.json() + assert body["items"][0]["item_id"] == "i1" + + +def test_stale_but_loaded_recipe_counts_as_loaded_in_health() -> None: + """A stale-but-loaded entry must count toward the /v1/health loaded total.""" + entry = _loaded_entry() + entry.last_load_error = "transient stat failure" + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + r = client.get("/v1/health") + assert r.status_code == 200 + body = r.json() + assert body["total"] == 1 + assert body["loaded"] == 1 + assert body["status"] == "ok" + + +def test_stale_but_loaded_recipe_shown_in_health_details() -> None: + """``last_load_error`` must surface in the per-recipe health detail. + + Even though ``/v1/health`` aggregate stays "ok", operators must be able + to see the underlying error string in ``/v1/health/details`` for + debugging. + """ + # No api_keys → health/details is reachable without an X-API-Key header. + entry = _loaded_entry() + entry.last_load_error = "transient stat failure" + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + r = client.get("/v1/health/details") + # /v1/health/details flips to 503 when any error string is set, even + # if loaded=True — this is the documented behavior so degraded entries + # are visible without scraping the aggregate. 
+ assert r.status_code == 503 + body = r.json() + assert body["status"] == "degraded" + assert body["recipes"]["demo"]["loaded"] is True + assert "transient stat failure" in body["recipes"]["demo"]["error"] + + +# --------------------------------------------------------------------------- +# M-7: /v1/health returns 503 when degraded +# --------------------------------------------------------------------------- + + +def test_health_returns_503_when_loaded_lt_total() -> None: + """K8s readiness contract: any unloaded recipe → HTTP 503 on /v1/health.""" + registry = ModelRegistry() + registry.replace("demo", _loaded_entry("demo")) + registry.replace("broken", _stub_entry("broken")) + client = TestClient(build_v1_app(registry)) + + r = client.get("/v1/health") + assert r.status_code == 503 + body = r.json() + assert body["status"] == "degraded" + assert body["total"] == 2 + assert body["loaded"] == 1 + + +def test_health_returns_200_when_all_loaded() -> None: + registry = ModelRegistry() + registry.replace("a", _loaded_entry("a")) + registry.replace("b", _loaded_entry("b")) + client = TestClient(build_v1_app(registry)) + + r = client.get("/v1/health") + assert r.status_code == 200 + body = r.json() + assert body["status"] == "ok" + assert body["total"] == body["loaded"] == 2 + + +def test_health_returns_200_when_registry_empty() -> None: + """No recipes is "ok" — there is no failure to be degraded by. + + This is the boot-time state before the watcher's first successful + poll on an empty recipes directory. K8s should mark the pod Ready so + traffic can route to it; serving 503 on an empty registry would + create a deadlock between startup and registration. 
+ """ + registry = ModelRegistry() + client = TestClient(build_v1_app(registry)) + + r = client.get("/v1/health") + assert r.status_code == 200 + body = r.json() + assert body["status"] == "ok" diff --git a/tests/unit/test_v1_status_labels.py b/tests/unit/test_v1_status_labels.py new file mode 100644 index 00000000..44edf61e --- /dev/null +++ b/tests/unit/test_v1_status_labels.py @@ -0,0 +1,319 @@ +"""Verify ``recotem_v1_requests_total`` labels are set by the route handler. + +The metric ``status`` label values documented in ``docs/operations.md`` +must be reachable from the HTTP handler — otherwise alert rules filtering +on ``status="unavailable"`` / ``status="unknown_user"`` / +``status="recipe_not_found"`` silently never fire. This file exercises +each branch via the HTTP layer (not by calling ``record_v1_request`` +directly) so a regression that mis-labels the metric is caught. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest +from fastapi.testclient import TestClient + +from recotem.serving import metrics as _metrics +from recotem.serving.registry import ModelEntry, ModelRegistry +from tests.conftest import build_v1_app + + +@pytest.fixture(autouse=True) +def _enable_metrics(monkeypatch: pytest.MonkeyPatch) -> None: + """Force metrics to be enabled and wipe v1 metric state between tests. + + The Prometheus default registry is process-global and `_ensure_v1_initialized` + is idempotent on the first non-None counter — so we must (a) unregister + pre-existing collectors before each test and (b) reset the v1 module-level + counters to None so the next ``record_v1_request`` re-creates them on the + cleaned registry. 
+ """ + pytest.importorskip("prometheus_client") + monkeypatch.setenv("RECOTEM_METRICS_ENABLED", "1") + monkeypatch.setattr(_metrics, "metrics_enabled", lambda: True) + + import prometheus_client # noqa: PLC0415 + + # Unregister any collectors carrying our v1 metric names so the next + # ``_ensure_v1_initialized`` succeeds with fresh Counters. + _v1_names = { + "recotem_v1_requests", + "recotem_v1_request_latency_seconds", + "recotem_v1_batch_size", + "recotem_v1_batch_element_errors", + "recotem_v1_metadata_degraded_items", + "recotem_v1_validation_errors_outside_verb", + } + for collector in list(prometheus_client.REGISTRY._collector_to_names): + names = prometheus_client.REGISTRY._collector_to_names.get(collector, set()) + if any(n.startswith(tuple(_v1_names)) for n in names): + try: + prometheus_client.REGISTRY.unregister(collector) + except Exception: # noqa: BLE001 + pass + + for attr in ( + "_V1_REQUEST_COUNTER", + "_V1_REQUEST_LATENCY", + "_V1_BATCH_SIZE", + "_V1_BATCH_ELEMENT_ERRORS", + "_V1_METADATA_DEGRADED_ITEMS", + "_V1_VALIDATION_ERRORS_OUTSIDE_VERB", + ): + monkeypatch.setattr(_metrics, attr, None, raising=False) + + yield + + # Best-effort cleanup so adjacent test files do not see our counters. 
+ for collector in list(prometheus_client.REGISTRY._collector_to_names): + names = prometheus_client.REGISTRY._collector_to_names.get(collector, set()) + if any(n.startswith(tuple(_v1_names)) for n in names): + try: + prometheus_client.REGISTRY.unregister(collector) + except Exception: # noqa: BLE001 + pass + + +_FAKE_SHA256_HEX = "e" * 64 # 64 lowercase hex chars for a valid Sha256Hex marker + + +def _loaded_entry(name: str = "demo") -> ModelEntry: + rec = MagicMock() + rec.get_recommendation_for_known_user_id.return_value = [("i1", 0.9)] + rec.get_recommendation_for_new_user.return_value = [("i2", 0.8)] + rec._mapper = MagicMock() + rec._mapper.item_id_to_index = {"i1": 0, "i2": 1, "seed-known": 2} + return ModelEntry( + name=name, + recommender=rec, + header={}, + kid="t", + metadata_df=None, + metadata_index=None, + loaded=True, + _loaded_marker=(None, _FAKE_SHA256_HEX), + loaded_at_unix=1747800000.0, + ) + + +def _stub_entry(name: str = "stub") -> ModelEntry: + return ModelEntry( + name=name, + recommender=None, + header={}, + kid="", + metadata_df=None, + last_load_error="not loaded", + artifact_path="", + loaded=False, + ) + + +def _label_value(verb: str, status: str, recipe: str = "demo") -> float: + counter = _metrics._V1_REQUEST_COUNTER + assert counter is not None, "v1 request counter must be initialised" + return counter.labels(recipe=recipe, verb=verb, status=status)._value.get() + + +def test_recommend_records_ok_status() -> None: + registry = ModelRegistry() + registry.replace("demo", _loaded_entry()) + client = TestClient(build_v1_app(registry)) + + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 200 + assert _label_value("recommend", "ok") == 1.0 + + +def test_recommend_records_unknown_user_status() -> None: + entry = _loaded_entry() + entry.recommender.get_recommendation_for_known_user_id.side_effect = KeyError("u1") + registry = ModelRegistry() + registry.replace("demo", entry) + client = 
TestClient(build_v1_app(registry)) + + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 404 + assert r.json()["code"] == "UNKNOWN_USER" + assert _label_value("recommend", "unknown_user") == 1.0 + + +def test_recommend_records_unavailable_when_stub() -> None: + registry = ModelRegistry() + registry.replace("demo", _stub_entry("demo")) + client = TestClient(build_v1_app(registry)) + + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 503 + assert r.json()["code"] == "RECIPE_UNAVAILABLE" + assert _label_value("recommend", "unavailable") == 1.0 + + +def test_recommend_records_recipe_not_found_when_missing() -> None: + registry = ModelRegistry() + client = TestClient(build_v1_app(registry)) + + r = client.post("/v1/recipes/ghost:recommend", json={"user_id": "u1"}) + assert r.status_code == 404 + assert r.json()["code"] == "RECIPE_NOT_FOUND" + assert _label_value("recommend", "recipe_not_found", recipe="ghost") == 1.0 + + +def test_recommend_related_records_unknown_seed_items() -> None: + entry = _loaded_entry() + # id_map empty so no seed is known. 
+ entry.recommender._mapper.item_id_to_index = {} + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + r = client.post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["i-unknown"]}, + ) + assert r.status_code == 404 + assert r.json()["code"] == "UNKNOWN_SEED_ITEMS" + assert _label_value("recommend-related", "unknown_seed_items") == 1.0 + + +def test_recommend_related_records_no_candidates() -> None: + entry = _loaded_entry() + # seed-known is in id_map but ranker returns [] + entry.recommender.get_recommendation_for_new_user.return_value = [] + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + r = client.post( + "/v1/recipes/demo:recommend-related", + json={"seed_items": ["seed-known"]}, + ) + assert r.status_code == 404 + assert r.json()["code"] == "NO_CANDIDATES" + assert _label_value("recommend-related", "no_candidates") == 1.0 + + +def test_validation_error_records_metric_for_matching_v1_path() -> None: + registry = ModelRegistry() + registry.replace("demo", _loaded_entry()) + client = TestClient(build_v1_app(registry)) + + # limit=0 fails the schema; whole-request 422. 
+ r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1", "limit": 0}) + assert r.status_code == 422 + assert _label_value("recommend", "validation_error") == 1.0 + + +# --------------------------------------------------------------------------- +# Finding 4: recipe_not_found metric across all verbs +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "verb,path,body", + [ + ("recommend", "ghost:recommend", {"user_id": "u1"}), + ( + "recommend-related", + "ghost:recommend-related", + {"seed_items": ["i1"]}, + ), + ( + "batch-recommend", + "ghost:batch-recommend", + {"requests": [{"user_id": "u1"}]}, + ), + ( + "batch-recommend-related", + "ghost:batch-recommend-related", + {"requests": [{"seed_items": ["i1"]}]}, + ), + ], +) +def test_recipe_not_found_metric_across_verbs(verb: str, path: str, body: dict) -> None: + """404 on missing recipe must record recipe_not_found label for every verb.""" + registry = ModelRegistry() + client = TestClient(build_v1_app(registry)) + r = client.post(f"/v1/recipes/{path}", json=body) + assert r.status_code == 404 + assert r.json()["code"] == "RECIPE_NOT_FOUND" + assert _label_value(verb, "recipe_not_found", recipe="ghost") == 1.0 + + +# --------------------------------------------------------------------------- +# Finding 5: model_version header absent on error responses +# --------------------------------------------------------------------------- + + +def test_model_version_header_absent_on_404_recipe_not_found() -> None: + """When :recommend returns 404 RECIPE_NOT_FOUND, X-Recotem-Model-Version + must NOT be present — there is no loaded model to report.""" + registry = ModelRegistry() + client = TestClient(build_v1_app(registry)) + r = client.post("/v1/recipes/ghost:recommend", json={"user_id": "u1"}) + assert r.status_code == 404 + assert "x-recotem-model-version" not in r.headers, ( + "404 RECIPE_NOT_FOUND must not carry X-Recotem-Model-Version" + ) + + 
+def test_model_version_header_absent_on_503_recipe_unavailable() -> None: + """503 RECIPE_UNAVAILABLE must not carry X-Recotem-Model-Version.""" + registry = ModelRegistry() + registry.replace("demo", _stub_entry("demo")) + client = TestClient(build_v1_app(registry)) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 503 + assert "x-recotem-model-version" not in r.headers, ( + "503 RECIPE_UNAVAILABLE must not carry X-Recotem-Model-Version" + ) + + +def test_model_version_header_present_on_200_recommend() -> None: + """200 response must carry a non-empty X-Recotem-Model-Version header.""" + registry = ModelRegistry() + registry.replace("demo", _loaded_entry()) + client = TestClient(build_v1_app(registry)) + r = client.post("/v1/recipes/demo:recommend", json={"user_id": "u1"}) + assert r.status_code == 200, r.text + assert r.headers.get("x-recotem-model-version"), ( + "200 :recommend must carry X-Recotem-Model-Version" + ) + + +def test_batch_recommend_records_outer_ok_when_partial_failure() -> None: + """A batch with mixed ok/error elements still records the OUTER request + as ``status=ok`` (HTTP 200) — per-element errors are observable via + the separate ``_v1_batch_element_errors_total`` counter. 
+ """ + entry = _loaded_entry() + + def _side(user_id, limit): # noqa: ARG001 + if user_id == "bad": + raise KeyError(user_id) + return [("i1", 0.5)] + + entry.recommender.get_recommendation_for_known_user_id.side_effect = _side + registry = ModelRegistry() + registry.replace("demo", entry) + client = TestClient(build_v1_app(registry)) + + r = client.post( + "/v1/recipes/demo:batch-recommend", + json={"requests": [{"user_id": "u1"}, {"user_id": "bad"}]}, + ) + assert r.status_code == 200 + body = r.json() + statuses = [e["status"] for e in body["results"]] + assert statuses == ["ok", "error"] + assert _label_value("batch-recommend", "ok") == 1.0 + counter = _metrics._V1_BATCH_ELEMENT_ERRORS + assert counter is not None + assert ( + counter.labels( + recipe="demo", verb="batch-recommend", code="UNKNOWN_USER" + )._value.get() + == 1.0 + )