ci(fidelity): wire strict fidelity check into lint.yml (#72)

patrick-chinchill · web-flow · commit c36c8aa399b9 · 2026-04-24T13:04:53.000-07:00
Enforces strict upstream parity for mapped core files in CI. Closes #53. - scripts/verify_test_fidelity.py: --strict mode (default) fails on any missing test, --update-baseline writes the file with dynamic ts_parity from UPSTREAM_PARITY - Fails cleanly when upstream checkout is missing (no silent skip-and-exit-0) - Validates baseline ts_parity against UPSTREAM_PARITY to catch drift after upstream bumps - lint.yml: clones vercel/chat@4.26.0 to /tmp/vercel-chat then runs --strict; clone step is required (no continue-on-error) - fidelity_baseline.json: empty, ships at zero-missing for mapped core files (8 of 17 packages/chat/src/*.test.ts) - Follow-ups: #78 (MAPPING expansion), #79 (SHA pin clone), #80 (fuzzy matcher hyphen)
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -57,6 +57,19 @@ jobs:
         continue-on-error: true
         run: uv run python scripts/audit_test_quality.py
 
+      - name: Clone upstream vercel/chat at pinned parity tag
+        id: clone_upstream
+        run: |
+          git clone --depth 1 --branch chat@4.26.0 \
+            https://github.com/vercel/chat.git /tmp/vercel-chat
+
+      - name: Test fidelity check (strict — zero missing in mapped core files)
+        id: fidelity
+        continue-on-error: true
+        env:
+          TS_ROOT: /tmp/vercel-chat
+        run: uv run python scripts/verify_test_fidelity.py --strict
+
       - name: Pyrefly type check
         id: pyrefly
         continue-on-error: true
@@ -75,6 +88,7 @@ jobs:
           echo "| Ruff check | ${{ steps.ruff_check.outcome }} |" >> $GITHUB_STEP_SUMMARY
           echo "| Ruff format | ${{ steps.ruff_format.outcome }} |" >> $GITHUB_STEP_SUMMARY
           echo "| Test audit | ${{ steps.audit.outcome }} |" >> $GITHUB_STEP_SUMMARY
+          echo "| Test fidelity | ${{ steps.fidelity.outcome }} |" >> $GITHUB_STEP_SUMMARY
           echo "| Pyrefly | ${{ steps.pyrefly.outcome }} |" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
           if [ "${{ steps.pyrefly.outcome }}" = "success" ]; then
@@ -89,10 +103,11 @@ jobs:
           RUFF_CHECK: ${{ steps.ruff_check.outcome }}
           RUFF_FORMAT: ${{ steps.ruff_format.outcome }}
           AUDIT: ${{ steps.audit.outcome }}
+          FIDELITY: ${{ steps.fidelity.outcome }}
           PYREFLY: ${{ steps.pyrefly.outcome }}
         run: |
           failures=0
-          for var in RUFF_CHECK RUFF_FORMAT AUDIT PYREFLY; do
+          for var in RUFF_CHECK RUFF_FORMAT AUDIT FIDELITY PYREFLY; do
             outcome="${!var}"
             if [ "$outcome" != "success" ]; then
               echo "$var failed (outcome: $outcome)"
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -95,6 +95,18 @@ Parity catch-up with upstream `4.26.0`. No upstream version change.
   (`test_memory_state.py`, `test_state_postgres.py`). Closes the same
   flaky-test hazard fixed for the Redis backend in PR #73.
 
+### CI / Internals
+
+- `verify_test_fidelity.py` now enforces parity against upstream on every PR
+  (`.github/workflows/lint.yml`); fails when the upstream clone is missing
+  or when any mapped TS file can't be found. Workflow runs `--strict` and
+  the clone step no longer carries `continue-on-error: true`, so infra
+  failures surface immediately at the job level. Baseline shipped empty
+  (all previously-missing tests ported in this release) — strict fidelity
+  for *mapped core files* (8 of 17 `packages/chat/src/*.test.ts` files;
+  see the `MAPPING` dict in `scripts/verify_test_fidelity.py` for the
+  authoritative scope list). Closes #53.
+
 ## 0.4.26.1 (2026-04-23)
 
 Python-only follow-up on `0.4.26`. Still alpha — APIs may change.
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -105,5 +105,22 @@ async mock bugs, and cross-file duplicates. PRs that introduce hard failures
 will not pass CI.
 
 **Fidelity check** (`scripts/verify_test_fidelity.py`) verifies every TS
-`it("...")` has a matching Python `def test_*()`. Must show 0 missing before
-committing test changes.
+`it("...")` in the mapped core files has a matching Python `def test_*()`,
+pinned to `chat@4.26.0`. The `MAPPING` dict in that script is the
+authoritative scope list — it currently covers 8 of 17
+`packages/chat/src/*.test.ts` files (extending it is tracked as a
+follow-up). **CI runs `--strict`** (see `.github/workflows/lint.yml`):
+any missing translation in a mapped file fails the build, and a missing
+upstream checkout also fails (the script exits non-zero when any mapped
+TS file isn't found). Baseline mode (the default without `--strict`) is
+retained for local workflows where a few ports land in flight —
+regenerate via `--update-baseline` after documenting intentional
+divergence in `docs/UPSTREAM_SYNC.md`.
+
+Before the fidelity check can run locally, clone the pinned upstream
+checkout (same command CI uses in `lint.yml`):
+```bash
+git clone --depth 1 --branch chat@4.26.0 \
+  https://github.com/vercel/chat.git /tmp/vercel-chat
+```
+Then `TS_ROOT=/tmp/vercel-chat uv run python scripts/verify_test_fidelity.py --strict`.
diff --git a/docs/UPSTREAM_SYNC.md b/docs/UPSTREAM_SYNC.md
@@ -72,6 +72,38 @@ tests. If upstream tests lock in inconsistent behavior, choose one of:
 - **Preserve parity** and document the inconsistency in the non-parity section below
 - **Intentionally diverge** and document the divergence in the non-parity section
 
+### Test fidelity (strict mode)
+
+`scripts/verify_test_fidelity.py` runs in CI (`.github/workflows/lint.yml`) pinned
+to `vercel/chat@4.26.0` (matches the `UPSTREAM_PARITY` constant in
+`src/chat_sdk/__init__.py`). **CI runs `--strict`** — the repo ships at 0
+missing *for mapped core files* as of `0.4.26.2` and the baseline
+(`scripts/fidelity_baseline.json`) is empty. Scope is defined by the
+`MAPPING` dict in the script: 8 of 17 `packages/chat/src/*.test.ts` files
+today (extending to the remaining 9 is tracked as a follow-up). Unmapped
+files are not checked — tightening scope requires editing `MAPPING` and
+re-running `--strict`.
+
+Infra guardrails:
+
+- The workflow's `Clone upstream vercel/chat at pinned parity tag` step does
+  **not** use `continue-on-error` — a failed clone aborts the job loudly.
+- The script itself fails with exit 1 if any mapped TS file is missing under
+  `TS_ROOT` (defense in depth against silent skips).
+
+Workflows:
+
+| Goal | Command |
+|------|---------|
+| Port a missing test | Write the Python test and land it; CI rejects anything that re-introduces a gap |
+| Add a Python-only divergence (intentional skip) | Document in [Known Non-Parity](#known-non-parity-with-typescript-sdk), then run `--update-baseline`; if truly unavoidable, switch the workflow back to the non-strict default for that file |
+| Upstream sync | After pulling new upstream, run `--strict` — newly-added TS tests appear as missing and CI fails until ported |
+| Final parity check | Same as CI: `TS_ROOT=/tmp/vercel-chat uv run python scripts/verify_test_fidelity.py --strict` |
+
+Baseline mode (the default without `--strict`) is retained for local
+development where a few ports land in flight. Regenerate the baseline via
+`--update-baseline` rather than hand-editing.
+
 ## Divergence Policy
 
 Every divergence from upstream has a cost: merge conflicts on future syncs,
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,6 +2,20 @@
 name = "chat-sdk"
 version = "0.4.26.2"
 description = "Multi-platform async chat SDK for Python — port of Vercel Chat"
+keywords = [
+    "chat",
+    "chatbot",
+    "chatops",
+    "slack-bot",
+    "discord-bot",
+    "telegram-bot",
+    "teams-bot",
+    "whatsapp-bot",
+    "bot-framework",
+    "async",
+    "asyncio",
+    "vercel",
+]
 readme = "README.md"
 license = {text = "MIT"}
 requires-python = ">=3.10"
@@ -16,7 +30,11 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Topic :: Communications",
     "Topic :: Communications :: Chat",
+    "Topic :: Internet",
+    "Topic :: Software Development :: Libraries :: Application Frameworks",
+    "Topic :: Software Development :: Libraries :: Python Modules",
     "Typing :: Typed",
 ]
 
diff --git a/scripts/fidelity_baseline.json b/scripts/fidelity_baseline.json
@@ -0,0 +1,7 @@
+{
+  "_comment": "Ratchet-down baseline for scripts/verify_test_fidelity.py. This repo ships at strict fidelity for mapped core files (0 missing) against chat@4.26.0, so the baseline is empty. Scope: the MAPPING dict in scripts/verify_test_fidelity.py is the authoritative list of TS files checked; it currently covers 8 of the 17 packages/chat/src/*.test.ts files. Default CI mode runs --strict via .github/workflows/lint.yml; this file is retained for local workflows that want to opt back into baseline mode (e.g. during an upstream sync where several ports land in flight). To baseline genuinely-divergent tests, run scripts/verify_test_fidelity.py --update-baseline after documenting the divergence in docs/UPSTREAM_SYNC.md.",
+  "ts_parity": "chat@4.26.0",
+  "total_ts_tests": 588,
+  "total_missing": 0,
+  "missing": {}
+}
diff --git a/scripts/verify_test_fidelity.py b/scripts/verify_test_fidelity.py
diff --git a/src/chat_sdk/adapters/teams/format_converter.py b/src/chat_sdk/adapters/teams/format_converter.py
diff --git a/tests/test_cards.py b/tests/test_cards.py