Skip to content

Commit 47ffdb3

Browse files
hubert-marek and claude committed
Unified conformance tests across all 4 services with real API validation
- Box: added DELETE comments/tasks, file version upload, fixed scoring (collections ignore, dev_token bug). 105/106 (99%)
- Calendar: added pytest entry point, fixed auth (seed email mismatch), added optional fields. 77/77 (100%)
- Linear: added pytest entry point, resource queries, mutation parity, removed invalid tests. 89/90 (98%)
- Slack: added 9 new docs-golden shape tests, validation wrapper. 22/22 (100%)
- Registered pytest conformance/external/replica_only markers

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 334043d commit 47ffdb3

7 files changed

Lines changed: 618 additions & 23 deletions

File tree

backend/pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,3 +27,8 @@ dependencies = [
2727

2828
[tool.pytest.ini_options]
2929
addopts = ["--tb=short"]
# Register the custom markers used to select test suites (e.g. `pytest -m conformance`).
30+
markers = [
31+
"conformance: API conformance/parity tests against production APIs",
32+
"external: requires live API credentials (tokens/keys)",
33+
"replica_only: tests against replica only (no external credentials needed)",
34+
]

backend/tests/integration/test_slack_api_docs.py

Lines changed: 141 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -357,3 +357,144 @@ async def test_search_messages_doc_shape(self, slack_client: AsyncClient) -> Non
357357
}
358358
assert expected_match_keys <= match.keys()
359359
assert HIGHLIGHT_START in match["text"] and HIGHLIGHT_END in match["text"]
360+
361+
async def test_auth_test_doc_shape(self, slack_client: AsyncClient) -> None:
    """Check that ``auth.test`` returns the expected identity fields.

    Posts an empty JSON body to ``/auth.test`` and verifies the HTTP status,
    the ``ok`` flag, the presence of the user/team identity keys, and that
    the reported ``user_id`` equals ``USER_AGENT``.
    """
    required_keys = {"user_id", "user", "team_id", "team"}

    response = await slack_client.post("/auth.test", json={})
    assert response.status_code == 200

    payload = response.json()
    assert payload["ok"] is True
    assert required_keys.issubset(payload.keys())
    assert payload["user_id"] == USER_AGENT
368+
369+
async def test_chat_update_doc_shape(self, slack_client: AsyncClient) -> None:
    """Check the response shape of ``chat.update``.

    Posts a message to ``CHANNEL_GENERAL``, edits it by timestamp, and
    verifies that the update response carries ``ok``, ``channel``, ``ts``
    and ``text``, with ``text`` equal to the new content.
    """
    original = {"channel": CHANNEL_GENERAL, "text": "Original text for update"}
    posted = await slack_client.post("/chat.postMessage", json=original)
    assert posted.status_code == 200
    # The timestamp returned by postMessage identifies the message to edit.
    message_ts = posted.json()["ts"]

    edit = {"channel": CHANNEL_GENERAL, "ts": message_ts, "text": "Updated text"}
    updated = await slack_client.post("/chat.update", json=edit)
    assert updated.status_code == 200

    payload = updated.json()
    assert payload["ok"] is True
    assert {"ok", "channel", "ts", "text"}.issubset(payload.keys())
    assert payload["text"] == "Updated text"
386+
387+
async def test_conversations_archive_doc_shape(
    self, slack_client: AsyncClient
) -> None:
    """Check that ``conversations.archive`` succeeds on a new channel.

    Creates a uniquely named public channel, archives it, and verifies the
    HTTP status and the ``ok`` flag of the archive response.
    """
    created = await slack_client.post(
        "/conversations.create",
        json={"name": _unique_name("doc-archive"), "is_private": False},
    )
    assert created.status_code == 200
    new_channel = created.json()["channel"]["id"]

    archived = await slack_client.post(
        "/conversations.archive", json={"channel": new_channel}
    )
    assert archived.status_code == 200
    assert archived.json()["ok"] is True
403+
404+
async def test_conversations_unarchive_doc_shape(
    self, slack_client: AsyncClient
) -> None:
    """Check that ``conversations.unarchive`` succeeds on an archived channel.

    Creates a uniquely named public channel, archives it, then unarchives it
    and verifies the HTTP status and the ``ok`` flag of the final response.
    """
    channel_name = _unique_name("doc-unarch")
    create_resp = await slack_client.post(
        "/conversations.create", json={"name": channel_name, "is_private": False}
    )
    assert create_resp.status_code == 200
    channel_id = create_resp.json()["channel"]["id"]

    archive_resp = await slack_client.post(
        "/conversations.archive", json={"channel": channel_id}
    )
    # Verify the precondition: if archiving failed, the unarchive assertion
    # below would fail for an unrelated reason and point at the wrong method.
    assert archive_resp.status_code == 200
    assert archive_resp.json()["ok"] is True

    resp = await slack_client.post(
        "/conversations.unarchive", json={"channel": channel_id}
    )
    assert resp.status_code == 200
    data = resp.json()
    assert data["ok"] is True
424+
425+
async def test_conversations_rename_doc_shape(
    self, slack_client: AsyncClient
) -> None:
    """Check that ``conversations.rename`` echoes the new channel name.

    Creates a uniquely named public channel, renames it to a second unique
    name, and verifies the HTTP status, the ``ok`` flag, and that
    ``channel.name`` in the response equals the requested name.
    """
    created = await slack_client.post(
        "/conversations.create",
        json={"name": _unique_name("doc-rename"), "is_private": False},
    )
    assert created.status_code == 200
    target_channel = created.json()["channel"]["id"]

    renamed_to = _unique_name("doc-renamed")
    rename_request = {"channel": target_channel, "name": renamed_to}
    renamed = await slack_client.post("/conversations.rename", json=rename_request)
    assert renamed.status_code == 200

    payload = renamed.json()
    assert payload["ok"] is True
    assert payload["channel"]["name"] == renamed_to
444+
445+
async def test_conversations_kick_doc_shape(
    self, slack_client: AsyncClient, slack_client_john: AsyncClient
) -> None:
    """Check that ``conversations.kick`` succeeds for an invited member.

    Creates a uniquely named public channel, invites ``USER_JOHN``, then
    kicks that user and verifies the HTTP status and the ``ok`` flag.

    ``slack_client_john`` is not used in the body; it is kept in the
    signature unchanged.
    NOTE(review): presumably the fixture is requested for its setup side
    effects -- confirm, and drop it if it has none.
    """
    channel_name = _unique_name("doc-kick")
    create_resp = await slack_client.post(
        "/conversations.create", json={"name": channel_name, "is_private": False}
    )
    assert create_resp.status_code == 200
    channel_id = create_resp.json()["channel"]["id"]

    invite_resp = await slack_client.post(
        "/conversations.invite",
        json={"channel": channel_id, "users": USER_JOHN},
    )
    # Verify the precondition: the kick below is only meaningful if the user
    # actually became a member of the channel.
    assert invite_resp.status_code == 200
    assert invite_resp.json()["ok"] is True

    resp = await slack_client.post(
        "/conversations.kick",
        json={"channel": channel_id, "user": USER_JOHN},
    )
    assert resp.status_code == 200
    data = resp.json()
    assert data["ok"] is True
467+
468+
async def test_conversations_members_doc_shape(
    self, slack_client: AsyncClient
) -> None:
    """Check the response shape of ``conversations.members``.

    Requests up to 10 members of ``CHANNEL_GENERAL`` and verifies the HTTP
    status, the ``ok`` flag, that ``members`` is a list, and that
    ``response_metadata`` is present.
    """
    endpoint = f"/conversations.members?channel={CHANNEL_GENERAL}&limit=10"
    response = await slack_client.get(endpoint)
    assert response.status_code == 200

    payload = response.json()
    assert payload["ok"] is True
    assert "members" in payload
    member_ids = payload["members"]
    assert isinstance(member_ids, list)
    assert "response_metadata" in payload
480+
481+
async def test_users_list_doc_shape(self, slack_client: AsyncClient) -> None:
    """Check the response shape of ``users.list``.

    Requests up to 5 users and verifies the HTTP status, the ``ok`` flag,
    and that ``members`` is a list. When at least one user is returned, the
    first entry must expose ``id``, ``name`` and ``profile``.
    """
    response = await slack_client.get("/users.list?limit=5")
    assert response.status_code == 200

    payload = response.json()
    assert payload["ok"] is True
    assert "members" in payload
    members = payload["members"]
    assert isinstance(members, list)

    # An empty listing has no entry to inspect, so the shape check is skipped.
    if not members:
        return
    first_user = members[0]
    assert {"id", "name", "profile"}.issubset(first_user.keys())
491+
492+
async def test_users_conversations_doc_shape(
    self, slack_client: AsyncClient
) -> None:
    """Check the response shape of ``users.conversations``.

    Requests up to 5 conversations for ``USER_AGENT`` and verifies the HTTP
    status, the ``ok`` flag, and that ``channels`` is a list.
    """
    endpoint = f"/users.conversations?user={USER_AGENT}&limit=5"
    response = await slack_client.get(endpoint)
    assert response.status_code == 200

    payload = response.json()
    assert payload["ok"] is True
    assert "channels" in payload
    channels = payload["channels"]
    assert isinstance(channels, list)
Lines changed: 86 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,86 @@
1+
# API Conformance Testing
2+
3+
## Overview
4+
5+
This directory contains conformance tests that validate Agent-Diff API replicas against their real-world production counterparts. The tests compare **response schema/shape** (field presence, types, and structure), **status codes**, **error semantics**, and **mutation behavior** -- not exact values, since IDs and timestamps will naturally differ between environments.
6+
7+
## Per-Service Methodology
8+
9+
### Box (REST API)
10+
11+
**Approach:** Dual-fire against production Box API and replica. Each operation is executed against both environments, and response schemas are compared using recursive shape extraction.
12+
13+
- **Token:** `BOX_DEV_TOKEN` (Box developer token)
14+
- **Endpoints tested:** 33/33 implemented endpoints
15+
- **What is validated:** Response field presence and types, status code parity, error shapes (404, 400, 409), CRUD operations (folders, files, comments, tasks, hubs, collections, search), file upload/download, file version upload
16+
- **Enterprise-only fields** (54 fields like `role`, `enterprise`, `sync_state`) are excluded from comparison, as they only appear for enterprise Box accounts
17+
- **Last run:** 105/106 passed (99%)
18+
19+
### Google Calendar (REST API)
20+
21+
**Approach:** Dual-fire against Google Calendar API v3 and replica. Creates matching resources (calendars, events) in both environments, then validates all operations.
22+
23+
- **Token:** `GOOGLE_CALENDAR_ACCESS_TOKEN` (OAuth2 bearer token)
24+
- **Endpoints tested:** 37/37 implemented endpoints (calendars, calendarList, events, ACL, settings, colors, freeBusy, batch, watch, channels)
25+
- **What is validated:** Response schema parity, status codes, CRUD operations, recurring events, quickAdd, event move, ETag behavior, batch requests, error handling, delete operations
26+
- **Optional data-dependent fields** (55+ fields like `nextPageToken`, `attendees`, `conferenceData`) are excluded from comparison
27+
28+
### Linear (GraphQL API)
29+
30+
**Approach:** Dual-fire against Linear production GraphQL API and replica. Creates matching resources (issues, labels, comments) in both environments, then validates queries and mutations. Additionally runs **focused schema introspection** to detect drift between production and replica GraphQL schemas.
31+
32+
- **Token:** `LINEAR_API_KEY` (Linear API key)
33+
- **Operations tested:** 31 queries + 16 mutations + schema introspection
34+
- **Queries validated:** Issue filters (string, number, ID, team, assignee, creator, state, date, label, comment comparators), search operations (with pagination, ordering, partial match), resource queries (teams, projects, users, workflowStates, issueLabels, viewer), pagination/sorting, query by identifier, error handling
35+
- **Mutations validated:** issueCreate, issueUpdate, issueDelete, issueArchive/Unarchive, commentCreate, commentUpdate, commentDelete, issueLabelCreate, issueLabelUpdate, issueLabelDelete, issueAddLabel, issueRemoveLabel
36+
- **Schema introspection:** Compares focused type surfaces (StringComparator, IssueFilter, Issue, Query, Mutation, etc.) between production and replica schemas
37+
- **Last run:** 89/90 passed (98%) -- single failure is schema drift on newer Linear API fields (expected as Linear evolves its API)
38+
39+
### Slack (Docs-Golden)
40+
41+
**Approach:** Replica-only, validated against documented Slack API contracts. Unlike Box/Calendar/Linear, Slack conformance does not compare against a live Slack workspace because live-workspace parity is difficult to standardize (workspace state, installed apps, and permissions vary).
42+
43+
- **No external token required**
44+
- **Methods tested:** 22/28 implemented methods
45+
- **What is validated:** Response field presence (exact key sets), error semantics (`ok: false` with specific error codes), warning shapes, pagination structure
46+
- **Methods covered:** auth.test, chat.postMessage, chat.update, chat.delete, conversations.create, conversations.join, conversations.history, conversations.replies, conversations.info, conversations.leave, conversations.setTopic, conversations.archive, conversations.unarchive, conversations.rename, conversations.kick, conversations.members, reactions.add, reactions.get, users.info, users.list, users.conversations, search.messages
47+
- **Last run:** 22/22 passed (100%)
48+
49+
## How to Run
50+
51+
```bash
52+
# All conformance tests (requires all tokens set)
53+
pytest -m conformance -v
54+
55+
# Individual services
56+
BOX_DEV_TOKEN=<token> pytest tests/validation/test_box_parity.py -v -s
57+
GOOGLE_CALENDAR_ACCESS_TOKEN=<token> pytest tests/validation/test_calendar_parity_comprehensive.py -v -s
58+
LINEAR_API_KEY=<key> pytest tests/validation/test_linear_parity_comprehensive.py -v -s
59+
60+
# Slack (no external token needed)
61+
pytest tests/validation/test_slack_conformance.py -v
62+
63+
# Or run standalone (with detailed output):
64+
BOX_DEV_TOKEN=<token> python tests/validation/test_box_parity.py
65+
GOOGLE_CALENDAR_ACCESS_TOKEN=<token> python tests/validation/test_calendar_parity_comprehensive.py
66+
LINEAR_API_KEY=<key> python tests/validation/test_linear_parity_comprehensive.py
67+
```
68+
69+
**Prerequisites:**
70+
- Backend replica must be running (`docker-compose up` from `ops/`)
71+
- For Slack tests: must run inside Docker (`docker exec ops-backend-1 pytest ...`) or have local database access
72+
73+
## Interpreting Results
74+
75+
- **Pass threshold:** pytest entry points assert >= 70% pass rate. This threshold allows for minor schema differences (e.g., enterprise-only fields, newer API fields) while catching significant divergence.
76+
- **Schema mismatches** indicate fields present in one environment but not the other. These are logged with the specific field path and should be investigated -- many are benign (optional fields, tier-specific fields).
77+
- **Error parity** means both environments return the same error class (e.g., both return 404, or both return a GraphQL error with similar keywords). Exact error messages may differ.
78+
79+
## Coverage Summary
80+
81+
| Service | Protocol | Endpoints Tested | Test Count | Pass Rate | Methodology |
82+
|----------|----------|-----------------|------------|-----------|-------------|
83+
| Box | REST | 33/33 | 106 | 99% | Production parity |
84+
| Calendar | REST | 37/37 | 77 | 100% | Production parity |
85+
| Linear | GraphQL | 47 operations | 90 | 98% | Production parity + introspection |
86+
| Slack | REST | 22/28 methods | 22 | 100% | Docs-golden |

0 commit comments

Comments
 (0)