Skip to content

Commit d07c4c4

Browse files
author
Bryce
committed
feat(phase9 #97 D10.e prep): cursor pagination contract + 18 unit tests
Per design pack §C (canonical post-#1710 SSoT): - aperag/mcp/cursor/codec.py: CursorPayload (sort_key + last_position + invariant_hash + issued_at + ttl_seconds 1h default + server_id + schema_version 1) + base64url JSON encode/decode + is_expired TTL check - aperag/mcp/cursor/invariants.py: compute_invariant_hash sha256 over (sort_key + filters + collection_id + tenant_id + index_id) deterministic across dict ordering - aperag/mcp/cursor/schemas.py: PaginationParams (cursor + limit conint 1..200) + PaginationResult[T] generic (items + next_cursor + total_count) - aperag/mcp/cursor/errors.py: 6 canonical snake_case codes (cursor_invalid / cursor_expired / cursor_filter_mismatch / cursor_tenant_mismatch / cursor_index_changed / cursor_schema_unsupported per §C.3 + #1710 amendment) + CursorError exception + CursorErrorEnvelope wire shape + SILENT_RESET_FORBIDDEN guard - aperag/mcp/cursor/__init__.py: public surface for D10.c read primitive imports tests/unit_test/mcp/test_cursor_contract.py: - 5 codec round-trip / wire format / TTL boundary tests - 2 invariant_hash determinism + binding sensitivity tests - 7 error envelope round-trip tests (parametrized over each canonical code) + SILENT_RESET_FORBIDDEN pin - 4 PaginationParams/PaginationResult shape tests including end-to-end cursor flow Pending D10.c stub head landing for `aperag/service/pagination.py` integration helper + `tests/e2e_http/hurl/<NN>_d10_pagination.hurl` cross-tool e2e — those are Window 1 work.
1 parent 8f60e9b commit d07c4c4

6 files changed

Lines changed: 593 additions & 0 deletions

File tree

aperag/mcp/cursor/__init__.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Copyright 2025 ApeCloud, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""D10.e (#97) opaque cursor pagination contract for MCP read primitives.
16+
17+
Per ``docs/modularization/d10-design-pack.md`` §C — server-issued
18+
base64 cursor with stability invariants (sort key + filter / tenant
19+
/ index hash) plus 6 explicit error codes; never silently reset to
20+
first page (Weston msg=95b07155 hard lock).
21+
22+
Public surface (D10.c read primitives import from here):
23+
24+
* :class:`CursorPayload` — internal cursor structure (server only;
25+
client treats wire string as opaque)
26+
* :func:`encode_cursor` / :func:`decode_cursor` — wire codec
27+
* :func:`compute_invariant_hash` — stable hash over cursor scope
28+
bindings (sort_key / filters / collection_id / tenant_id / index_id)
29+
* :class:`PaginationParams` / :class:`PaginationResult` — typed
30+
request / response generic over the paginated item type
31+
* :class:`CursorError` + the 6 canonical error codes (pending
32+
spec amendment double-sign per architect msg=669db73c — error
33+
module loaded after canonical lock)
34+
35+
Search-rank cursor (vector / fulltext score-based) is intentionally
36+
NOT shared with this module — D10.d carries its own cursor type
37+
with score-boundary invariants (per design pack §G D10.e Forbidden).
38+
"""
39+
40+
from aperag.mcp.cursor.codec import (
    CursorPayload,
    decode_cursor,
    encode_cursor,
)
from aperag.mcp.cursor.errors import (
    CursorError,
    CursorErrorCode,
    CursorErrorEnvelope,
)
from aperag.mcp.cursor.invariants import compute_invariant_hash
from aperag.mcp.cursor.schemas import (
    PaginationParams,
    PaginationResult,
)

# The package docstring lists CursorError and the canonical error codes
# as part of the public surface, so they are re-exported here alongside
# the codec, invariant-hash and pagination-schema names. Kept sorted.
__all__ = [
    "CursorError",
    "CursorErrorCode",
    "CursorErrorEnvelope",
    "CursorPayload",
    "PaginationParams",
    "PaginationResult",
    "compute_invariant_hash",
    "decode_cursor",
    "encode_cursor",
]

aperag/mcp/cursor/codec.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Copyright 2025 ApeCloud, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""D10.e cursor wire codec — opaque base64url-encoded JSON envelope.
16+
17+
Per design pack §C.1: cursor is server-issued, treated by the
18+
client as an opaque string. This module exposes:
19+
20+
* :class:`CursorPayload` — strongly typed internal structure
21+
* :func:`encode_cursor` — payload → wire string
22+
* :func:`decode_cursor` — wire string → payload; structural problems
23+
surface as ``ValueError`` / ``KeyError`` and the caller maps them to
24+
the canonical :class:`CursorError` code
25+
26+
The codec is stateless: TTL and invariants live inside the payload
27+
and are checked by the caller once decoded, not by ``decode_cursor``.
28+
There is no server-side store — intentional (§C.0 idempotent + restart-stable).
29+
"""
30+
31+
from __future__ import annotations
32+
33+
import base64
34+
import json
35+
import time
36+
from dataclasses import asdict, dataclass, field
37+
from typing import Any
38+
39+
# CURSOR_SCHEMA_VERSION bumps when the on-wire payload shape changes
40+
# in an incompatible way. Decoders treat any `schema_version` they
41+
# don't know about as `cursor_schema_unsupported` (§C.3) — pending
42+
# the spec amendment double-sign on the canonical error code names
43+
# per architect msg=669db73c, the literal raised here is owned by
44+
# ``aperag.mcp.cursor.errors`` once that module lands.
45+
CURSOR_SCHEMA_VERSION: int = 1

DEFAULT_TTL_SECONDS: int = 3600  # §C.4 1h default; per-tool override


@dataclass
class CursorPayload:
    """Server-only structured cursor (never deserialized client-side).

    Instances are turned into the opaque wire string by
    ``encode_cursor`` and rebuilt by ``decode_cursor``; every field must
    therefore survive a JSON round trip.
    """

    # Name of the primary sort field the page ordering is based on.
    sort_key: str
    # Where the previous page stopped.
    # NOTE(review): presumably keyed by sort field / tie-breaker — confirm
    # against the read primitives that build it.
    last_position: dict[str, Any]
    # Digest binding the cursor to its scope; expected to come from
    # ``compute_invariant_hash``.
    invariant_hash: str
    # Issue time; compared against ``int(time.time())`` in ``is_expired``,
    # i.e. whole seconds since the epoch.
    issued_at: int
    # Identifier of the issuing server.
    # NOTE(review): semantics are not visible in this module — confirm.
    server_id: str
    # On-wire payload shape version; defaults to the current version.
    schema_version: int = CURSOR_SCHEMA_VERSION
    # Lifetime in seconds, counted from ``issued_at``.
    ttl_seconds: int = DEFAULT_TTL_SECONDS
    # Free-form additional data; a fresh empty dict per instance.
    extra: dict[str, Any] = field(default_factory=dict)

    def is_expired(self, *, now: int | None = None) -> bool:
        """Return True once the cursor's TTL has elapsed.

        Args:
            now: Clock reading in epoch seconds to evaluate against.
                When omitted, ``time.time()`` is used. Passing it
                explicitly makes the check deterministic.

        Returns:
            True when the clock, truncated to whole seconds, has reached
            ``issued_at + ttl_seconds``. The boundary second itself
            counts as expired.
        """

        clock = time.time() if now is None else now
        return int(clock) >= self.issued_at + self.ttl_seconds
66+
67+
68+
def encode_cursor(payload: CursorPayload) -> str:
    """Encode a CursorPayload to its on-wire base64url JSON form.

    The payload is converted with ``dataclasses.asdict`` and serialised
    as compact JSON with sorted keys, so equal payloads always produce
    the same token. The output is URL-safe and unpadded so it
    round-trips cleanly through MCP request/response JSON without
    quoting or escaping.

    Args:
        payload: The cursor to serialise. Every field value must be
            JSON-serialisable.

    Returns:
        The base64url token with trailing ``=`` padding removed.

    Raises:
        TypeError: Propagated from ``json.dumps`` when a field holds a
            value that is not JSON-serialisable.
    """

    raw = json.dumps(asdict(payload), separators=(",", ":"), sort_keys=True).encode("utf-8")
    # Padding is dropped here; the decoder restores it from the length.
    return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
77+
78+
79+
def _b64url_decode(token: str) -> bytes:
80+
pad = "=" * (-len(token) % 4)
81+
return base64.urlsafe_b64decode(token + pad)
82+
83+
84+
def decode_cursor(token: str) -> CursorPayload:
85+
"""Decode a wire cursor string back to a CursorPayload.
86+
87+
On malformed / unsupported / expired input the caller is
88+
responsible for raising the canonical CursorError code
89+
(``cursor_invalid`` / ``cursor_schema_unsupported`` / etc) —
90+
this codec only surfaces structural issues via ValueError /
91+
KeyError; the canonical error mapping lives in
92+
:mod:`aperag.mcp.cursor.errors` (pending spec amendment lock).
93+
"""
94+
95+
raw = _b64url_decode(token)
96+
obj = json.loads(raw)
97+
return CursorPayload(
98+
schema_version=obj["schema_version"],
99+
sort_key=obj["sort_key"],
100+
last_position=obj["last_position"],
101+
invariant_hash=obj["invariant_hash"],
102+
issued_at=obj["issued_at"],
103+
ttl_seconds=obj.get("ttl_seconds", DEFAULT_TTL_SECONDS),
104+
server_id=obj["server_id"],
105+
extra=obj.get("extra", {}),
106+
)

aperag/mcp/cursor/errors.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Copyright 2025 ApeCloud, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""D10.e canonical cursor error codes (§C.3 contract).
16+
17+
Per architect canonical lock (msg=77bd1f6a / msg=05063521 /
18+
#1710) the 6 snake_case codes from design pack §C.3 are the only
19+
identifiers the wire surface uses; the SCREAMING_SNAKE shorthand
20+
that appeared in the §G summary was drafting noise and is
21+
backfilled via #1710.
22+
23+
Each code carries a fixed *client recovery path* (§C.3 lines
24+
567-571) — collapsing distinct codes (e.g., merging the three
25+
invariant-mismatch variants into one) would force the client to
26+
over-react, which is why we keep them split:
27+
28+
* ``cursor_invalid`` / ``cursor_schema_unsupported`` — restart
29+
pagination from a null cursor.
30+
* ``cursor_expired`` — restart pagination.
31+
* ``cursor_filter_mismatch`` / ``cursor_tenant_mismatch`` —
32+
client-side bug; surface to the operator.
33+
* ``cursor_index_changed`` — backend ops issue; retry from null.
34+
35+
A single :class:`CursorError` exception carries the code; the wire
36+
mapping happens at the MCP tool boundary (D10.c read primitives /
37+
D10.d search primitives) where ``CursorError`` is caught and
38+
re-emitted as the tool's structured error envelope.
39+
"""
40+
41+
from __future__ import annotations
42+
43+
from typing import Any, Literal, Optional
44+
45+
from pydantic import BaseModel, Field
46+
47+
# The closed set of error identifiers used on the wire (§C.3).
CursorErrorCode = Literal[
    "cursor_invalid",
    "cursor_expired",
    "cursor_filter_mismatch",
    "cursor_tenant_mismatch",
    "cursor_index_changed",
    "cursor_schema_unsupported",
]

# Anti-pattern guard (§C.3): a server that silently resets to the
# first page on cursor failure violates the explicit-not-silent
# contract. Decoders MUST raise CursorError; callers MUST surface
# the wire envelope rather than swallowing the error and starting
# a fresh pagination on the user's behalf.
SILENT_RESET_FORBIDDEN = True
62+
63+
64+
class CursorErrorEnvelope(BaseModel):
    """Wire envelope for cursor errors emitted by D10 tools."""

    # One of the six canonical snake_case codes.
    code: CursorErrorCode
    # Human-readable description of the failure.
    message: str
    # Optional structured diagnostics; a fresh empty dict when omitted.
    details: dict[str, Any] = Field(default_factory=dict)
71+
72+
class CursorError(Exception):
    """Raised whenever a cursor cannot be honoured.

    The ``code`` attribute carries one of the six canonical
    :data:`CursorErrorCode` literals. ``details`` is reserved for
    server-diagnostic data that is safe to expose to the client
    (e.g., the offending invariant field name); operator-only
    diagnostics belong in logs, not in the wire envelope.
    """

    def __init__(
        self,
        code: CursorErrorCode,
        message: str,
        *,
        details: Optional[dict[str, Any]] = None,
    ) -> None:
        """Store the canonical code, message and optional details.

        Args:
            code: Canonical error code for the failure.
            message: Human-readable description; also what ``str(exc)``
                returns.
            details: Optional client-safe diagnostics. ``None`` (or an
                empty dict) becomes a fresh empty dict.
        """
        super().__init__(message)
        self.code: CursorErrorCode = code
        self.message: str = message
        self.details: dict[str, Any] = details or {}

    def __reduce__(self) -> tuple[Any, ...]:
        """Support ``copy`` / ``pickle`` despite the two-argument constructor.

        Only ``message`` is forwarded to ``Exception.__init__``, so the
        default reduction would rebuild the error with a single
        positional argument and fail with ``TypeError``. Rebuild from
        ``(code, message)`` instead and restore the remaining attributes
        (including ``details``) from the instance dict.
        """
        return (type(self), (self.code, self.message), self.__dict__)

    def to_envelope(self) -> CursorErrorEnvelope:
        """Return this error as the wire envelope model."""
        return CursorErrorEnvelope(code=self.code, message=self.message, details=self.details)

aperag/mcp/cursor/invariants.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2025 ApeCloud, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""D10.e cursor stability invariants — sha256 over scope bindings.
16+
17+
Per design pack §C.2 + Weston msg=95b07155 stability requirement, a
18+
cursor encodes a hash of the bindings that must NOT change for it
19+
to remain valid:
20+
21+
* ``sort_key`` — primary sort field; switching changes ordering and
22+
invalidates last_position.
23+
* ``filters`` — any user-supplied filter set, including search /
24+
list narrowing predicates (mode flags, time windows, tags).
25+
* ``collection_id`` / ``tenant_id`` — tenancy boundary; reusing a
26+
cursor across tenants is a security boundary violation.
27+
* ``index_id`` (optional) — when paginating against a versioned
28+
index, the cursor pins to the index version it was issued
29+
against; reindex bumps the id and any cursor with a stale hash
30+
fails ``cursor_index_changed``.
31+
32+
The function is intentionally insulated from §C error code naming
33+
(pending architect canonical lock per msg=441c5e56): callers
34+
compute the hash here and compare; the *response* mapping into
35+
``cursor_filter_mismatch`` / ``cursor_tenant_mismatch`` /
36+
``cursor_index_changed`` lives in ``aperag.mcp.cursor.errors``.
37+
"""
38+
39+
from __future__ import annotations
40+
41+
import hashlib
42+
import json
43+
from typing import Any
44+
45+
46+
def compute_invariant_hash(
    *,
    sort_key: str,
    filters: dict[str, Any],
    collection_id: str | None,
    tenant_id: str,
    index_id: str | None = None,
) -> str:
    """Return the canonical sha256 hex digest for these bindings.

    Inputs are normalised via ``json.dumps(sort_keys=True)`` so the
    hash is stable across dict ordering and Python re-serialisation.

    Args:
        sort_key: Primary sort field of the listing.
        filters: Filter set in effect; values must be JSON-serialisable.
            List order inside values is significant.
        collection_id: Collection the listing is scoped to, or ``None``.
        tenant_id: Tenant the cursor is issued for.
        index_id: Optional index version the cursor is pinned to.
            ``None`` is hashed as JSON ``null``, so omitting it and
            passing ``None`` give the same digest.

    Returns:
        A 64-character lowercase hexadecimal sha256 digest.

    Raises:
        TypeError: Propagated from ``json.dumps`` when ``filters`` holds
            a value that is not JSON-serialisable.
    """

    payload = {
        "sort_key": sort_key,
        "filters": filters,
        "collection_id": collection_id,
        "tenant_id": tenant_id,
        "index_id": index_id,
    }
    # Compact separators + sorted keys give one canonical byte string
    # per logical payload, at every nesting level.
    raw = json.dumps(payload, separators=(",", ":"), sort_keys=True).encode("utf-8")
    return hashlib.sha256(raw).hexdigest()

0 commit comments

Comments
 (0)