diff --git a/docs/api.md b/docs/api.md index d0f05c7bf..3f4ecfe9d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -887,10 +887,15 @@ items match the request before paging. **`page_size`** — Items per page, in `1..100`. Default 20. -**`sort_by`** — Column to sort by. Note: for `memory_type` values -where `"timestamp"` does not apply (e.g. `"profile"` has no -timestamp, `"agent_skill"` is a named entity), the server silently -falls back to `"updated_at"`. +**`sort_by`** — Column to sort by. The `agent_skill` table has no +`timestamp` column, so a request with `sort_by="timestamp"` is +downgraded to `"updated_at"`; the actual column used is reported +in `response.data.effective_sort` so the caller can detect the +override (diff `response.data.effective_sort` against the request's +`sort_by`). The manager also emits a structured warning +(`get.sort_by.downgraded`) on every override so the loss is visible +in logs / metrics. `profile` is a single-row KV and does not +consult this field. **`sort_order`** — `"desc"` (newest first, default) or `"asc"`. @@ -911,6 +916,7 @@ branching on `memory_type`; exactly one is populated. | `agent_skills` | `array` | Populated when `memory_type="agent_skill"` | | `total_count` | `integer` | Total matching records **before** paging | | `count` | `integer` | Number of items in **this page** (`len(items)`) | +| `effective_sort` | `string \| null` | The sort column actually applied. Normally equal to the request's `sort_by`; for `agent_skill` with `sort_by="timestamp"` it is `"updated_at"` (the override) | #### GetEpisodeItem diff --git a/docs/openapi.json b/docs/openapi.json index 0ae6608cd..576a59a8c 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -557,6 +557,17 @@ "type": "integer", "title": "Count", "default": 0 + }, + "effective_sort": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Effective Sort" } }, "additionalProperties": false, diff --git a/src/everos/memory/get/dto.py b/src/everos/memory/get/dto.py index 836167245..ebd57d58e 100644 --- a/src/everos/memory/get/dto.py +++ b/src/everos/memory/get/dto.py @@ -77,9 +77,12 @@ class GetRequest(BaseModel): page: int = Field(default=1, ge=1) page_size: int = Field(default=20, ge=1, le=100) sort_by: Literal["timestamp", "updated_at"] = "timestamp" - """Sort column. ``profile`` and ``agent_skill`` silently override - to ``updated_at`` (profile has no timestamp; agent_skill is a - named entity with no temporal column).""" + """Sort column. ``agent_skill`` has no ``timestamp`` column; the + manager downgrades the request to ``updated_at`` and surfaces the + actual column used in :attr:`GetData.effective_sort` so the + caller can detect the override. ``profile`` is single-row KV (no + listing sort surface), so this field is not consulted for that + kind.""" sort_order: Literal["asc", "desc"] = "desc" filters: FilterNode | None = None @@ -110,6 +113,33 @@ def _validate_owner_memory_type_pair(self) -> Self: ) return self + @model_validator(mode="after") + def _validate_profile_has_no_paging(self) -> Self: + # ``profile`` is a single-row KV fetch (one user_id -> at most + # one user_profile row). The manager path (manager.py PROFILE + # arm) reads ``req.page`` / ``req.page_size`` / ``req.filters`` + # into locals but never threads them into the profile lookup, + # so a caller can ship a paginated/filtered request and the + # server silently degrades to a single-row 200. Reject + # pagination + filters up front so the contract is honest. + # ``sort_by`` is allowed because the manager surfaces the + # applied column in ``GetData.effective_sort`` (callers diff + # against their request); see r2-4. + if self.memory_type == GetMemoryType.PROFILE: + extras: list[str] = [] + if self.page != 1: + extras.append(f"page={self.page}") + if self.page_size != 20: + extras.append(f"page_size={self.page_size}") + if self.filters is not None: + extras.append("filters=") + if extras: + raise ValueError( + "memory_type='profile' is a single-row KV fetch and " + f"ignores pagination/filter arguments; got {', '.join(extras)}" + ) + return self + @property def owner_id(self) -> str: """Derived from whichever of ``user_id`` / ``agent_id`` is set.""" @@ -211,6 +241,13 @@ class GetData(BaseModel): count: int = 0 """Number of items in this page (``len(items)`` after slicing).""" + effective_sort: str | None = None + """The sort column actually applied to the query. Normally equal + to :attr:`GetRequest.sort_by`; for ``agent_skill`` the manager + downgrades ``"timestamp"`` to ``"updated_at"`` because the + table has no ``timestamp`` column — callers can diff against + ``GetRequest.sort_by`` to detect that override.""" + class GetResponse(BaseModel): """Top-level response envelope.""" diff --git a/src/everos/memory/get/manager.py b/src/everos/memory/get/manager.py index 1000cbdd2..c89b8394a 100644 --- a/src/everos/memory/get/manager.py +++ b/src/everos/memory/get/manager.py @@ -92,6 +92,7 @@ async def get(self, req: GetRequest) -> GetResponse: episodes=items, total_count=total, count=len(items), + effective_sort=req.sort_by, ) case GetMemoryType.PROFILE: profiles = await self._fetch_profile(req.owner_id) @@ -99,6 +100,7 @@ async def get(self, req: GetRequest) -> GetResponse: profiles=profiles, total_count=len(profiles), count=len(profiles), + effective_sort=req.sort_by, ) case GetMemoryType.AGENT_CASE: rows, total = await self._case.find_where_paginated( @@ -113,15 +115,33 @@ async def get(self, req: GetRequest) -> GetResponse: agent_cases=items, total_count=total, count=len(items), + effective_sort=req.sort_by, ) case GetMemoryType.AGENT_SKILL: - # AgentSkill has no ``timestamp`` column. Silently - # downgrade ``sort_by`` to ``updated_at`` (from + # AgentSkill has no ``timestamp`` column. Downgrade + # ``sort_by`` to ``updated_at`` (from # :class:`BaseLanceTable`) so the caller cannot - # accidentally trigger a schema error. + # trigger a PyArrow ``KeyError`` in + # ``arrow_tbl.sort_by([(sort_by, order)])``. The + # original request is preserved in + # :attr:`GetData.effective_sort` and a structured + # warning is emitted when the override fires so the + # loss is observable in logs / metrics (callers diff + # ``effective_sort`` against ``request.sort_by`` to + # detect the change). + effective_sort = "updated_at" + if req.sort_by != "updated_at": + logger.warning( + "get.sort_by.downgraded", + memory_type=req.memory_type.value, + requested_sort_by=req.sort_by, + effective_sort_by=effective_sort, + owner_id=req.owner_id, + request_id=request_id, + ) rows, total = await self._skill.find_where_paginated( where, - sort_by="updated_at", + sort_by=effective_sort, descending=descending, page=req.page, page_size=req.page_size, @@ -131,6 +151,7 @@ async def get(self, req: GetRequest) -> GetResponse: agent_skills=items, total_count=total, count=len(items), + effective_sort=effective_sort, ) return GetResponse(request_id=request_id, data=data) diff --git a/tests/unit/test_memory/test_get/test_manager.py b/tests/unit/test_memory/test_get/test_manager.py index cb4e16714..ade5f171d 100644 --- a/tests/unit/test_memory/test_get/test_manager.py +++ b/tests/unit/test_memory/test_get/test_manager.py @@ -319,32 +319,88 @@ async def test_agent_case_populates_agent_cases( async def test_agent_skill_sort_by_silently_overridden_to_updated_at( manager: tuple[GetManager, _StubRepo, _StubRepo, _StubRepo], + monkeypatch: pytest.MonkeyPatch, ) -> None: - """``agent_skill`` always sorts by ``updated_at`` (no ``timestamp`` column).""" + """``agent_skill`` always sorts by ``updated_at`` (no ``timestamp`` column). + + The override is observable: a structured warning is emitted and the + actual sort column is surfaced in ``response.data.effective_sort`` + so the caller can diff against their request to detect the change. + """ + from unittest.mock import MagicMock + mgr, _, _, sk = manager sk.rows = [_agent_skill_row("planner")] sk.total = 1 req = GetRequest( agent_id="a1", memory_type=GetMemoryType.AGENT_SKILL, - # User passes the default — should be silently downgraded. + # User passes the default — must be downgraded + made observable. sort_by="timestamp", ) + # Patch the module-level structlog logger so we can assert the call + # directly (structlog + pytest caplog interaction is brittle). + fake_logger = MagicMock() + monkeypatch.setattr("everos.memory.get.manager.logger", fake_logger) resp = await mgr.get(req) assert sk.last.sort_by == "updated_at" assert resp.data.total_count == 1 assert resp.data.agent_skills[0].name == "planner" + # The override is now observable in the response DTO. + assert resp.data.effective_sort == "updated_at" + assert resp.data.effective_sort != req.sort_by + # And a structured warning was emitted (structlog: event name + kwargs). + assert fake_logger.warning.call_count == 1 + call_args = fake_logger.warning.call_args + assert call_args.args[0] == "get.sort_by.downgraded" + kw = call_args.kwargs + assert kw["memory_type"] == "agent_skill" + assert kw["requested_sort_by"] == "timestamp" + assert kw["effective_sort_by"] == "updated_at" + assert kw["owner_id"] == "a1" + assert isinstance(kw["request_id"], str) and len(kw["request_id"]) == 32 async def test_agent_skill_explicit_updated_at_is_respected( manager: tuple[GetManager, _StubRepo, _StubRepo, _StubRepo], + monkeypatch: pytest.MonkeyPatch, ) -> None: - """``updated_at`` passes through unchanged (no double-override surprise).""" + """``updated_at`` passes through unchanged — and emits no warning.""" + from unittest.mock import MagicMock + mgr, _, _, sk = manager req = GetRequest( agent_id="a1", memory_type=GetMemoryType.AGENT_SKILL, sort_by="updated_at", ) - await mgr.get(req) + fake_logger = MagicMock() + monkeypatch.setattr("everos.memory.get.manager.logger", fake_logger) + resp = await mgr.get(req) assert sk.last.sort_by == "updated_at" + # No downgrade happened — DTO echoes the request and no warning fires. + assert resp.data.effective_sort == "updated_at" + assert fake_logger.warning.call_count == 0 + + +async def test_other_kinds_surface_effective_sort( + manager: tuple[GetManager, _StubRepo, _StubRepo, _StubRepo], +) -> None: + """For kinds that have a ``timestamp`` column, ``effective_sort`` + echoes the request verbatim — there's nothing to override.""" + mgr, ep, ac, _ = manager + ep.rows = [_episode_row("ep_1")] + ep.total = 1 + ac.rows = [_agent_case_row("ac_1")] + ac.total = 1 + + ep_resp = await mgr.get( + GetRequest(user_id="u1", memory_type=GetMemoryType.EPISODE, sort_by="timestamp") + ) + assert ep_resp.data.effective_sort == "timestamp" + + ac_req = GetRequest( + agent_id="a1", memory_type=GetMemoryType.AGENT_CASE, sort_by="updated_at" + ) + ac_resp = await mgr.get(ac_req) + assert ac_resp.data.effective_sort == "updated_at"