Skip to content

Commit ef3426d

Browse files
hartphoenix and claude authored
fix(representation): add missing deleted_at filter to working representation queries (plastic-labs#456)
`RepresentationManager._query_documents_recent()` and `RepresentationManager._query_documents_most_derived()` do not filter out soft-deleted documents, unlike every other document query function in the codebase. This causes the deriver's working representation to include documents that are being garbage-collected.

Refs plastic-labs#444

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7275372 commit ef3426d

2 files changed

Lines changed: 138 additions & 0 deletions

File tree

src/crud/representation.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ async def _query_documents_recent(
366366
models.Document.workspace_name == self.workspace_name,
367367
models.Document.observer == self.observer,
368368
models.Document.observed == self.observed,
369+
models.Document.deleted_at.is_(None),
369370
*(
370371
[models.Document.session_name == session_name]
371372
if session_name is not None
@@ -391,6 +392,7 @@ async def _query_documents_most_derived(
391392
models.Document.workspace_name == self.workspace_name,
392393
models.Document.observer == self.observer,
393394
models.Document.observed == self.observed,
395+
models.Document.deleted_at.is_(None),
394396
)
395397
.order_by(models.Document.times_derived.desc())
396398
)
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import pytest
2+
from nanoid import generate as generate_nanoid
3+
from sqlalchemy import func, update
4+
from sqlalchemy.ext.asyncio import AsyncSession
5+
6+
from src import models
7+
from src.crud.representation import RepresentationManager
8+
9+
10+
class TestRepresentationManagerSoftDelete:
    """Tests that RepresentationManager query methods exclude soft-deleted documents.

    Each test stores one live and one soft-deleted document for the same
    observer/observed pair, runs one of the manager's private query methods,
    and asserts that only the live document is returned.
    """

    async def _setup(
        self,
        db_session: AsyncSession,
        test_workspace: models.Workspace,
        test_peer: models.Peer,
    ) -> tuple[models.Peer, models.Session, models.Collection, RepresentationManager]:
        """Create a second peer, a session, a collection, and a RepresentationManager.

        Args:
            db_session: Session the new rows are added to and flushed through.
            test_workspace: Workspace whose name scopes every created row.
            test_peer: Existing peer used as the observer.

        Returns:
            A tuple ``(observed_peer, session, collection, manager)`` where the
            manager is configured with ``test_peer`` as observer and the new
            peer as observed.
        """
        test_peer2 = models.Peer(
            name=str(generate_nanoid()), workspace_name=test_workspace.name
        )
        db_session.add(test_peer2)
        await db_session.flush()

        test_session = models.Session(
            name=str(generate_nanoid()), workspace_name=test_workspace.name
        )
        db_session.add(test_session)
        await db_session.flush()

        collection = models.Collection(
            workspace_name=test_workspace.name,
            observer=test_peer.name,
            observed=test_peer2.name,
        )
        db_session.add(collection)
        await db_session.flush()

        manager = RepresentationManager(
            test_workspace.name,
            observer=test_peer.name,
            observed=test_peer2.name,
        )

        return test_peer2, test_session, collection, manager

    async def _soft_delete(
        self,
        db_session: AsyncSession,
        document: models.Document,
    ) -> None:
        """Stamp ``deleted_at`` on *document* with the database clock, then commit.

        ``document.id`` is read before the commit, so the helper itself never
        touches an attribute after the session has committed.
        """
        await db_session.execute(
            update(models.Document)
            .where(models.Document.id == document.id)
            .values(deleted_at=func.now())
        )
        await db_session.commit()

    @pytest.mark.asyncio
    async def test_query_documents_recent_excludes_soft_deleted(
        self,
        db_session: AsyncSession,
        sample_data: tuple[models.Workspace, models.Peer],
    ):
        """Soft-deleted documents must not appear in the recent-documents query."""
        test_workspace, test_peer = sample_data
        test_peer2, test_session, _, manager = await self._setup(
            db_session, test_workspace, test_peer
        )

        # Create two documents
        doc_live = models.Document(
            workspace_name=test_workspace.name,
            observer=test_peer.name,
            observed=test_peer2.name,
            content="Live observation",
            session_name=test_session.name,
        )
        doc_deleted = models.Document(
            workspace_name=test_workspace.name,
            observer=test_peer.name,
            observed=test_peer2.name,
            content="Deleted observation",
            session_name=test_session.name,
        )
        db_session.add_all([doc_live, doc_deleted])
        await db_session.flush()

        # Capture primary keys while the instances are still loaded. If the
        # session fixture uses SQLAlchemy's default expire_on_commit=True,
        # reading doc.id after commit would need an implicit refresh, which an
        # AsyncSession cannot perform outside an awaited call.
        live_id, deleted_id = doc_live.id, doc_deleted.id

        # Soft-delete one
        await self._soft_delete(db_session, doc_deleted)

        results = await manager._query_documents_recent(db_session, top_k=10)  # pyright: ignore[reportPrivateUsage]

        result_ids = [doc.id for doc in results]
        assert live_id in result_ids
        assert deleted_id not in result_ids

    @pytest.mark.asyncio
    async def test_query_documents_most_derived_excludes_soft_deleted(
        self,
        db_session: AsyncSession,
        sample_data: tuple[models.Workspace, models.Peer],
    ):
        """Soft-deleted documents must not appear in the most-derived query."""
        test_workspace, test_peer = sample_data
        test_peer2, test_session, _, manager = await self._setup(
            db_session, test_workspace, test_peer
        )

        # Create two documents with different times_derived. The deleted one
        # has the higher count, so it would be listed first if the query did
        # not filter on deleted_at.
        doc_live = models.Document(
            workspace_name=test_workspace.name,
            observer=test_peer.name,
            observed=test_peer2.name,
            content="Live observation",
            session_name=test_session.name,
            times_derived=5,
        )
        doc_deleted = models.Document(
            workspace_name=test_workspace.name,
            observer=test_peer.name,
            observed=test_peer2.name,
            content="Deleted high-derived observation",
            session_name=test_session.name,
            times_derived=100,
        )
        db_session.add_all([doc_live, doc_deleted])
        await db_session.flush()

        # Capture primary keys before the commit (see the note in the
        # recent-documents test: attributes may be expired afterwards).
        live_id, deleted_id = doc_live.id, doc_deleted.id

        # Soft-delete the high-derived one
        await self._soft_delete(db_session, doc_deleted)

        results = await manager._query_documents_most_derived(db_session, top_k=10)  # pyright: ignore[reportPrivateUsage]

        result_ids = [doc.id for doc in results]
        assert live_id in result_ids
        assert deleted_id not in result_ids

0 commit comments

Comments
 (0)