Skip to content

Commit fdc78f7

Browse files
cdeustclaude
andcommitted
fix(seed): scope delete_memories_by_tag to domain (issue #16)
PSGSupport reported seed_project purging memories globally despite the domain argument. Added optional domain= parameter to delete_memories_by_tag; seed_project now passes domain=args["domain"] so purge stays within scope. Default behavior (domain=None) unchanged for callers that intentionally want global purge. Tests cover both the per-domain scoping and the legacy global path. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 6f7de1e commit fdc78f7

5 files changed

Lines changed: 242 additions & 12 deletions

File tree

mcp_server/handlers/seed_project.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,14 @@ def _parse_args(args: dict[str, Any] | None) -> tuple[Path, str, int, bool]:
113113
"""Extract and validate handler arguments."""
114114
args = args or {}
115115
directory = args.get("directory", "") or os.getcwd()
116-
domain = args.get("domain", "")
116+
root = Path(directory).expanduser().resolve()
117+
# Domain auto-detection: schema documents this behavior; the previous
118+
# implementation passed an empty string through, which broke the
119+
# per-domain purge contract (issue #16).
120+
domain = args.get("domain", "") or root.name
117121
max_kb = int(args.get("max_file_size_kb", 64))
118122
dry_run = args.get("dry_run", False)
119-
return Path(directory).expanduser().resolve(), domain, max_kb * 1024, dry_run
123+
return root, domain, max_kb * 1024, dry_run
120124

121125

122126
async def _store_discoveries(
@@ -177,7 +181,10 @@ async def handler(args: dict[str, Any] | None = None) -> dict[str, Any]:
177181

178182
purged = 0
179183
if not dry_run:
180-
purged = _get_store().delete_memories_by_tag("seeded")
184+
# Scope purge to this domain (issue #16): seeding repo-A must not
185+
# wipe out repo-B's seeded memories. Domain authority ends at the
186+
# project boundary; cross-domain effects are an externality.
187+
purged = _get_store().delete_memories_by_tag("seeded", domain=domain)
181188

182189
all_discoveries = collect_all_discoveries(root, max_bytes)
183190

mcp_server/infrastructure/pg_store_queries.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,27 @@ def search_by_tag_vector(
192192
).fetchall()
193193
return [self._normalize_memory_row(r) for r in rows]
194194

195-
def delete_memories_by_tag(self, tag: str) -> int:
196-
"""Delete all memories containing the given tag."""
197-
cur = self._execute(
198-
"DELETE FROM memories WHERE tags @> %s::jsonb",
199-
(json.dumps([tag]),),
200-
)
195+
def delete_memories_by_tag(self, tag: str, domain: str | None = None) -> int:
196+
"""Delete memories with the given tag, optionally scoped to a domain.
197+
198+
precondition: tag is a non-empty string; domain is None or a non-empty string.
199+
postcondition: returns the number of rows removed; rows removed iff their
200+
tags JSONB contains [tag] AND (domain is None OR domain matches).
201+
domain=None preserves global-purge behavior for callers that
202+
actually want it (legacy contract). Caller is responsible for
203+
passing domain when scope matters (e.g. seed_project, which is
204+
per-repo by design — see issue #16).
205+
"""
206+
if domain is None:
207+
cur = self._execute(
208+
"DELETE FROM memories WHERE tags @> %s::jsonb",
209+
(json.dumps([tag]),),
210+
)
211+
else:
212+
cur = self._execute(
213+
"DELETE FROM memories WHERE tags @> %s::jsonb AND domain = %s",
214+
(json.dumps([tag]), domain),
215+
)
201216
self._conn.commit()
202217
return cur.rowcount
203218

mcp_server/infrastructure/sqlite_store_queries.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,22 @@ def get_all_memories_for_decay(self) -> list[dict[str, Any]]:
111111
).fetchall()
112112
return [self._normalize_memory_row(r) for r in rows]
113113

114-
def delete_memories_by_tag(self, tag: str) -> int:
115-
"""Delete all memories containing the given tag.
114+
def delete_memories_by_tag(self, tag: str, domain: str | None = None) -> int:
115+
"""Delete memories with the given tag, optionally scoped to a domain.
116+
117+
precondition: tag is a non-empty string; domain is None or a non-empty string.
118+
postcondition: returns the number of memory rows removed; rows removed
119+
iff their tags list contains tag AND (domain is None OR row.domain
120+
matches). domain=None preserves the legacy global-purge behavior.
116121
117122
SQLite lacks jsonb operators — filter in Python then delete by ID.
118123
"""
119-
rows = self._conn.execute("SELECT id, tags FROM memories").fetchall()
124+
if domain is None:
125+
rows = self._conn.execute("SELECT id, tags FROM memories").fetchall()
126+
else:
127+
rows = self._conn.execute(
128+
"SELECT id, tags FROM memories WHERE domain = ?", (domain,)
129+
).fetchall()
120130
ids_to_delete: list[int] = []
121131
for r in rows:
122132
try:
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
"""Tests for seed_project handler — domain-scoped purge (issue #16).
2+
3+
PSGSupport reported that calling `seed_project` for repo-A and then
4+
repo-B purged repo-A's "seeded" memories despite the explicit `domain`
5+
argument. The cause was a global `delete_memories_by_tag("seeded")` call
6+
that ignored domain. The fix scopes the purge to the supplied domain.
7+
8+
These tests prove the cross-domain leak is closed.
9+
"""
10+
11+
from __future__ import annotations
12+
13+
import asyncio
14+
from pathlib import Path
15+
16+
from mcp_server.handlers.seed_project import handler
17+
from mcp_server.infrastructure.memory_store import MemoryStore
18+
19+
20+
def _make_repo(tmp_path: Path, name: str) -> Path:
21+
"""Build a minimal repo skeleton sufficient for seed_project to find content."""
22+
root = tmp_path / name
23+
root.mkdir()
24+
(root / "README.md").write_text(f"# {name}\nMinimal seed test fixture.\n")
25+
(root / "pyproject.toml").write_text(
26+
f'[project]\nname = "{name}"\nversion = "0.1.0"\n'
27+
)
28+
return root
29+
30+
31+
def _seeded_count(store, domain: str) -> int:
32+
rows = store.get_memories_for_domain(domain, min_heat=0.0, limit=500)
33+
return sum(1 for r in rows if "seeded" in (r.get("tags") or []))
34+
35+
36+
def test_seed_repo_a_then_repo_b_preserves_repo_a(tmp_path: Path) -> None:
37+
"""The bug: seeding repo-B used to wipe out repo-A's seeded memories."""
38+
repo_a = _make_repo(tmp_path, "repo-a")
39+
repo_b = _make_repo(tmp_path, "repo-b")
40+
41+
store = MemoryStore()
42+
43+
# Seed repo-a
44+
res_a = asyncio.run(
45+
handler({"directory": str(repo_a), "domain": "repo-a"})
46+
)
47+
assert res_a["seeded"] is True
48+
assert res_a["stored"] >= 1, "fixture should yield at least one stored memory"
49+
a_after_first_seed = _seeded_count(store, "repo-a")
50+
assert a_after_first_seed >= 1
51+
52+
# Seed repo-b — must not touch repo-a's memories
53+
res_b = asyncio.run(
54+
handler({"directory": str(repo_b), "domain": "repo-b"})
55+
)
56+
assert res_b["seeded"] is True
57+
58+
a_after_second_seed = _seeded_count(store, "repo-a")
59+
assert a_after_second_seed == a_after_first_seed, (
60+
f"repo-a's seeded memories must survive seeding repo-b "
61+
f"(had {a_after_first_seed}, now {a_after_second_seed})"
62+
)
63+
64+
65+
def test_reseeding_same_repo_purges_only_that_repo(tmp_path: Path) -> None:
66+
"""Reseeding repo-A should clear repo-A's prior seeds and replace them,
67+
while leaving repo-B's seeds untouched."""
68+
repo_a = _make_repo(tmp_path, "repo-a")
69+
repo_b = _make_repo(tmp_path, "repo-b")
70+
71+
store = MemoryStore()
72+
73+
asyncio.run(handler({"directory": str(repo_a), "domain": "repo-a"}))
74+
asyncio.run(handler({"directory": str(repo_b), "domain": "repo-b"}))
75+
76+
b_before = _seeded_count(store, "repo-b")
77+
assert b_before >= 1
78+
79+
# Reseed repo-a — purge should be domain-scoped
80+
res_a2 = asyncio.run(
81+
handler({"directory": str(repo_a), "domain": "repo-a"})
82+
)
83+
assert res_a2["seeded"] is True
84+
# Reported purge count must reflect the prior repo-a memories,
85+
# never zero (we know there were some) and never include repo-b's.
86+
assert res_a2["purged_stale"] >= 1
87+
assert res_a2["purged_stale"] <= b_before + res_a2["purged_stale"] # sanity
88+
89+
b_after = _seeded_count(store, "repo-b")
90+
assert b_after == b_before, (
91+
"reseeding repo-a must not affect repo-b's seeded memories"
92+
)
93+
94+
95+
def test_domain_auto_detected_from_directory_name(tmp_path: Path) -> None:
96+
"""Schema documents domain auto-detection from directory name. The fix
97+
materializes that contract — without it, an omitted domain would
98+
fall back to global purge (the original bug)."""
99+
repo = _make_repo(tmp_path, "auto-detect-me")
100+
101+
res = asyncio.run(handler({"directory": str(repo)}))
102+
assert res["seeded"] is True
103+
assert res["domain"] == "auto-detect-me"
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""Tests for delete_memories_by_tag — domain scoping (issue #16).
2+
3+
Background: PSGSupport reported `seed_project` purging memories globally
4+
across repos. Root cause: `delete_memories_by_tag("seeded")` was unscoped
5+
and authority leaked past the project boundary. Fix: optional `domain`
6+
parameter scopes the delete; default (None) preserves legacy behavior
7+
for callers that intentionally want a global purge.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
from mcp_server.infrastructure.memory_store import MemoryStore
13+
14+
15+
def _insert(store, *, content: str, tag: str, domain: str) -> int:
16+
return store.insert_memory(
17+
{
18+
"content": content,
19+
"tags": [tag],
20+
"domain": domain,
21+
"source": "test",
22+
"is_protected": False,
23+
}
24+
)
25+
26+
27+
def test_delete_by_tag_with_domain_scopes_to_that_domain() -> None:
28+
"""domain=X removes only memories with tag AND domain=X."""
29+
store = MemoryStore()
30+
31+
a1 = _insert(store, content="repo-a memory 1", tag="seeded", domain="repo-a")
32+
a2 = _insert(store, content="repo-a memory 2", tag="seeded", domain="repo-a")
33+
b1 = _insert(store, content="repo-b memory 1", tag="seeded", domain="repo-b")
34+
35+
deleted = store.delete_memories_by_tag("seeded", domain="repo-a")
36+
37+
assert deleted == 2, "must remove exactly the two repo-a rows"
38+
# repo-b survives
39+
surviving = [
40+
m for m in store.get_memories_for_domain("repo-b", min_heat=0.0, limit=100)
41+
]
42+
surviving_ids = {m["id"] for m in surviving}
43+
assert b1 in surviving_ids, "repo-b memory must survive a repo-a-scoped purge"
44+
# repo-a is gone
45+
surviving_a = [
46+
m for m in store.get_memories_for_domain("repo-a", min_heat=0.0, limit=100)
47+
]
48+
surviving_a_ids = {m["id"] for m in surviving_a}
49+
assert a1 not in surviving_a_ids
50+
assert a2 not in surviving_a_ids
51+
52+
53+
def test_delete_by_tag_without_domain_purges_globally() -> None:
54+
"""domain=None preserves legacy global-purge behavior."""
55+
store = MemoryStore()
56+
57+
_insert(store, content="repo-a memory", tag="seeded", domain="repo-a")
58+
_insert(store, content="repo-b memory", tag="seeded", domain="repo-b")
59+
_insert(store, content="other tag memory", tag="kept", domain="repo-a")
60+
61+
deleted = store.delete_memories_by_tag("seeded")
62+
63+
assert deleted == 2, "must remove both seeded rows globally"
64+
# The differently-tagged memory survives
65+
a_remaining = store.get_memories_for_domain("repo-a", min_heat=0.0, limit=100)
66+
assert any("other tag" in m["content"] for m in a_remaining)
67+
68+
69+
def test_delete_by_tag_with_domain_does_not_touch_other_tags() -> None:
70+
"""Scoped delete must not remove rows that lack the tag, even in-domain."""
71+
store = MemoryStore()
72+
73+
_insert(store, content="seeded one", tag="seeded", domain="repo-a")
74+
_insert(store, content="manual one", tag="manual", domain="repo-a")
75+
76+
deleted = store.delete_memories_by_tag("seeded", domain="repo-a")
77+
78+
assert deleted == 1
79+
remaining = store.get_memories_for_domain("repo-a", min_heat=0.0, limit=100)
80+
contents = [m["content"] for m in remaining]
81+
assert "manual one" in contents
82+
assert "seeded one" not in contents
83+
84+
85+
def test_delete_by_tag_with_unknown_domain_is_noop() -> None:
86+
"""Domain that has no rows returns 0 and removes nothing."""
87+
store = MemoryStore()
88+
89+
_insert(store, content="real memory", tag="seeded", domain="repo-a")
90+
91+
deleted = store.delete_memories_by_tag("seeded", domain="nonexistent")
92+
93+
assert deleted == 0
94+
remaining = store.get_memories_for_domain("repo-a", min_heat=0.0, limit=100)
95+
assert len(remaining) == 1

0 commit comments

Comments
 (0)