Skip to content

Commit 5b95504

Browse files
committed
fix(sync): constrain watch service to --project scope (#758)
When an MCP server is started with `--project X`, its watch service now observes only that project instead of every active project. Previously `BASIC_MEMORY_MCP_PROJECT` filtered the one-shot background sync only, so each concurrent `basic-memory mcp --project X` process spawned a watcher over every project. The duplicate watchers raced on the same files, producing: - `AttributeError: 'NoneType' object has no attribute 'permalink'` in `Relation.permalink` when one watcher cascade-deleted an entity while another was resolving relation permalinks on an adjacent entity - `[Errno 2] No such file or directory: 'foo.tmp' -> 'foo.md'` when two watchers raced on the deterministic tmp path in `write_file_atomic` - `Entity with external_id ... not found` in MCP `edit_note` when a concurrent watcher deleted the entity mid-operation Threads `BASIC_MEMORY_MCP_PROJECT` through `initialize_file_sync` into a new `WatchService(constrained_project=...)` kwarg. Extracts the project filter into `_select_projects_to_watch()` so it is testable outside the `run()` loop (which remains `# pragma: no cover`). Follow-ups tracked in #758: defensively guard `from_entity` in `Relation.permalink` and use unique tmp suffixes in `write_file_atomic` so intra-process concurrent writers on the same path do not collide. Signed-off-by: phernandez <paul@basicmachines.co>
1 parent f3e46d7 commit 5b95504

4 files changed

Lines changed: 203 additions & 21 deletions

File tree

src/basic_memory/services/initialization.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,18 +99,23 @@ async def initialize_file_sync(
9999
)
100100
project_repository = ProjectRepository(session_maker)
101101

102+
# Filter to constrained project if MCP server was started with --project.
103+
# Applied to both the initial background sync and the watch service so that
104+
# running multiple `basic-memory mcp --project X` processes does not produce
105+
# duplicate watchers fighting over the same files.
106+
constrained_project = os.environ.get("BASIC_MEMORY_MCP_PROJECT")
107+
102108
# Initialize watch service
103109
watch_service = WatchService(
104110
app_config=app_config,
105111
project_repository=project_repository,
106112
quiet=quiet,
113+
constrained_project=constrained_project,
107114
)
108115

109116
# Get active projects
110117
active_projects = await project_repository.get_active_projects()
111118

112-
# Filter to constrained project if MCP server was started with --project
113-
constrained_project = os.environ.get("BASIC_MEMORY_MCP_PROJECT")
114119
if constrained_project:
115120
active_projects = [p for p in active_projects if p.name == constrained_project]
116121
logger.info(f"Background sync constrained to project: {constrained_project}")
@@ -154,7 +159,10 @@ async def sync_project_background(project: Project):
154159
# Don't await the tasks - let them run in background while we continue
155160

156161
# Then start the watch service in the background
157-
logger.info("Starting watch service for all projects")
162+
if constrained_project:
163+
logger.info(f"Starting watch service constrained to project: {constrained_project}")
164+
else:
165+
logger.info("Starting watch service for all projects")
158166

159167
# run the watch service
160168
await watch_service.run()

src/basic_memory/sync/watch_service.py

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def __init__(
8585
project_repository: ProjectRepository,
8686
quiet: bool = False,
8787
sync_service_factory: Optional[SyncServiceFactory] = None,
88+
constrained_project: Optional[str] = None,
8889
):
8990
self.app_config = app_config
9091
self.project_repository = project_repository
@@ -93,6 +94,11 @@ def __init__(
9394
self.status_path.parent.mkdir(parents=True, exist_ok=True)
9495
self._ignore_patterns_cache: dict[Path, Set[str]] = {}
9596
self._sync_service_factory = sync_service_factory
97+
# When set (typically from BASIC_MEMORY_MCP_PROJECT), the watch cycle
98+
# only observes this project. Without it, each `basic-memory mcp --project X`
99+
# process spawns a watcher over every project and racing writers collide
100+
# on the same files.
101+
self.constrained_project = constrained_project
96102

97103
# quiet mode for mcp so it doesn't mess up stdout
98104
self.console = Console(quiet=quiet)
@@ -156,6 +162,35 @@ async def _watch_projects_cycle(self, projects: Sequence[Project], stop_event: a
156162
# process changes
157163
await asyncio.gather(*change_handlers)
158164

165+
async def _select_projects_to_watch(self) -> list[Project]:
166+
"""Return the set of projects this watch cycle should observe.
167+
168+
Applies two filters in order:
169+
1. ``constrained_project`` — if the MCP server was started with
170+
``--project``, only that project is watched. This keeps concurrent
171+
MCP processes from producing duplicate watchers that race on the
172+
same files.
173+
2. Cloud-only projects without a local bisync copy are skipped so we
174+
don't watch a path that does not exist on disk.
175+
"""
176+
projects = await self.project_repository.get_active_projects()
177+
178+
if self.constrained_project:
179+
projects = [p for p in projects if p.name == self.constrained_project]
180+
181+
cloud_skip: list[str] = []
182+
for p in projects:
183+
if self.app_config.get_project_mode(p.name) == ProjectMode.CLOUD:
184+
entry = self.app_config.projects.get(p.name)
185+
if entry and Path(entry.path).is_absolute():
186+
continue # Cloud project with local bisync copy — keep watching
187+
cloud_skip.append(p.name)
188+
if cloud_skip:
189+
projects = [p for p in projects if p.name not in cloud_skip]
190+
logger.debug(f"Skipping cloud-mode projects in watch cycle: {cloud_skip}")
191+
192+
return list(projects)
193+
159194
async def run(self): # pragma: no cover
160195
"""Watch for file changes and sync them"""
161196

@@ -174,23 +209,7 @@ async def run(self): # pragma: no cover
174209
# Clear ignore patterns cache to pick up any .gitignore changes
175210
self._ignore_patterns_cache.clear()
176211

177-
# Reload projects to catch any new/removed projects
178-
projects = await self.project_repository.get_active_projects()
179-
180-
# Trigger: project is configured for cloud routing
181-
# Why: cloud-only projects (no local directory) should not be watched;
182-
# cloud projects with a local bisync copy (absolute path) need watching
183-
# Outcome: watch cycle skips cloud projects without a local directory
184-
cloud_skip = []
185-
for p in projects:
186-
if self.app_config.get_project_mode(p.name) == ProjectMode.CLOUD:
187-
entry = self.app_config.projects.get(p.name)
188-
if entry and Path(entry.path).is_absolute():
189-
continue # Cloud project with local bisync copy — keep watching
190-
cloud_skip.append(p.name)
191-
if cloud_skip:
192-
projects = [p for p in projects if p.name not in cloud_skip]
193-
logger.debug(f"Skipping cloud-mode projects in watch cycle: {cloud_skip}")
212+
projects = await self._select_projects_to_watch()
194213

195214
project_paths = [project.path for project in projects]
196215
logger.debug(f"Starting watch cycle for directories: {project_paths}")

tests/services/test_initialization.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
ensure_initialization,
1818
initialize_app,
1919
initialize_database,
20+
initialize_file_sync,
2021
reconcile_projects_with_config,
2122
)
2223

@@ -167,6 +168,60 @@ def capture_warning(message: str) -> None:
167168
)
168169

169170

171+
class _FakeWatchService:
172+
"""Captures init kwargs so tests can assert what the real service receives."""
173+
174+
last_kwargs: dict[str, object] = {}
175+
176+
def __init__(self, **kwargs):
177+
_FakeWatchService.last_kwargs = kwargs
178+
179+
async def run(self):
180+
return None
181+
182+
183+
def _disable_test_env_short_circuit(monkeypatch) -> None:
184+
"""Bypass ``is_test_env`` so ``initialize_file_sync`` actually runs.
185+
186+
``is_test_env`` returns True whenever pytest is running, which would cause
187+
``initialize_file_sync`` to return before constructing a WatchService.
188+
"""
189+
monkeypatch.setattr(BasicMemoryConfig, "is_test_env", property(lambda self: False))
190+
191+
192+
@pytest.mark.asyncio
193+
async def test_initialize_file_sync_passes_constrained_project_to_watch_service(
194+
app_config: BasicMemoryConfig, monkeypatch
195+
):
196+
"""``BASIC_MEMORY_MCP_PROJECT`` must reach the watch service, not just the
197+
one-shot background sync. Otherwise multiple ``basic-memory mcp --project X``
198+
processes each spawn a watcher over every project and race on file writes.
199+
"""
200+
_disable_test_env_short_circuit(monkeypatch)
201+
monkeypatch.setenv("BASIC_MEMORY_MCP_PROJECT", "target-project")
202+
monkeypatch.setattr("basic_memory.sync.WatchService", _FakeWatchService)
203+
_FakeWatchService.last_kwargs = {}
204+
205+
await initialize_file_sync(app_config, quiet=True)
206+
207+
assert _FakeWatchService.last_kwargs.get("constrained_project") == "target-project"
208+
209+
210+
@pytest.mark.asyncio
211+
async def test_initialize_file_sync_no_constraint_when_env_unset(
212+
app_config: BasicMemoryConfig, monkeypatch
213+
):
214+
"""With no env var set, the watch service is unconstrained."""
215+
_disable_test_env_short_circuit(monkeypatch)
216+
monkeypatch.delenv("BASIC_MEMORY_MCP_PROJECT", raising=False)
217+
monkeypatch.setattr("basic_memory.sync.WatchService", _FakeWatchService)
218+
_FakeWatchService.last_kwargs = {}
219+
220+
await initialize_file_sync(app_config, quiet=True)
221+
222+
assert _FakeWatchService.last_kwargs.get("constrained_project") is None
223+
224+
170225
@pytest.mark.asyncio
171226
async def test_initialize_app_no_precedence_warning_when_not_conflicting(
172227
app_config: BasicMemoryConfig, monkeypatch

tests/sync/test_watch_service.py

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pytest
99
from watchfiles import Change
1010

11-
from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON
11+
from basic_memory.config import BasicMemoryConfig, ProjectMode, WATCH_STATUS_JSON
1212
from basic_memory.models.project import Project
1313
from basic_memory.sync.watch_service import WatchService, WatchServiceState
1414

@@ -44,6 +44,106 @@ def test_watch_service_status_path_honors_basic_memory_config_dir(tmp_path, monk
4444
assert service.status_path.parent.exists()
4545

4646

47+
async def _register_local_projects(
48+
app_config: BasicMemoryConfig, project_repository, specs
49+
) -> None:
50+
"""Register projects as local in both the DB and app_config.
51+
52+
Projects that aren't present in ``app_config.projects`` are treated as
53+
cloud-only by ``get_project_mode`` and get filtered out of the watch
54+
cycle, so tests that exercise ``_select_projects_to_watch`` need them
55+
added to both sides.
56+
"""
57+
from basic_memory.config import ProjectEntry
58+
59+
for spec in specs:
60+
await project_repository.create(
61+
{
62+
"name": spec["name"],
63+
"description": spec["name"],
64+
"path": spec["path"],
65+
"is_active": True,
66+
"is_default": False,
67+
}
68+
)
69+
app_config.projects[spec["name"]] = ProjectEntry(path=spec["path"], mode=ProjectMode.LOCAL)
70+
71+
72+
@pytest.mark.asyncio
73+
async def test_select_projects_to_watch_returns_all_when_unconstrained(
74+
app_config: BasicMemoryConfig, project_repository
75+
):
76+
"""Without a --project constraint, every active project is watched."""
77+
await _register_local_projects(
78+
app_config,
79+
project_repository,
80+
[
81+
{"name": "project-alpha", "path": "/tmp/alpha"},
82+
{"name": "project-beta", "path": "/tmp/beta"},
83+
],
84+
)
85+
86+
service = WatchService(app_config=app_config, project_repository=project_repository)
87+
88+
projects = await service._select_projects_to_watch()
89+
names = {p.name for p in projects}
90+
91+
assert "project-alpha" in names
92+
assert "project-beta" in names
93+
94+
95+
@pytest.mark.asyncio
96+
async def test_select_projects_to_watch_filters_to_constrained_project(
97+
app_config: BasicMemoryConfig, project_repository
98+
):
99+
"""With ``constrained_project`` set, only that project is returned.
100+
101+
Regression: multiple ``basic-memory mcp --project X`` processes each spawned
102+
a watch service over every project, producing duplicate change handlers
103+
that raced on file writes and cascaded deletes.
104+
"""
105+
await _register_local_projects(
106+
app_config,
107+
project_repository,
108+
[
109+
{"name": "project-alpha", "path": "/tmp/alpha"},
110+
{"name": "project-beta", "path": "/tmp/beta"},
111+
],
112+
)
113+
114+
service = WatchService(
115+
app_config=app_config,
116+
project_repository=project_repository,
117+
constrained_project="project-beta",
118+
)
119+
120+
projects = await service._select_projects_to_watch()
121+
122+
assert [p.name for p in projects] == ["project-beta"]
123+
124+
125+
@pytest.mark.asyncio
126+
async def test_select_projects_to_watch_empty_when_constrained_project_missing(
127+
app_config: BasicMemoryConfig, project_repository
128+
):
129+
"""An unknown constraint yields an empty watch set rather than watching everything."""
130+
await _register_local_projects(
131+
app_config,
132+
project_repository,
133+
[{"name": "project-alpha", "path": "/tmp/alpha"}],
134+
)
135+
136+
service = WatchService(
137+
app_config=app_config,
138+
project_repository=project_repository,
139+
constrained_project="does-not-exist",
140+
)
141+
142+
projects = await service._select_projects_to_watch()
143+
144+
assert projects == []
145+
146+
47147
def test_state_add_event():
48148
"""Test adding events to state."""
49149
state = WatchServiceState()

0 commit comments

Comments
 (0)