Skip to content

Commit 0c9800c

Browse files
fix(sync): use strict deferred relation resolution
1 parent 476239d commit 0c9800c

4 files changed

Lines changed: 162 additions & 2 deletions

File tree

src/basic_memory/indexing/batch_indexer.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,8 +493,14 @@ async def _resolve_batch_relations(
493493
async def resolve_relation(relation: Relation) -> int:
494494
async with semaphore:
495495
try:
496+
# strict=True for deferred resolution: only fill in to_id on an
497+
# exact permalink/title/file_path match. Fuzzy fallback would silently
498+
# resolve ambiguous links to whichever entity shares tokens with the
499+
# link text, mismatching this with the sync_service forward-reference
500+
# path and producing confidently-wrong graph edges. See
501+
# sync_service.resolve_relations for the same change.
496502
resolved_entity = await self.entity_service.link_resolver.resolve_link(
497-
relation.to_name
503+
relation.to_name, strict=True
498504
)
499505
if resolved_entity is None or resolved_entity.id == relation.from_id:
500506
return 0

src/basic_memory/sync/sync_service.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1447,7 +1447,16 @@ async def resolve_relations(self, entity_id: int | None = None) -> set[int]:
14471447
f"to_name={relation.to_name}"
14481448
)
14491449

1450-
resolved_entity = await self.entity_service.link_resolver.resolve_link(relation.to_name)
1450+
# Use strict=True: deferred resolution should only fill in to_id when an
1451+
# exact permalink/title/file_path match exists. The fuzzy fallback (search-based
1452+
# token match) would silently resolve ambiguous links like
1453+
# `[[overview (state-management/session-execution)]]` to whichever entity shares
1454+
# the most tokens, polluting the graph with confidently-wrong edges that no
1455+
# audit catches. Leaving such relations unresolved keeps to_id=NULL so they
1456+
# surface as forward references and can be fixed by the producer.
1457+
resolved_entity = await self.entity_service.link_resolver.resolve_link(
1458+
relation.to_name, strict=True
1459+
)
14511460

14521461
# ignore reference to self
14531462
if resolved_entity and resolved_entity.id != relation.from_id:

tests/indexing/test_batch_indexer.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,78 @@ async def test_batch_indexer_index_markdown_file_can_defer_relation_resolution(
688688
assert source.outgoing_relations[0].to_name == "Deferred Target"
689689

690690

691+
@pytest.mark.asyncio
async def test_batch_indexer_uses_strict_link_resolution_for_deferred_relations(
    app_config,
    entity_service,
    entity_repository,
    relation_repository,
    search_service,
    file_service,
    project_config,
    monkeypatch,
):
    """Regression: the batch indexer must resolve relations with strict=True.

    The test indexes a single note whose only relation points at a target
    that does not exist, while recording the ``strict`` keyword of every
    ``link_resolver.resolve_link`` call made during indexing. It then checks
    that at least one call happened, that every recorded call passed
    ``strict=True``, and that the relation is still unresolved afterwards
    (``to_id`` is None and ``to_name`` is unchanged).

    Motivation, as stated by the change under test: a fuzzy (search-based)
    fallback in this path could fill in ``to_id`` with a merely similar
    entity, producing confidently-wrong graph edges.
    """
    source_path = "notes/source.md"
    note_text = dedent(
        """
        ---
        title: Source
        type: note
        ---

        # Source

        - links_to [[never-resolves-target]]
        """
    )
    await _create_file(project_config.home / source_path, note_text)

    batch_indexer = _make_batch_indexer(
        app_config,
        entity_service,
        entity_repository,
        relation_repository,
        search_service,
        file_service,
    )

    # Wrap the real resolver so behavior is unchanged and only the flag is observed.
    link_resolver = entity_service.link_resolver
    real_resolve_link = link_resolver.resolve_link
    strict_flags: list[object] = []

    async def recording_resolve_link(*args, **kwargs):
        # A call that omits the keyword is recorded as False and fails the check below.
        strict_flags.append(kwargs.get("strict", False))
        return await real_resolve_link(*args, **kwargs)

    monkeypatch.setattr(link_resolver, "resolve_link", recording_resolve_link)

    indexing_input = {source_path: await _load_input(file_service, source_path)}
    await batch_indexer.index_files(indexing_input, max_concurrent=1)

    assert strict_flags, "batch indexer did not invoke link_resolver.resolve_link"
    non_strict_calls = [flag for flag in strict_flags if flag is not True]
    assert not non_strict_calls, (
        f"Deferred resolution must call resolve_link(strict=True). Observed: {strict_flags!r}"
    )

    # The unresolvable relation must still be unresolved after indexing.
    source = await entity_repository.get_by_file_path(source_path)
    assert source is not None
    assert len(source.outgoing_relations) == 1
    relation = source.outgoing_relations[0]
    assert relation.to_id is None
    assert relation.to_name == "never-resolves-target"
761+
762+
691763
@pytest.mark.asyncio
692764
async def test_batch_indexer_strips_frontmatter_from_search_content_when_body_is_empty(
693765
app_config,

tests/sync/test_sync_service.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,79 @@ async def test_forward_reference_resolution(
108108
assert source.relations[0].to_name == target.title
109109

110110

111+
@pytest.mark.asyncio
async def test_resolve_relations_uses_strict_link_resolution(
    sync_service: SyncService,
    project_config: ProjectConfig,
    entity_service: EntityService,
    monkeypatch,
):
    """Regression: ``resolve_relations`` must call ``resolve_link(strict=True)``.

    The test syncs a note containing a forward reference to a target that
    does not exist, then runs ``sync_service.resolve_relations()`` while
    recording the ``strict`` keyword of every ``link_resolver.resolve_link``
    call. It checks that the resolver was invoked, that every call passed
    ``strict=True``, and that the relation is still unresolved afterwards.

    Motivation, as stated by the change under test: producers sometimes emit
    disambiguator-style links such as
    ``[[overview (state-management/session-execution)]]`` whose text matches
    no entity exactly. A fuzzy search fallback would attach such a link to
    whichever entity shares the most tokens, creating confidently-wrong
    edges; leaving ``to_id`` empty keeps the link visible as a forward
    reference instead.
    """
    # The target is deliberately absent, so only a non-exact match could
    # ever resolve this relation.
    note_text = dedent("""
        ---
        type: knowledge
        ---
        # Source

        ## Relations
        - part_of [[never-resolves-target]]
        """)
    await create_test_file(project_config.home / "source.md", note_text)
    await sync_service.sync(project_config.home)

    project_prefix = generate_permalink(project_config.name)
    source_permalink = f"{project_prefix}/source"

    source = await entity_service.get_by_permalink(source_permalink)
    assert len(source.relations) == 1
    assert source.relations[0].to_id is None  # unresolved right after the initial sync

    # Wrap the real resolver so behavior is unchanged and only the flag is observed.
    link_resolver = sync_service.entity_service.link_resolver
    real_resolve_link = link_resolver.resolve_link
    strict_flags: list[Any] = []

    async def recording_resolve_link(*args, **kwargs):
        # A call that omits the keyword is recorded as False and fails the check below.
        strict_flags.append(kwargs.get("strict", False))
        return await real_resolve_link(*args, **kwargs)

    monkeypatch.setattr(link_resolver, "resolve_link", recording_resolve_link)

    await sync_service.resolve_relations()

    # The deferred pass must have consulted the resolver, always in strict mode.
    assert strict_flags, "resolve_relations did not invoke link_resolver.resolve_link"
    non_strict_calls = [flag for flag in strict_flags if flag is not True]
    assert not non_strict_calls, (
        f"Deferred resolution must call resolve_link(strict=True) to avoid silent "
        f"fuzzy matching. Observed strict values: {strict_flags!r}"
    )

    # Sanity check: the relation was not filled in by the deferred pass.
    source = await entity_service.get_by_permalink(source_permalink)
    relation = source.relations[0]
    assert relation.to_id is None
    assert relation.to_name == "never-resolves-target"
182+
183+
111184
@pytest.mark.asyncio
112185
async def test_resolve_relations_deletes_duplicate_unresolved_relation(
113186
sync_service: SyncService,

0 commit comments

Comments
 (0)