Skip to content

Commit e90cfa4

Browse files
authored
fix(reflect): scope delta mental model recall to new memories only (#1192)
Delta mode mental model refresh was running a full recall across ALL memories (identical to full mode), then passing all facts to a second LLM call for delta ops. This caused content bloat, duplication, and made delta strictly more expensive than full mode.

Changes:
- Add `created_after`/`created_before` time-range filters to the recall pipeline (retrieval.py, link_expansion_retrieval.py, graph_retrieval.py), threaded through `recall_async` -> `reflect_async` -> tool closures
- Delta refresh passes `last_refreshed_at` as `created_after` so the agentic loop only retrieves memories created/updated since the last refresh (uses `updated_at` to catch consolidation updates)
- Short-circuit delta when no new facts are found (skip the LLM call, preserve existing content)
- Accumulate `based_on` across delta refreshes (merge previous + new, deduplicated by ID)
- Pass context to the reflect agent during mental-model refresh with the document name, stay-on-topic guidance, and example-preservation instructions
- Rewrite the delta prompt: preserve existing content from prior refreshes, merge overlapping topics, preserve concrete examples over abstract rules
- Add recall time-range unit tests (8 tests)
- Add an integration test verifying delta fusion quality
1 parent 1078566 commit e90cfa4

9 files changed

Lines changed: 629 additions & 34 deletions

File tree

hindsight-api-slim/hindsight_api/engine/memory_engine.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2514,6 +2514,8 @@ async def recall_async(
25142514
tags: list[str] | None = None,
25152515
tags_match: TagsMatch = "any",
25162516
tag_groups: list[TagGroup] | None = None,
2517+
created_after: datetime | None = None,
2518+
created_before: datetime | None = None,
25172519
_connection_budget: int | None = None,
25182520
_quiet: bool = False,
25192521
) -> RecallResultModel:
@@ -2659,6 +2661,8 @@ async def recall_async(
26592661
tags=tags,
26602662
tags_match=tags_match,
26612663
tag_groups=tag_groups,
2664+
created_after=created_after,
2665+
created_before=created_before,
26622666
connection_budget=_connection_budget,
26632667
quiet=_quiet,
26642668
include_source_facts=include_source_facts,
@@ -2787,6 +2791,8 @@ async def _search_with_retries(
27872791
tags: list[str] | None = None,
27882792
tags_match: TagsMatch = "any",
27892793
tag_groups: list[TagGroup] | None = None,
2794+
created_after: datetime | None = None,
2795+
created_before: datetime | None = None,
27902796
connection_budget: int | None = None,
27912797
quiet: bool = False,
27922798
include_source_facts: bool = False,
@@ -2911,6 +2917,8 @@ async def _search_with_retries(
29112917
tags=tags,
29122918
tags_match=tags_match,
29132919
tag_groups=tag_groups,
2920+
created_after=created_after,
2921+
created_before=created_before,
29142922
)
29152923
parallel_duration = time.time() - parallel_start
29162924
finally:
@@ -5509,6 +5517,8 @@ async def reflect_async(
55095517
recall_include_chunks: bool | None = None,
55105518
recall_max_tokens_override: int | None = None,
55115519
recall_chunks_max_tokens_override: int | None = None,
5520+
created_after: datetime | None = None,
5521+
created_before: datetime | None = None,
55125522
_skip_span: bool = False,
55135523
) -> ReflectResult:
55145524
"""
@@ -5661,6 +5671,8 @@ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, An
56615671
last_consolidated_at=last_consolidated_at,
56625672
pending_consolidation=pending_consolidation,
56635673
source_facts_max_tokens=reflect_source_facts_max_tokens,
5674+
created_after=created_after,
5675+
created_before=created_before,
56645676
)
56655677

56665678
# Determine which tools to enable based on fact_types and exclude_mental_models
@@ -5688,6 +5700,8 @@ async def recall_fn(
56885700
max_chunk_tokens=max_chunk_tokens,
56895701
fact_types=recall_fact_types if fact_types is not None else None,
56905702
include_chunks=effective_recall_include_chunks,
5703+
created_after=created_after,
5704+
created_before=created_before,
56915705
)
56925706

56935707
async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
@@ -7077,9 +7091,26 @@ async def refresh_mental_model(
70777091

70787092
# Run reflect with the source query, excluding the mental model being refreshed
70797093
# Skip creating a nested "hindsight.reflect" span since we already have "hindsight.mental_model_refresh"
7094+
# Build context to guide the reflect agent: tell it what this mental
7095+
# model is about so it stays on-topic and produces high-quality content.
7096+
mm_name = mental_model.get("name") or mental_model_id
7097+
refresh_context = (
7098+
f'You are writing a document called "{mm_name}". '
7099+
f"ONLY include content that directly answers the topic query. "
7100+
f"Discard observations that are tangential or off-topic — retrieval may return "
7101+
f"loosely related content that does not belong in this document.\n\n"
7102+
f"Quality guidelines:\n"
7103+
f"- Preserve concrete examples, before/after pairs, and sample sentences "
7104+
f"from the observations. These teach more than abstract rules.\n"
7105+
f"- If observations contain illustrative examples (e.g. ✅/❌ pairs, "
7106+
f"rewrites, sample phrases), include them in your answer.\n"
7107+
f"- Structure the document around the topic, not around the sources."
7108+
)
7109+
70807110
reflect_kwargs: dict[str, Any] = dict(
70817111
bank_id=bank_id,
70827112
query=mental_model["source_query"],
7113+
context=refresh_context,
70837114
request_context=request_context,
70847115
tags=tag_filtering.tags,
70857116
tags_match=tag_filtering.tags_match,
@@ -7097,6 +7128,17 @@ async def refresh_mental_model(
70977128
stored_max_tokens = mental_model.get("max_tokens")
70987129
if stored_max_tokens is not None:
70997130
reflect_kwargs["max_tokens"] = stored_max_tokens
7131+
7132+
# Delta mode: scope recall to memories created since the last refresh
7133+
# so the agentic loop only retrieves genuinely new information.
7134+
if use_delta:
7135+
last_refreshed_at_raw = mental_model.get("last_refreshed_at")
7136+
if last_refreshed_at_raw is not None:
7137+
if isinstance(last_refreshed_at_raw, str):
7138+
reflect_kwargs["created_after"] = datetime.fromisoformat(last_refreshed_at_raw)
7139+
else:
7140+
reflect_kwargs["created_after"] = last_refreshed_at_raw
7141+
71007142
reflect_result = await self.reflect_async(**reflect_kwargs)
71017143

71027144
# Build reflect_response payload to store
@@ -7127,6 +7169,20 @@ async def refresh_mental_model(
71277169
)
71287170
based_on_serialized_payload[fact_type] = serialized_facts
71297171

7172+
# In delta mode, based_on must accumulate: the mental model is
7173+
# grounded on ALL facts ever used, not just the latest delta's new
7174+
# ones. Merge previous based_on with current, deduplicating by id.
7175+
if use_delta:
7176+
prev_rr = mental_model.get("reflect_response") or {}
7177+
prev_based_on = prev_rr.get("based_on") or {}
7178+
for ftype, prev_facts in prev_based_on.items():
7179+
if not isinstance(prev_facts, list):
7180+
continue
7181+
new_ids = {f["id"] for f in based_on_serialized_payload.get(ftype, [])}
7182+
carried = [f for f in prev_facts if isinstance(f, dict) and f.get("id") not in new_ids]
7183+
if carried:
7184+
based_on_serialized_payload.setdefault(ftype, []).extend(carried)
7185+
71307186
reflect_response_payload = {
71317187
"text": reflect_result.text,
71327188
"based_on": based_on_serialized_payload,
@@ -7179,6 +7235,23 @@ async def refresh_mental_model(
71797235
for _ftype, facts in based_on_serialized_payload.items():
71807236
supporting_facts.extend(facts)
71817237

7238+
# No new facts since last refresh — skip the delta LLM call
7239+
# and preserve existing content unchanged.
7240+
if not supporting_facts:
7241+
logger.info(
7242+
f"[MENTAL_MODELS] Delta refresh for {mental_model_id}: "
7243+
"no new facts found, preserving content"
7244+
)
7245+
reflect_response_payload["delta_applied"] = False
7246+
reflect_response_payload["delta_skipped_reason"] = "no_new_facts"
7247+
return await self.update_mental_model(
7248+
bank_id,
7249+
mental_model_id,
7250+
reflect_response=reflect_response_payload,
7251+
last_refreshed_source_query=current_source_query,
7252+
request_context=request_context,
7253+
)
7254+
71827255
# Op JSON is denser than the rendered markdown — each op
71837256
# carries the section_id, op type, and a full block payload
71847257
# whose ``text`` may quote the original passage. Budget 1.5×
@@ -7523,7 +7596,7 @@ def _get(key: str) -> Any:
75237596
tags_match = "any" # default: untagged MM is "global", tagged MM matches any overlap
75247597

75257598
params: list[Any] = [bank_id, last_refreshed_at]
7526-
where = ["bank_id = $1", "created_at > $2"]
7599+
where = ["bank_id = $1", "updated_at > $2"]
75277600

75287601
if mm_tags:
75297602
operator, include_untagged = _parse_tags_match(tags_match)

hindsight-api-slim/hindsight_api/engine/reflect/prompts.py

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -525,41 +525,60 @@ def build_final_system_prompt(mission: str | None = None) -> str:
525525
FINAL_SYSTEM_PROMPT = build_final_system_prompt()
526526

527527

528-
STRUCTURED_DELTA_SYSTEM_PROMPT = """You are computing a *minimal patch* to a structured document.
528+
STRUCTURED_DELTA_SYSTEM_PROMPT = """You are integrating *new information* into an existing structured document.
529529
530530
You will be given:
531-
1. CURRENT DOCUMENT (JSON) — the existing structured mental model. Each section
531+
1. TOPIC — the question this document answers. Content that does not help
532+
answer this question is OFF-TOPIC and should be removed.
533+
2. CURRENT DOCUMENT (JSON) — the existing structured mental model. Each section
532534
has a stable ``id``, a ``heading``, a ``level`` (1..6), and an ordered list
533535
of ``blocks``. Blocks are typed: ``paragraph``, ``bullet_list``,
534536
``ordered_list``, or ``code``.
535-
2. CANDIDATE SUMMARY (markdown) — a freshly generated synthesis of the latest
536-
memories, useful only as a hint about *what new information exists*. You
537-
MUST NOT copy its formatting or wording wholesale; it is not the target.
538-
3. SUPPORTING FACTS — the observations and facts the candidate is grounded in.
539-
Treat these as the only source of new information.
537+
3. NEW INFORMATION SYNTHESIS (markdown) — a synthesis showing how the new facts
538+
relate to the document's topic. Use it to understand context and relevance,
539+
but do NOT copy its formatting or wording wholesale.
540+
4. SUPPORTING FACTS — observations and facts created since the last refresh.
541+
These are genuinely new — they were NOT available when the current document
542+
was written.
540543
541544
Your task: output a JSON object ``{"operations": [...]}``. Applied to CURRENT
542-
DOCUMENT, the operations must produce the smallest possible change that
543-
reflects the new facts.
544-
545-
ABSOLUTE RULES
546-
- If CURRENT DOCUMENT already covers all the supporting facts, output
547-
exactly ``{"operations": []}``. An empty operation list IS the correct
548-
answer when nothing new has come in. This is the most common case.
545+
DOCUMENT, the operations must produce a document that best answers the TOPIC
546+
by integrating the new facts.
547+
548+
RULES
549+
- These facts are NEW since the last refresh. The existing document already
550+
captures all prior information from earlier refreshes. Your job is to
551+
integrate the new facts into the existing document.
552+
- **Preserve existing content**: The current document was built from prior facts
553+
that you cannot see. Do NOT remove or replace existing sections just because
554+
the new facts do not reference them. Only remove content when the new facts
555+
explicitly contradict or supersede it.
556+
- **Merge overlapping topics**: When new facts cover topics that overlap with
557+
existing sections, merge the new information INTO the existing section
558+
rather than creating duplicates. When new facts provide more specific or
559+
authoritative guidance on a topic already covered generically, update the
560+
existing content to reflect the more specific guidance.
561+
- **Preserve examples**: Concrete examples, before/after pairs, sample sentences,
562+
and illustrative ✅/❌ comparisons are MORE valuable than abstract rules.
563+
When facts contain examples, include them. Never drop an example to make
564+
room for an abstract restatement of the same point.
549565
- Operations target sections by ``section_id`` (use the ``id`` field of the
550566
section in CURRENT DOCUMENT, NOT the heading). Block operations target
551567
blocks by ``index`` (0-based, against the section's current block list).
552-
- Add new content with ``append_block``, ``insert_block``, or ``add_section``.
553-
Prefer extending an existing section over creating a new one.
554-
- Modify existing content with ``replace_block`` or ``replace_section_blocks``
555-
ONLY when the supporting facts contradict the current text. Do NOT rewrite
556-
for style, brevity, or "improvement".
557-
- Remove stale content with ``remove_block`` or ``remove_section`` ONLY when
558-
the supporting facts directly contradict it.
568+
- **Add** new content with ``append_block``, ``insert_block``, or ``add_section``
569+
when facts introduce information not yet covered. Prefer extending an
570+
existing section over creating a new one.
571+
- **Update** existing content with ``replace_block`` or ``replace_section_blocks``
572+
when new facts provide corrections, updates, or more specific information
573+
about topics already in the document.
574+
- **Remove** content with ``remove_block`` or ``remove_section`` ONLY when
575+
the new facts explicitly contradict or supersede it.
559576
- NEVER emit operations whose only effect is to reword unchanged content.
560577
- NEVER emit operations to "normalize" formatting (numbered → bulleted, casing
561578
changes, paragraph → list, etc).
562579
- Every operation MUST be justifiable by a specific fact in SUPPORTING FACTS.
580+
- Output ``{"operations": []}`` only if the new facts are already reflected
581+
in the document (e.g., from a concurrent update).
563582
564583
ALLOWED OPERATIONS (each line shows the JSON shape)
565584
- ``{"op": "append_block", "section_id": "...", "block": {...}}``
@@ -587,7 +606,12 @@ def build_final_system_prompt(mission: str | None = None) -> str:
587606
- No changes needed → ``{"operations": []}``
588607
- Add one bullet to an existing "Members" section →
589608
``{"operations": [{"op": "append_block", "section_id": "members",
590-
"block": {"type": "bullet_list", "items": ["Carol — junior engineer"]}}]}``"""
609+
"block": {"type": "bullet_list", "items": ["Carol — junior engineer"]}}]}``
610+
- Replace a paragraph that has been corrected by new facts →
611+
``{"operations": [{"op": "replace_block", "section_id": "overview",
612+
"index": 0, "block": {"type": "paragraph", "text": "Updated summary."}}]}``
613+
- Remove an obsolete block →
614+
``{"operations": [{"op": "remove_block", "section_id": "status", "index": 2}]}``"""
591615

592616

593617
def build_structured_delta_prompt(
@@ -631,15 +655,14 @@ def build_structured_delta_prompt(
631655
f"## Topic\n{source_query}\n\n"
632656
f"## CURRENT DOCUMENT (apply ops to this; reference section ids as listed)\n"
633657
f"```json\n{current_document_json}\n```\n\n"
634-
f"## CANDIDATE SUMMARY (hint only — do NOT copy wording wholesale)\n"
658+
f"## NEW INFORMATION SYNTHESIS (context for how new facts relate to the topic)\n"
635659
f"```markdown\n{candidate_markdown}\n```\n\n"
636-
f"## SUPPORTING FACTS (the only source of new information)\n{facts_block}"
660+
f"## SUPPORTING FACTS (new since last refresh — integrate these)\n{facts_block}"
637661
f"{budget_hint}\n\n"
638662
"## Task\n"
639-
"Output a JSON object matching the operations schema. Use an empty list "
640-
"if no new fact requires a change. Otherwise, emit the smallest set of "
641-
"operations that reflects the new facts in CURRENT DOCUMENT, preserving "
642-
"all unchanged sections and blocks by simply not mentioning them."
663+
"Output a JSON object matching the operations schema. Integrate the new "
664+
"supporting facts into CURRENT DOCUMENT. Add, update, or remove content "
665+
"as needed. Preserve unchanged sections and blocks by not mentioning them."
643666
)
644667

645668

hindsight-api-slim/hindsight_api/engine/reflect/tools.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ async def tool_search_observations(
135135
last_consolidated_at: datetime | None = None,
136136
pending_consolidation: int = 0,
137137
source_facts_max_tokens: int = -1,
138+
created_after: datetime | None = None,
139+
created_before: datetime | None = None,
138140
) -> dict[str, Any]:
139141
"""
140142
Search consolidated observations using recall.
@@ -178,6 +180,8 @@ async def tool_search_observations(
178180
tags_match=tags_match,
179181
tag_groups=tag_groups,
180182
include_source_facts=include_source_facts,
183+
created_after=created_after,
184+
created_before=created_before,
181185
_connection_budget=1,
182186
_quiet=True,
183187
**recall_kwargs,
@@ -214,6 +218,8 @@ async def tool_recall(
214218
max_chunk_tokens: int = 1000,
215219
fact_types: list[str] | None = None,
216220
include_chunks: bool = True,
221+
created_after: datetime | None = None,
222+
created_before: datetime | None = None,
217223
) -> dict[str, Any]:
218224
"""
219225
Search memories using TEMPR retrieval.
@@ -250,6 +256,8 @@ async def tool_recall(
250256
tags=tags,
251257
tags_match=tags_match,
252258
tag_groups=tag_groups,
259+
created_after=created_after,
260+
created_before=created_before,
253261
_connection_budget=connection_budget,
254262
_quiet=True, # Suppress logging for internal operations
255263
include_chunks=include_chunks,

hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import logging
1010
from abc import ABC, abstractmethod
11+
from datetime import datetime
1112

1213
from .tags import TagGroup, TagsMatch
1314
from .types import GraphRetrievalTimings, RetrievalResult
@@ -45,6 +46,8 @@ async def retrieve(
4546
tags: list[str] | None = None, # Visibility scope tags for filtering
4647
tags_match: TagsMatch = "any", # How to match tags: 'any' (OR) or 'all' (AND)
4748
tag_groups: list[TagGroup] | None = None, # Compound boolean tag filter groups
49+
created_after: datetime | None = None, # Only include memory_units created after this time
50+
created_before: datetime | None = None, # Only include memory_units created before this time
4851
) -> tuple[list[RetrievalResult], GraphRetrievalTimings | None]:
4952
"""
5053
Retrieve relevant facts via graph traversal.

0 commit comments

Comments (0)