Skip to content

Commit e90cfa4

Browse files
authored
fix(reflect): scope delta mental model recall to new memories only (#1192)
Delta mode mental model refresh was running a full recall across ALL memories (identical to full mode), then passing all facts to a second LLM call for delta ops. This caused content bloat, duplication, and made delta strictly more expensive than full mode.

Changes:
- Add `created_after`/`created_before` time-range filters to the recall pipeline (retrieval.py, link_expansion_retrieval.py, graph_retrieval.py), threaded through `recall_async` -> `reflect_async` -> tool closures
- Delta refresh passes `last_refreshed_at` as `created_after` so the agentic loop only retrieves memories created/updated since the last refresh (uses `updated_at` to catch consolidation updates)
- Short-circuit delta when no new facts are found (skip the LLM call, preserve existing content)
- Accumulate `based_on` across delta refreshes (merge previous + new, deduplicated by ID)
- Pass context to the reflect agent during mental-model refresh with the document name, stay-on-topic guidance, and example-preservation instructions
- Rewrite the delta prompt: preserve existing content from prior refreshes, merge overlapping topics, preserve concrete examples over abstract rules
- Add recall time-range unit tests (8 tests)
- Add an integration test verifying delta fusion quality
1 parent 1078566 commit e90cfa4

9 files changed

Lines changed: 629 additions & 34 deletions

File tree

hindsight-api-slim/hindsight_api/engine/memory_engine.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2514,6 +2514,8 @@ async def recall_async(
25142514
tags: list[str] | None = None,
25152515
tags_match: TagsMatch = "any",
25162516
tag_groups: list[TagGroup] | None = None,
2517+
created_after: datetime | None = None,
2518+
created_before: datetime | None = None,
25172519
_connection_budget: int | None = None,
25182520
_quiet: bool = False,
25192521
) -> RecallResultModel:
@@ -2659,6 +2661,8 @@ async def recall_async(
26592661
tags=tags,
26602662
tags_match=tags_match,
26612663
tag_groups=tag_groups,
2664+
created_after=created_after,
2665+
created_before=created_before,
26622666
connection_budget=_connection_budget,
26632667
quiet=_quiet,
26642668
include_source_facts=include_source_facts,
@@ -2787,6 +2791,8 @@ async def _search_with_retries(
27872791
tags: list[str] | None = None,
27882792
tags_match: TagsMatch = "any",
27892793
tag_groups: list[TagGroup] | None = None,
2794+
created_after: datetime | None = None,
2795+
created_before: datetime | None = None,
27902796
connection_budget: int | None = None,
27912797
quiet: bool = False,
27922798
include_source_facts: bool = False,
@@ -2911,6 +2917,8 @@ async def _search_with_retries(
29112917
tags=tags,
29122918
tags_match=tags_match,
29132919
tag_groups=tag_groups,
2920+
created_after=created_after,
2921+
created_before=created_before,
29142922
)
29152923
parallel_duration = time.time() - parallel_start
29162924
finally:
@@ -5509,6 +5517,8 @@ async def reflect_async(
55095517
recall_include_chunks: bool | None = None,
55105518
recall_max_tokens_override: int | None = None,
55115519
recall_chunks_max_tokens_override: int | None = None,
5520+
created_after: datetime | None = None,
5521+
created_before: datetime | None = None,
55125522
_skip_span: bool = False,
55135523
) -> ReflectResult:
55145524
"""
@@ -5661,6 +5671,8 @@ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, An
56615671
last_consolidated_at=last_consolidated_at,
56625672
pending_consolidation=pending_consolidation,
56635673
source_facts_max_tokens=reflect_source_facts_max_tokens,
5674+
created_after=created_after,
5675+
created_before=created_before,
56645676
)
56655677

56665678
# Determine which tools to enable based on fact_types and exclude_mental_models
@@ -5688,6 +5700,8 @@ async def recall_fn(
56885700
max_chunk_tokens=max_chunk_tokens,
56895701
fact_types=recall_fact_types if fact_types is not None else None,
56905702
include_chunks=effective_recall_include_chunks,
5703+
created_after=created_after,
5704+
created_before=created_before,
56915705
)
56925706

56935707
async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
@@ -7077,9 +7091,26 @@ async def refresh_mental_model(
70777091

70787092
# Run reflect with the source query, excluding the mental model being refreshed
70797093
# Skip creating a nested "hindsight.reflect" span since we already have "hindsight.mental_model_refresh"
7094+
# Build context to guide the reflect agent: tell it what this mental
7095+
# model is about so it stays on-topic and produces high-quality content.
7096+
mm_name = mental_model.get("name") or mental_model_id
7097+
refresh_context = (
7098+
f'You are writing a document called "{mm_name}". '
7099+
f"ONLY include content that directly answers the topic query. "
7100+
f"Discard observations that are tangential or off-topic — retrieval may return "
7101+
f"loosely related content that does not belong in this document.\n\n"
7102+
f"Quality guidelines:\n"
7103+
f"- Preserve concrete examples, before/after pairs, and sample sentences "
7104+
f"from the observations. These teach more than abstract rules.\n"
7105+
f"- If observations contain illustrative examples (e.g. ✅/❌ pairs, "
7106+
f"rewrites, sample phrases), include them in your answer.\n"
7107+
f"- Structure the document around the topic, not around the sources."
7108+
)
7109+
70807110
reflect_kwargs: dict[str, Any] = dict(
70817111
bank_id=bank_id,
70827112
query=mental_model["source_query"],
7113+
context=refresh_context,
70837114
request_context=request_context,
70847115
tags=tag_filtering.tags,
70857116
tags_match=tag_filtering.tags_match,
@@ -7097,6 +7128,17 @@ async def refresh_mental_model(
70977128
stored_max_tokens = mental_model.get("max_tokens")
70987129
if stored_max_tokens is not None:
70997130
reflect_kwargs["max_tokens"] = stored_max_tokens
7131+
7132+
# Delta mode: scope recall to memories created since the last refresh
7133+
# so the agentic loop only retrieves genuinely new information.
7134+
if use_delta:
7135+
last_refreshed_at_raw = mental_model.get("last_refreshed_at")
7136+
if last_refreshed_at_raw is not None:
7137+
if isinstance(last_refreshed_at_raw, str):
7138+
reflect_kwargs["created_after"] = datetime.fromisoformat(last_refreshed_at_raw)
7139+
else:
7140+
reflect_kwargs["created_after"] = last_refreshed_at_raw
7141+
71007142
reflect_result = await self.reflect_async(**reflect_kwargs)
71017143

71027144
# Build reflect_response payload to store
@@ -7127,6 +7169,20 @@ async def refresh_mental_model(
71277169
)
71287170
based_on_serialized_payload[fact_type] = serialized_facts
71297171

7172+
# In delta mode, based_on must accumulate: the mental model is
7173+
# grounded on ALL facts ever used, not just the latest delta's new
7174+
# ones. Merge previous based_on with current, deduplicating by id.
7175+
if use_delta:
7176+
prev_rr = mental_model.get("reflect_response") or {}
7177+
prev_based_on = prev_rr.get("based_on") or {}
7178+
for ftype, prev_facts in prev_based_on.items():
7179+
if not isinstance(prev_facts, list):
7180+
continue
7181+
new_ids = {f["id"] for f in based_on_serialized_payload.get(ftype, [])}
7182+
carried = [f for f in prev_facts if isinstance(f, dict) and f.get("id") not in new_ids]
7183+
if carried:
7184+
based_on_serialized_payload.setdefault(ftype, []).extend(carried)
7185+
71307186
reflect_response_payload = {
71317187
"text": reflect_result.text,
71327188
"based_on": based_on_serialized_payload,
@@ -7179,6 +7235,23 @@ async def refresh_mental_model(
71797235
for _ftype, facts in based_on_serialized_payload.items():
71807236
supporting_facts.extend(facts)
71817237

7238+
# No new facts since last refresh — skip the delta LLM call
7239+
# and preserve existing content unchanged.
7240+
if not supporting_facts:
7241+
logger.info(
7242+
f"[MENTAL_MODELS] Delta refresh for {mental_model_id}: "
7243+
"no new facts found, preserving content"
7244+
)
7245+
reflect_response_payload["delta_applied"] = False
7246+
reflect_response_payload["delta_skipped_reason"] = "no_new_facts"
7247+
return await self.update_mental_model(
7248+
bank_id,
7249+
mental_model_id,
7250+
reflect_response=reflect_response_payload,
7251+
last_refreshed_source_query=current_source_query,
7252+
request_context=request_context,
7253+
)
7254+
71827255
# Op JSON is denser than the rendered markdown — each op
71837256
# carries the section_id, op type, and a full block payload
71847257
# whose ``text`` may quote the original passage. Budget 1.5×
@@ -7523,7 +7596,7 @@ def _get(key: str) -> Any:
75237596
tags_match = "any" # default: untagged MM is "global", tagged MM matches any overlap
75247597

75257598
params: list[Any] = [bank_id, last_refreshed_at]
7526-
where = ["bank_id = $1", "created_at > $2"]
7599+
where = ["bank_id = $1", "updated_at > $2"]
75277600

75287601
if mm_tags:
75297602
operator, include_untagged = _parse_tags_match(tags_match)

hindsight-api-slim/hindsight_api/engine/reflect/prompts.py

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -525,41 +525,60 @@ def build_final_system_prompt(mission: str | None = None) -> str:
525525
FINAL_SYSTEM_PROMPT = build_final_system_prompt()
526526

527527

528-
STRUCTURED_DELTA_SYSTEM_PROMPT = """You are computing a *minimal patch* to a structured document.
528+
STRUCTURED_DELTA_SYSTEM_PROMPT = """You are integrating *new information* into an existing structured document.
529529
530530
You will be given:
531-
1. CURRENT DOCUMENT (JSON) — the existing structured mental model. Each section
531+
1. TOPIC — the question this document answers. Content that does not help
532+
answer this question is OFF-TOPIC and should be removed.
533+
2. CURRENT DOCUMENT (JSON) — the existing structured mental model. Each section
532534
has a stable ``id``, a ``heading``, a ``level`` (1..6), and an ordered list
533535
of ``blocks``. Blocks are typed: ``paragraph``, ``bullet_list``,
534536
``ordered_list``, or ``code``.
535-
2. CANDIDATE SUMMARY (markdown) — a freshly generated synthesis of the latest
536-
memories, useful only as a hint about *what new information exists*. You
537-
MUST NOT copy its formatting or wording wholesale; it is not the target.
538-
3. SUPPORTING FACTS — the observations and facts the candidate is grounded in.
539-
Treat these as the only source of new information.
537+
3. NEW INFORMATION SYNTHESIS (markdown) — a synthesis showing how the new facts
538+
relate to the document's topic. Use it to understand context and relevance,
539+
but do NOT copy its formatting or wording wholesale.
540+
4. SUPPORTING FACTS — observations and facts created since the last refresh.
541+
These are genuinely new — they were NOT available when the current document
542+
was written.
540543
541544
Your task: output a JSON object ``{"operations": [...]}``. Applied to CURRENT
542-
DOCUMENT, the operations must produce the smallest possible change that
543-
reflects the new facts.
544-
545-
ABSOLUTE RULES
546-
- If CURRENT DOCUMENT already covers all the supporting facts, output
547-
exactly ``{"operations": []}``. An empty operation list IS the correct
548-
answer when nothing new has come in. This is the most common case.
545+
DOCUMENT, the operations must produce a document that best answers the TOPIC
546+
by integrating the new facts.
547+
548+
RULES
549+
- These facts are NEW since the last refresh. The existing document already
550+
captures all prior information from earlier refreshes. Your job is to
551+
integrate the new facts into the existing document.
552+
- **Preserve existing content**: The current document was built from prior facts
553+
that you cannot see. Do NOT remove or replace existing sections just because
554+
the new facts do not reference them. Only remove content when the new facts
555+
explicitly contradict or supersede it.
556+
- **Merge overlapping topics**: When new facts cover topics that overlap with
557+
existing sections, merge the new information INTO the existing section
558+
rather than creating duplicates. When new facts provide more specific or
559+
authoritative guidance on a topic already covered generically, update the
560+
existing content to reflect the more specific guidance.
561+
- **Preserve examples**: Concrete examples, before/after pairs, sample sentences,
562+
and illustrative ✅/❌ comparisons are MORE valuable than abstract rules.
563+
When facts contain examples, include them. Never drop an example to make
564+
room for an abstract restatement of the same point.
549565
- Operations target sections by ``section_id`` (use the ``id`` field of the
550566
section in CURRENT DOCUMENT, NOT the heading). Block operations target
551567
blocks by ``index`` (0-based, against the section's current block list).
552-
- Add new content with ``append_block``, ``insert_block``, or ``add_section``.
553-
Prefer extending an existing section over creating a new one.
554-
- Modify existing content with ``replace_block`` or ``replace_section_blocks``
555-
ONLY when the supporting facts contradict the current text. Do NOT rewrite
556-
for style, brevity, or "improvement".
557-
- Remove stale content with ``remove_block`` or ``remove_section`` ONLY when
558-
the supporting facts directly contradict it.
568+
- **Add** new content with ``append_block``, ``insert_block``, or ``add_section``
569+
when facts introduce information not yet covered. Prefer extending an
570+
existing section over creating a new one.
571+
- **Update** existing content with ``replace_block`` or ``replace_section_blocks``
572+
when new facts provide corrections, updates, or more specific information
573+
about topics already in the document.
574+
- **Remove** content with ``remove_block`` or ``remove_section`` ONLY when
575+
the new facts explicitly contradict or supersede it.
559576
- NEVER emit operations whose only effect is to reword unchanged content.
560577
- NEVER emit operations to "normalize" formatting (numbered → bulleted, casing
561578
changes, paragraph → list, etc).
562579
- Every operation MUST be justifiable by a specific fact in SUPPORTING FACTS.
580+
- Output ``{"operations": []}`` only if the new facts are already reflected
581+
in the document (e.g., from a concurrent update).
563582
564583
ALLOWED OPERATIONS (each line shows the JSON shape)
565584
- ``{"op": "append_block", "section_id": "...", "block": {...}}``
@@ -587,7 +606,12 @@ def build_final_system_prompt(mission: str | None = None) -> str:
587606
- No changes needed → ``{"operations": []}``
588607
- Add one bullet to an existing "Members" section →
589608
``{"operations": [{"op": "append_block", "section_id": "members",
590-
"block": {"type": "bullet_list", "items": ["Carol — junior engineer"]}}]}``"""
609+
"block": {"type": "bullet_list", "items": ["Carol — junior engineer"]}}]}``
610+
- Replace a paragraph that has been corrected by new facts →
611+
``{"operations": [{"op": "replace_block", "section_id": "overview",
612+
"index": 0, "block": {"type": "paragraph", "text": "Updated summary."}}]}``
613+
- Remove an obsolete block →
614+
``{"operations": [{"op": "remove_block", "section_id": "status", "index": 2}]}``"""
591615

592616

593617
def build_structured_delta_prompt(
@@ -631,15 +655,14 @@ def build_structured_delta_prompt(
631655
f"## Topic\n{source_query}\n\n"
632656
f"## CURRENT DOCUMENT (apply ops to this; reference section ids as listed)\n"
633657
f"```json\n{current_document_json}\n```\n\n"
634-
f"## CANDIDATE SUMMARY (hint only — do NOT copy wording wholesale)\n"
658+
f"## NEW INFORMATION SYNTHESIS (context for how new facts relate to the topic)\n"
635659
f"```markdown\n{candidate_markdown}\n```\n\n"
636-
f"## SUPPORTING FACTS (the only source of new information)\n{facts_block}"
660+
f"## SUPPORTING FACTS (new since last refresh — integrate these)\n{facts_block}"
637661
f"{budget_hint}\n\n"
638662
"## Task\n"
639-
"Output a JSON object matching the operations schema. Use an empty list "
640-
"if no new fact requires a change. Otherwise, emit the smallest set of "
641-
"operations that reflects the new facts in CURRENT DOCUMENT, preserving "
642-
"all unchanged sections and blocks by simply not mentioning them."
663+
"Output a JSON object matching the operations schema. Integrate the new "
664+
"supporting facts into CURRENT DOCUMENT. Add, update, or remove content "
665+
"as needed. Preserve unchanged sections and blocks by not mentioning them."
643666
)
644667

645668

hindsight-api-slim/hindsight_api/engine/reflect/tools.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ async def tool_search_observations(
135135
last_consolidated_at: datetime | None = None,
136136
pending_consolidation: int = 0,
137137
source_facts_max_tokens: int = -1,
138+
created_after: datetime | None = None,
139+
created_before: datetime | None = None,
138140
) -> dict[str, Any]:
139141
"""
140142
Search consolidated observations using recall.
@@ -178,6 +180,8 @@ async def tool_search_observations(
178180
tags_match=tags_match,
179181
tag_groups=tag_groups,
180182
include_source_facts=include_source_facts,
183+
created_after=created_after,
184+
created_before=created_before,
181185
_connection_budget=1,
182186
_quiet=True,
183187
**recall_kwargs,
@@ -214,6 +218,8 @@ async def tool_recall(
214218
max_chunk_tokens: int = 1000,
215219
fact_types: list[str] | None = None,
216220
include_chunks: bool = True,
221+
created_after: datetime | None = None,
222+
created_before: datetime | None = None,
217223
) -> dict[str, Any]:
218224
"""
219225
Search memories using TEMPR retrieval.
@@ -250,6 +256,8 @@ async def tool_recall(
250256
tags=tags,
251257
tags_match=tags_match,
252258
tag_groups=tag_groups,
259+
created_after=created_after,
260+
created_before=created_before,
253261
_connection_budget=connection_budget,
254262
_quiet=True, # Suppress logging for internal operations
255263
include_chunks=include_chunks,

hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import logging
1010
from abc import ABC, abstractmethod
11+
from datetime import datetime
1112

1213
from .tags import TagGroup, TagsMatch
1314
from .types import GraphRetrievalTimings, RetrievalResult
@@ -45,6 +46,8 @@ async def retrieve(
4546
tags: list[str] | None = None, # Visibility scope tags for filtering
4647
tags_match: TagsMatch = "any", # How to match tags: 'any' (OR) or 'all' (AND)
4748
tag_groups: list[TagGroup] | None = None, # Compound boolean tag filter groups
49+
created_after: datetime | None = None, # Only include memory_units created after this time
50+
created_before: datetime | None = None, # Only include memory_units created before this time
4851
) -> tuple[list[RetrievalResult], GraphRetrievalTimings | None]:
4952
"""
5053
Retrieve relevant facts via graph traversal.

0 commit comments

Comments (0)