Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ class RecallResult(BaseModel):
source_fact_ids: list[str] | None = (
None # IDs of source facts (observation type only, when source_facts is enabled)
)
similarity: float | None = None # Cosine similarity to the query (semantic recall only; None otherwise)


class EntityObservationResponse(BaseModel):
Expand Down Expand Up @@ -3233,6 +3234,7 @@ def _fact_to_result(fact: "MemoryFact") -> RecallResult:
chunk_id=fact.chunk_id,
tags=fact.tags,
source_fact_ids=fact.source_fact_ids,
similarity=fact.similarity,
)

recall_results = [_fact_to_result(fact) for fact in core_result.results]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1191,14 +1191,27 @@ def _build_observations_for_llm(
observations: "list[MemoryFact]",
source_facts: "dict[str, MemoryFact]",
) -> list[dict[str, Any]]:
"""Serialize MemoryFact observations into dicts for the consolidation LLM prompt."""
"""Serialize MemoryFact observations into dicts for the consolidation LLM prompt.

Observations are ordered by semantic similarity descending so the strongest
merge candidates appear first — token-attention bias in the LLM favours
leading items, which nudges the model toward UPDATE on the closest
existing observation instead of CREATE.
"""
observations = sorted(
observations,
key=lambda o: o.similarity if o.similarity is not None else 0.0,
reverse=True,
)
obs_list = []
for obs in observations:
obs_data: dict[str, Any] = {
"id": obs.id,
"text": obs.text,
"proof_count": len(obs.source_fact_ids or []) or 1,
}
if obs.similarity is not None:
obs_data["similarity"] = round(obs.similarity, 3)
if obs.occurred_start:
obs_data["occurred_start"] = obs.occurred_start
if obs.occurred_end:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@
- id: unique identifier for updating
- text: the observation content
- proof_count: number of supporting memories
- similarity (when present): cosine similarity (0–1) of this observation to
the embedding used to recall it. Higher = more semantically related to the
facts being consolidated. Observations with similarity ≥ 0.85 are very
likely the SAME facet — strongly prefer UPDATE. ≥ 0.95 should almost
always UPDATE unless the new fact is structurally distinct.
- occurred_start/occurred_end: temporal range of source facts
- source_memories: array of supporting facts with their text and dates

Expand Down
1 change: 1 addition & 0 deletions hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3674,6 +3674,7 @@ def _make_source_fact(sid: str, r: Any) -> MemoryFact:
chunk_id=result_dict.get("chunk_id"),
tags=result_dict.get("tags"),
source_fact_ids=source_fact_ids_by_obs.get(result_id) if include_source_facts else None,
similarity=result_dict.get("semantic_similarity"),
)
)

Expand Down
8 changes: 8 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/response_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,14 @@ def parse_metadata(cls, v: Any) -> dict[str, str] | None:
None,
description="IDs of source facts this observation was derived from (observation type only, when source_facts is enabled)",
)
similarity: float | None = Field(
None,
description=(
"Cosine similarity (0–1) to the query embedding when this fact was "
"surfaced via semantic recall. None when the fact did not arrive "
"through a semantic retrieval path."
),
)


class ChunkInfo(BaseModel):
Expand Down
39 changes: 39 additions & 0 deletions hindsight-api-slim/tests/test_consolidation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1939,6 +1939,45 @@ def test_consolidation_prompt_observations_mission():
assert spec in rendered


def test_consolidation_prompt_explains_similarity():
    """The prompt documents the new `similarity` field so the LLM can rely on it."""
    from hindsight_api.engine.consolidation.prompts import build_batch_consolidation_prompt

    rendered = build_batch_consolidation_prompt()

    # The field name and the concrete action thresholds the LLM is told to
    # apply must all appear verbatim; if those constants ever change, this
    # test should be updated deliberately alongside the prompt.
    for expected_token in ("similarity", "0.85", "0.95"):
        assert expected_token in rendered


def test_build_observations_for_llm_emits_similarity_and_sorts():
    """_build_observations_for_llm copies MemoryFact.similarity through and orders by it desc.

    Sort order matters: the LLM's token-attention bias favours leading items,
    so the strongest merge candidate must come first to nudge UPDATE over CREATE.
    """
    from hindsight_api.engine.consolidation.consolidator import _build_observations_for_llm
    from hindsight_api.engine.response_models import MemoryFact

    def make_obs(obs_id, obs_text, score=None):
        # Helper: build an observation-type MemoryFact, omitting `similarity`
        # entirely when no score is given so the None path is exercised.
        kwargs = {"id": obs_id, "text": obs_text, "fact_type": "observation"}
        if score is not None:
            kwargs["similarity"] = score
        return MemoryFact(**kwargs)

    # Input order is deliberately scrambled and includes a None.
    scrambled = [
        make_obs("o-low", "Unrelated.", 0.31),
        make_obs("o-high", "Near-duplicate.", 0.972),
        make_obs("o-none", "No similarity attached."),
        make_obs("o-mid", "Related.", 0.65),
    ]

    serialized = _build_observations_for_llm(scrambled, {})

    assert [entry["id"] for entry in serialized] == ["o-high", "o-mid", "o-low", "o-none"]

    expected_scores = {"o-high": 0.972, "o-mid": 0.65, "o-low": 0.31}
    for entry in serialized[:3]:
        assert entry["similarity"] == expected_scores[entry["id"]]
    # Absent score must not surface as 0 — that would falsely tell the LLM the obs is unrelated.
    assert "similarity" not in serialized[3]


def test_observations_mission_config():
"""Test that observations_mission is loaded from env and exposed as configurable."""
import os
Expand Down