diff --git a/hindsight-api-slim/hindsight_api/api/http.py b/hindsight-api-slim/hindsight_api/api/http.py index c357f4116..f65540b95 100644 --- a/hindsight-api-slim/hindsight_api/api/http.py +++ b/hindsight-api-slim/hindsight_api/api/http.py @@ -226,6 +226,7 @@ class RecallResult(BaseModel): source_fact_ids: list[str] | None = ( None # IDs of source facts (observation type only, when source_facts is enabled) ) + similarity: float | None = None # Cosine similarity to the query (semantic recall only; None otherwise) class EntityObservationResponse(BaseModel): @@ -3233,6 +3234,7 @@ def _fact_to_result(fact: "MemoryFact") -> RecallResult: chunk_id=fact.chunk_id, tags=fact.tags, source_fact_ids=fact.source_fact_ids, + similarity=fact.similarity, ) recall_results = [_fact_to_result(fact) for fact in core_result.results] diff --git a/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py b/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py index cd81e5ab8..4d7cac7ee 100644 --- a/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py +++ b/hindsight-api-slim/hindsight_api/engine/consolidation/consolidator.py @@ -1191,7 +1191,18 @@ def _build_observations_for_llm( observations: "list[MemoryFact]", source_facts: "dict[str, MemoryFact]", ) -> list[dict[str, Any]]: - """Serialize MemoryFact observations into dicts for the consolidation LLM prompt.""" + """Serialize MemoryFact observations into dicts for the consolidation LLM prompt. + + Observations are ordered by semantic similarity descending so the strongest + merge candidates appear first — token-attention bias in the LLM favours + leading items, which nudges the model toward UPDATE on the closest + existing observation instead of CREATE. + """ + observations = sorted( + observations, + key=lambda o: o.similarity if o.similarity is not None else 0.0, + reverse=True, + ) obs_list = [] for obs in observations: obs_data: dict[str, Any] = { @@ -1199,6 +1210,8 @@ def _build_observations_for_llm( "text": obs.text, "proof_count": len(obs.source_fact_ids or []) or 1, } + if obs.similarity is not None: + obs_data["similarity"] = round(obs.similarity, 3) if obs.occurred_start: obs_data["occurred_start"] = obs.occurred_start if obs.occurred_end: diff --git a/hindsight-api-slim/hindsight_api/engine/consolidation/prompts.py b/hindsight-api-slim/hindsight_api/engine/consolidation/prompts.py index 342402589..437f675fa 100644 --- a/hindsight-api-slim/hindsight_api/engine/consolidation/prompts.py +++ b/hindsight-api-slim/hindsight_api/engine/consolidation/prompts.py @@ -36,6 +36,11 @@ - id: unique identifier for updating - text: the observation content - proof_count: number of supporting memories +- similarity (when present): cosine similarity (0–1) of this observation to + the embedding used to recall it. Higher = more semantically related to the + facts being consolidated. Observations with similarity ≥ 0.85 are very + likely the SAME facet — strongly prefer UPDATE. ≥ 0.95 should almost + always UPDATE unless the new fact is structurally distinct. - occurred_start/occurred_end: temporal range of source facts - source_memories: array of supporting facts with their text and dates diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index 4f633e91b..88b98219d 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -3674,6 +3674,7 @@ def _make_source_fact(sid: str, r: Any) -> MemoryFact: chunk_id=result_dict.get("chunk_id"), tags=result_dict.get("tags"), source_fact_ids=source_fact_ids_by_obs.get(result_id) if include_source_facts else None, + similarity=result_dict.get("semantic_similarity"), ) ) diff --git a/hindsight-api-slim/hindsight_api/engine/response_models.py b/hindsight-api-slim/hindsight_api/engine/response_models.py index bfd7a69c4..73167b632 100644 --- a/hindsight-api-slim/hindsight_api/engine/response_models.py +++ b/hindsight-api-slim/hindsight_api/engine/response_models.py @@ -179,6 +179,14 @@ def parse_metadata(cls, v: Any) -> dict[str, str] | None: None, description="IDs of source facts this observation was derived from (observation type only, when source_facts is enabled)", ) + similarity: float | None = Field( + None, + description=( + "Cosine similarity (0–1) to the query embedding when this fact was " + "surfaced via semantic recall. None when the fact did not arrive " + "through a semantic retrieval path." + ), + ) class ChunkInfo(BaseModel): diff --git a/hindsight-api-slim/tests/test_consolidation.py b/hindsight-api-slim/tests/test_consolidation.py index 5a4c99f11..f07cf53c7 100644 --- a/hindsight-api-slim/tests/test_consolidation.py +++ b/hindsight-api-slim/tests/test_consolidation.py @@ -1939,6 +1939,45 @@ def test_consolidation_prompt_observations_mission(): assert spec in rendered +def test_consolidation_prompt_explains_similarity(): + """The prompt documents the new `similarity` field so the LLM can rely on it.""" + from hindsight_api.engine.consolidation.prompts import build_batch_consolidation_prompt + + prompt = build_batch_consolidation_prompt() + assert "similarity" in prompt + # Concrete thresholds the LLM is told to act on must be in the prompt; + # if these constants change, the test should change deliberately. + assert "0.85" in prompt + assert "0.95" in prompt + + +def test_build_observations_for_llm_emits_similarity_and_sorts(): + """_build_observations_for_llm copies MemoryFact.similarity through and orders by it desc. + + Sort order matters: the LLM's token-attention bias favours leading items, + so the strongest merge candidate must come first to nudge UPDATE over CREATE. + """ + from hindsight_api.engine.consolidation.consolidator import _build_observations_for_llm + from hindsight_api.engine.response_models import MemoryFact + + obs_low = MemoryFact(id="o-low", text="Unrelated.", fact_type="observation", similarity=0.31) + obs_high = MemoryFact(id="o-high", text="Near-duplicate.", fact_type="observation", similarity=0.972) + obs_none = MemoryFact(id="o-none", text="No similarity attached.", fact_type="observation") + obs_mid = MemoryFact(id="o-mid", text="Related.", fact_type="observation", similarity=0.65) + + # Input order is deliberately scrambled and includes a None. + result = _build_observations_for_llm([obs_low, obs_high, obs_none, obs_mid], {}) + + ids_in_order = [r["id"] for r in result] + assert ids_in_order == ["o-high", "o-mid", "o-low", "o-none"] + + assert result[0]["similarity"] == 0.972 + assert result[1]["similarity"] == 0.65 + assert result[2]["similarity"] == 0.31 + # Absent score must not surface as 0 — that would falsely tell the LLM the obs is unrelated. + assert "similarity" not in result[3] + + def test_observations_mission_config(): """Test that observations_mission is loaded from env and exposed as configurable.""" import os