diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py index 42bccd64c..280920779 100644 --- a/hindsight-api-slim/hindsight_api/config.py +++ b/hindsight-api-slim/hindsight_api/config.py @@ -160,6 +160,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: ENV_LLM_TEMPERATURE_RETAIN = "HINDSIGHT_API_LLM_TEMPERATURE_RETAIN" ENV_LLM_TEMPERATURE_REFLECT = "HINDSIGHT_API_LLM_TEMPERATURE_REFLECT" ENV_LLM_TEMPERATURE_CONSOLIDATION = "HINDSIGHT_API_LLM_TEMPERATURE_CONSOLIDATION" +ENV_LLM_TEMPERATURE_BANK_MISSION = "HINDSIGHT_API_LLM_TEMPERATURE_BANK_MISSION" # Multi-LLM strategy. Extra LLMs are configured by index alongside the unindexed # primary (e.g. HINDSIGHT_API_LLM_1_PROVIDER, HINDSIGHT_API_LLM_2_PROVIDER, ...), @@ -184,6 +185,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: DEFAULT_LLM_TEMPERATURE_RETAIN = 0.1 # fact extraction DEFAULT_LLM_TEMPERATURE_REFLECT = 0.9 # reflect "thinking" DEFAULT_LLM_TEMPERATURE_CONSOLIDATION = 0.0 # mental-model delta / dedup +DEFAULT_LLM_TEMPERATURE_BANK_MISSION = 0.3 # bank/folder mission merge # Defaults for service tiers DEFAULT_LLM_GROQ_SERVICE_TIER = "auto" # "on_demand", "flex", or "auto" @@ -1586,6 +1588,7 @@ class HindsightConfig: llm_temperature_retain: float | None llm_temperature_reflect: float | None llm_temperature_consolidation: float | None + llm_temperature_bank_mission: float | None # LiteLLM Router chain (provider-specific; consumed by the "litellmrouter" provider). # List of deployment dicts evaluated in order with fallback on transient errors. @@ -2333,6 +2336,9 @@ def from_env(cls) -> "HindsightConfig": llm_temperature_consolidation=_resolve_operation_temperature( ENV_LLM_TEMPERATURE_CONSOLIDATION, DEFAULT_LLM_TEMPERATURE_CONSOLIDATION ), + llm_temperature_bank_mission=_resolve_operation_temperature( + ENV_LLM_TEMPERATURE_BANK_MISSION, DEFAULT_LLM_TEMPERATURE_BANK_MISSION + ), llm_litellmrouter_config=_parse_llm_router_config(ENV_LLM_LITELLMROUTER_CONFIG), # Vertex AI llm_vertexai_project_id=os.getenv(ENV_LLM_VERTEXAI_PROJECT_ID) or DEFAULT_LLM_VERTEXAI_PROJECT_ID, diff --git a/hindsight-api-slim/hindsight_api/engine/retain/bank_utils.py b/hindsight-api-slim/hindsight_api/engine/retain/bank_utils.py index b980e80eb..a587ae711 100644 --- a/hindsight-api-slim/hindsight_api/engine/retain/bank_utils.py +++ b/hindsight-api-slim/hindsight_api/engine/retain/bank_utils.py @@ -380,7 +380,10 @@ async def _llm_merge_mission(llm_config, current: str, new_info: str) -> dict: messages = [{"role": "user", "content": prompt}] content = await llm_config.call( - messages=messages, scope="bank_mission", temperature=0.3, max_completion_tokens=8192 + messages=messages, + scope="bank_mission", + temperature=get_config().llm_temperature_bank_mission, + max_completion_tokens=8192, ) logger.info(f"LLM response for mission merge (first 500 chars): {content[:500]}") diff --git a/hindsight-api-slim/tests/test_llm_temperature_env.py b/hindsight-api-slim/tests/test_llm_temperature_env.py index 8fd673739..e7a34cc74 100644 --- a/hindsight-api-slim/tests/test_llm_temperature_env.py +++ b/hindsight-api-slim/tests/test_llm_temperature_env.py @@ -14,6 +14,7 @@ "HINDSIGHT_API_LLM_TEMPERATURE_RETAIN": ("llm_temperature_retain", 0.1), "HINDSIGHT_API_LLM_TEMPERATURE_REFLECT": ("llm_temperature_reflect", 0.9), "HINDSIGHT_API_LLM_TEMPERATURE_CONSOLIDATION": ("llm_temperature_consolidation", 0.0), + "HINDSIGHT_API_LLM_TEMPERATURE_BANK_MISSION": ("llm_temperature_bank_mission", 0.3), } diff --git a/hindsight-api-slim/tests/test_llm_temperature_pipeline.py b/hindsight-api-slim/tests/test_llm_temperature_pipeline.py index cdf4af9b2..5e7fa95bc 100644 --- a/hindsight-api-slim/tests/test_llm_temperature_pipeline.py +++ b/hindsight-api-slim/tests/test_llm_temperature_pipeline.py @@ -14,6 +14,7 @@ import pytest from hindsight_api.config import clear_config_cache +from hindsight_api.engine.retain import bank_utils from hindsight_api.engine.search import think_utils @@ -97,3 +98,41 @@ async def test_global_none_omits_temperature_on_real_call(memory, monkeypatch): finally: # Restore the cached config so later tests see default temperatures. clear_config_cache() + + +@pytest.mark.asyncio +async def test_bank_mission_merge_forwards_configured_temperature(memory): + """Bank/folder mission merge must call the LLM with the bank_mission temperature (0.3 default).""" + # The engine routes mission merging through the reflect provider (memory_engine + # passes self._reflect_llm_config to bank_utils.merge_bank_mission). + reflect_config = memory._reflect_llm_config + reflect_config._provider_impl.clear_mock_calls() + + await bank_utils._llm_merge_mission( + reflect_config, "I help the team with ops.", "I now also handle billing questions." + ) + + merge_calls = [c for c in reflect_config._provider_impl.get_mock_calls() if c["scope"] == "bank_mission"] + assert merge_calls, "mission merge should have made a bank_mission LLM call" + assert all(c["temperature"] == 0.3 for c in merge_calls) + + +@pytest.mark.asyncio +async def test_bank_mission_global_none_omits_temperature(memory, monkeypatch): + """HINDSIGHT_API_LLM_TEMPERATURE=none must omit (None) the temperature on mission merge too.""" + monkeypatch.setenv("HINDSIGHT_API_LLM_TEMPERATURE", "none") + clear_config_cache() + try: + reflect_config = memory._reflect_llm_config + reflect_config._provider_impl.clear_mock_calls() + + await bank_utils._llm_merge_mission( + reflect_config, "I help the team with ops.", "I now also handle billing questions." + ) + + merge_calls = [c for c in reflect_config._provider_impl.get_mock_calls() if c["scope"] == "bank_mission"] + assert merge_calls, "mission merge should have made a bank_mission LLM call" + assert all(c["temperature"] is None for c in merge_calls), "temperature should be omitted" + finally: + # Restore the cached config so later tests see default temperatures. + clear_config_cache() diff --git a/hindsight-docs/docs/developer/configuration.md b/hindsight-docs/docs/developer/configuration.md index 4e523639d..1dc30ed78 100644 --- a/hindsight-docs/docs/developer/configuration.md +++ b/hindsight-docs/docs/developer/configuration.md @@ -179,6 +179,7 @@ For non-English banks (especially CJK) and the language/extraction-language trad | `HINDSIGHT_API_LLM_TEMPERATURE_RETAIN` | Temperature for fact extraction during retain. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.1` | | `HINDSIGHT_API_LLM_TEMPERATURE_REFLECT` | Temperature for the reflect "thinking" step. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.9` | | `HINDSIGHT_API_LLM_TEMPERATURE_CONSOLIDATION` | Temperature for consolidation (mental-model delta and dedup). Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.0` | +| `HINDSIGHT_API_LLM_TEMPERATURE_BANK_MISSION` | Temperature for merging bank/folder mission statements. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.3` | | `HINDSIGHT_API_LLM_SEND_BANK_AS_USER` | Tag outbound LLM and embedding calls with `user=` so gateways (OpenRouter usage accounting, LiteLLM, Helicone) can attribute spend per bank. When enabled, the bank id is transmitted to the upstream provider as the end-user identifier. | `false` | | `HINDSIGHT_API_LLM_GROQ_SERVICE_TIER` | Groq service tier: `on_demand`, `flex`, `auto` | `auto` | | `HINDSIGHT_API_LLM_OPENAI_SERVICE_TIER` | OpenAI service tier: `flex` for 50% cost savings (OpenAI Flex Processing) | None (default) | diff --git a/skills/hindsight-docs/references/developer/configuration.md b/skills/hindsight-docs/references/developer/configuration.md index ac6567066..489d59517 100644 --- a/skills/hindsight-docs/references/developer/configuration.md +++ b/skills/hindsight-docs/references/developer/configuration.md @@ -179,6 +179,7 @@ For non-English banks (especially CJK) and the language/extraction-language trad | `HINDSIGHT_API_LLM_TEMPERATURE_RETAIN` | Temperature for fact extraction during retain. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.1` | | `HINDSIGHT_API_LLM_TEMPERATURE_REFLECT` | Temperature for the reflect "thinking" step. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.9` | | `HINDSIGHT_API_LLM_TEMPERATURE_CONSOLIDATION` | Temperature for consolidation (mental-model delta and dedup). Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.0` | +| `HINDSIGHT_API_LLM_TEMPERATURE_BANK_MISSION` | Temperature for merging bank/folder mission statements. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.3` | | `HINDSIGHT_API_LLM_SEND_BANK_AS_USER` | Tag outbound LLM and embedding calls with `user=` so gateways (OpenRouter usage accounting, LiteLLM, Helicone) can attribute spend per bank. When enabled, the bank id is transmitted to the upstream provider as the end-user identifier. | `false` | | `HINDSIGHT_API_LLM_GROQ_SERVICE_TIER` | Groq service tier: `on_demand`, `flex`, `auto` | `auto` | | `HINDSIGHT_API_LLM_OPENAI_SERVICE_TIER` | OpenAI service tier: `flex` for 50% cost savings (OpenAI Flex Processing) | None (default) |