Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hindsight-api-slim/hindsight_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
ENV_LLM_TEMPERATURE_RETAIN = "HINDSIGHT_API_LLM_TEMPERATURE_RETAIN"
ENV_LLM_TEMPERATURE_REFLECT = "HINDSIGHT_API_LLM_TEMPERATURE_REFLECT"
ENV_LLM_TEMPERATURE_CONSOLIDATION = "HINDSIGHT_API_LLM_TEMPERATURE_CONSOLIDATION"
ENV_LLM_TEMPERATURE_BANK_MISSION = "HINDSIGHT_API_LLM_TEMPERATURE_BANK_MISSION"

# Multi-LLM strategy. Extra LLMs are configured by index alongside the unindexed
# primary (e.g. HINDSIGHT_API_LLM_1_PROVIDER, HINDSIGHT_API_LLM_2_PROVIDER, ...),
Expand All @@ -184,6 +185,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
DEFAULT_LLM_TEMPERATURE_RETAIN = 0.1 # fact extraction
DEFAULT_LLM_TEMPERATURE_REFLECT = 0.9 # reflect "thinking"
DEFAULT_LLM_TEMPERATURE_CONSOLIDATION = 0.0 # mental-model delta / dedup
DEFAULT_LLM_TEMPERATURE_BANK_MISSION = 0.3 # bank/folder mission merge

# Defaults for service tiers
DEFAULT_LLM_GROQ_SERVICE_TIER = "auto" # "on_demand", "flex", or "auto"
Expand Down Expand Up @@ -1586,6 +1588,7 @@ class HindsightConfig:
llm_temperature_retain: float | None
llm_temperature_reflect: float | None
llm_temperature_consolidation: float | None
llm_temperature_bank_mission: float | None

# LiteLLM Router chain (provider-specific; consumed by the "litellmrouter" provider).
# List of deployment dicts evaluated in order with fallback on transient errors.
Expand Down Expand Up @@ -2333,6 +2336,9 @@ def from_env(cls) -> "HindsightConfig":
llm_temperature_consolidation=_resolve_operation_temperature(
ENV_LLM_TEMPERATURE_CONSOLIDATION, DEFAULT_LLM_TEMPERATURE_CONSOLIDATION
),
llm_temperature_bank_mission=_resolve_operation_temperature(
ENV_LLM_TEMPERATURE_BANK_MISSION, DEFAULT_LLM_TEMPERATURE_BANK_MISSION
),
llm_litellmrouter_config=_parse_llm_router_config(ENV_LLM_LITELLMROUTER_CONFIG),
# Vertex AI
llm_vertexai_project_id=os.getenv(ENV_LLM_VERTEXAI_PROJECT_ID) or DEFAULT_LLM_VERTEXAI_PROJECT_ID,
Expand Down
5 changes: 4 additions & 1 deletion hindsight-api-slim/hindsight_api/engine/retain/bank_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,10 @@ async def _llm_merge_mission(llm_config, current: str, new_info: str) -> dict:
messages = [{"role": "user", "content": prompt}]

content = await llm_config.call(
messages=messages, scope="bank_mission", temperature=0.3, max_completion_tokens=8192
messages=messages,
scope="bank_mission",
temperature=get_config().llm_temperature_bank_mission,
max_completion_tokens=8192,
)

logger.info(f"LLM response for mission merge (first 500 chars): {content[:500]}")
Expand Down
1 change: 1 addition & 0 deletions hindsight-api-slim/tests/test_llm_temperature_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"HINDSIGHT_API_LLM_TEMPERATURE_RETAIN": ("llm_temperature_retain", 0.1),
"HINDSIGHT_API_LLM_TEMPERATURE_REFLECT": ("llm_temperature_reflect", 0.9),
"HINDSIGHT_API_LLM_TEMPERATURE_CONSOLIDATION": ("llm_temperature_consolidation", 0.0),
"HINDSIGHT_API_LLM_TEMPERATURE_BANK_MISSION": ("llm_temperature_bank_mission", 0.3),
}


Expand Down
39 changes: 39 additions & 0 deletions hindsight-api-slim/tests/test_llm_temperature_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import pytest

from hindsight_api.config import clear_config_cache
from hindsight_api.engine.retain import bank_utils
from hindsight_api.engine.search import think_utils


Expand Down Expand Up @@ -97,3 +98,41 @@ async def test_global_none_omits_temperature_on_real_call(memory, monkeypatch):
finally:
# Restore the cached config so later tests see default temperatures.
clear_config_cache()


@pytest.mark.asyncio
async def test_bank_mission_merge_forwards_configured_temperature(memory):
    """The mission-merge LLM call should carry the bank_mission temperature (default 0.3)."""
    # Mission merging is exercised through the reflect provider here, mirroring the
    # engine, which passes self._reflect_llm_config to bank_utils.merge_bank_mission.
    llm = memory._reflect_llm_config
    # Start from an empty call log so only calls made by this merge are inspected.
    llm._provider_impl.clear_mock_calls()

    await bank_utils._llm_merge_mission(
        llm,
        "I help the team with ops.",
        "I now also handle billing questions.",
    )

    recorded_calls = llm._provider_impl.get_mock_calls()
    mission_calls = [call for call in recorded_calls if call["scope"] == "bank_mission"]
    assert mission_calls, "mission merge should have made a bank_mission LLM call"

    # Every bank_mission-scoped call must have been issued at the default temperature.
    observed_temperatures = [call["temperature"] for call in mission_calls]
    assert all(value == 0.3 for value in observed_temperatures)


@pytest.mark.asyncio
async def test_bank_mission_global_none_omits_temperature(memory, monkeypatch):
    """With HINDSIGHT_API_LLM_TEMPERATURE=none, mission merge must pass temperature=None as well."""
    monkeypatch.setenv("HINDSIGHT_API_LLM_TEMPERATURE", "none")
    # Drop the cached config so the patched environment variable is picked up.
    clear_config_cache()
    try:
        llm = memory._reflect_llm_config
        # Start from an empty call log so only calls made by this merge are inspected.
        llm._provider_impl.clear_mock_calls()

        await bank_utils._llm_merge_mission(
            llm,
            "I help the team with ops.",
            "I now also handle billing questions.",
        )

        recorded_calls = llm._provider_impl.get_mock_calls()
        mission_calls = [call for call in recorded_calls if call["scope"] == "bank_mission"]
        assert mission_calls, "mission merge should have made a bank_mission LLM call"

        observed_temperatures = [call["temperature"] for call in mission_calls]
        assert all(value is None for value in observed_temperatures), "temperature should be omitted"
    finally:
        # Restore the cached config so later tests see default temperatures.
        clear_config_cache()
1 change: 1 addition & 0 deletions hindsight-docs/docs/developer/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ For non-English banks (especially CJK) and the language/extraction-language trad
| `HINDSIGHT_API_LLM_TEMPERATURE_RETAIN` | Temperature for fact extraction during retain. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.1` |
| `HINDSIGHT_API_LLM_TEMPERATURE_REFLECT` | Temperature for the reflect "thinking" step. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.9` |
| `HINDSIGHT_API_LLM_TEMPERATURE_CONSOLIDATION` | Temperature for consolidation (mental-model delta and dedup). Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.0` |
| `HINDSIGHT_API_LLM_TEMPERATURE_BANK_MISSION` | Temperature for merging bank/folder mission statements. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.3` |
| `HINDSIGHT_API_LLM_SEND_BANK_AS_USER` | Tag outbound LLM and embedding calls with `user=<bank_id>` so gateways (OpenRouter usage accounting, LiteLLM, Helicone) can attribute spend per bank. When enabled, the bank id is transmitted to the upstream provider as the end-user identifier. | `false` |
| `HINDSIGHT_API_LLM_GROQ_SERVICE_TIER` | Groq service tier: `on_demand`, `flex`, `auto` | `auto` |
| `HINDSIGHT_API_LLM_OPENAI_SERVICE_TIER` | OpenAI service tier: `flex` for 50% cost savings (OpenAI Flex Processing) | None (default) |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ For non-English banks (especially CJK) and the language/extraction-language trad
| `HINDSIGHT_API_LLM_TEMPERATURE_RETAIN` | Temperature for fact extraction during retain. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.1` |
| `HINDSIGHT_API_LLM_TEMPERATURE_REFLECT` | Temperature for the reflect "thinking" step. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.9` |
| `HINDSIGHT_API_LLM_TEMPERATURE_CONSOLIDATION` | Temperature for consolidation (mental-model delta and dedup). Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.0` |
| `HINDSIGHT_API_LLM_TEMPERATURE_BANK_MISSION` | Temperature for merging bank/folder mission statements. Number in `[0.0, 2.0]` or `none` to omit. Overrides `HINDSIGHT_API_LLM_TEMPERATURE`. | `0.3` |
| `HINDSIGHT_API_LLM_SEND_BANK_AS_USER` | Tag outbound LLM and embedding calls with `user=<bank_id>` so gateways (OpenRouter usage accounting, LiteLLM, Helicone) can attribute spend per bank. When enabled, the bank id is transmitted to the upstream provider as the end-user identifier. | `false` |
| `HINDSIGHT_API_LLM_GROQ_SERVICE_TIER` | Groq service tier: `on_demand`, `flex`, `auto` | `auto` |
| `HINDSIGHT_API_LLM_OPENAI_SERVICE_TIER` | OpenAI service tier: `flex` for 50% cost savings (OpenAI Flex Processing) | None (default) |
Expand Down