Skip to content

Commit 146819b

Browse files
committed
fix(llm): thread per-scope LLM timeout into constructed providers
The retain/reflect/consolidation LLM timeouts were resolved into config but never passed to the provider, so they were silently ignored and every call fell back to the global HINDSIGHT_API_LLM_TIMEOUT / 120s default. Thread timeout through create_llm_provider and LLMProvider into the LiteLLM (and bedrock-alias) constructions, and pass the resolved per-op value from the MemoryEngine builds (global llm_timeout fallback). Add threading tests. Fixes #2452
1 parent ef2e8ab commit 146819b

3 files changed

Lines changed: 91 additions & 0 deletions

File tree

hindsight-api-slim/hindsight_api/engine/llm_wrapper.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ def create_llm_provider(
253253
prompt_cache_enabled: bool = False,
254254
litellmrouter_config: dict[str, Any] | None = None,
255255
gemini_service_tier: str | None = None,
256+
timeout: float | None = None,
256257
) -> Any: # Returns LLMInterface
257258
"""
258259
Factory function to create the appropriate LLM provider implementation.
@@ -267,6 +268,9 @@ def create_llm_provider(
267268
openai_service_tier: OpenAI service tier (for OpenAI provider) - None (default) or "flex" (50% cheaper).
268269
bedrock_service_tier: Bedrock service tier (for Bedrock provider) - None (default), "flex", "priority", or "reserved".
269270
gemini_service_tier: Gemini service tier (for Gemini provider) - None (default) or "flex" (50% cheaper).
271+
timeout: Per-call hard timeout (seconds) for the constructed provider. Threaded into LiteLLM
272+
(incl. the ``bedrock/`` alias), which wraps each request in ``asyncio.wait_for(timeout)``.
273+
``None`` lets the provider fall back to ``HINDSIGHT_API_LLM_TIMEOUT`` / the built-in default.
270274
extra_body: Extra request-body params merged into the provider's native
271275
call. Threaded into OpenAI-compatible, Fireworks, Anthropic, Gemini/
272276
VertexAI and LiteLLM providers (each merges them in its own parameter
@@ -378,6 +382,7 @@ def create_llm_provider(
378382
reasoning_effort=reasoning_effort,
379383
extra_body=extra_body,
380384
default_headers=default_headers,
385+
timeout=timeout,
381386
)
382387

383388
elif provider_lower == "litellmrouter":
@@ -411,6 +416,7 @@ def create_llm_provider(
411416
extra_body=extra_body,
412417
default_headers=default_headers,
413418
bedrock_service_tier=bedrock_service_tier,
419+
timeout=timeout,
414420
)
415421

416422
elif provider_lower == "llamacpp":
@@ -503,6 +509,7 @@ def __init__(
503509
base_url: str,
504510
model: str,
505511
reasoning_effort: str = "low",
512+
timeout: float | None = None,
506513
groq_service_tier: str | None = None,
507514
openai_service_tier: str | None = None,
508515
bedrock_service_tier: str | None = None,
@@ -525,6 +532,9 @@ def __init__(
525532
base_url: Base URL for the API.
526533
model: Model name.
527534
reasoning_effort: Reasoning effort level for supported providers.
535+
timeout: Per-call hard timeout (seconds) for this provider. Resolved by the caller from
536+
the per-op config (``retain``/``reflect``/``consolidation``) with the global
537+
``llm_timeout`` fallback; ``None`` defers to the provider's own env/default.
528538
groq_service_tier: Groq service tier ("on_demand", "flex", "auto") - from config.
529539
openai_service_tier: OpenAI service tier (None or "flex") - from config.
530540
bedrock_service_tier: Bedrock service tier (None, "flex", "priority", "reserved") - from config.
@@ -556,6 +566,7 @@ def __init__(
556566
self.base_url = base_url
557567
self.model = model
558568
self.reasoning_effort = reasoning_effort
569+
self.timeout = timeout
559570
self.litellmrouter_config = litellmrouter_config
560571
# Service tiers from hierarchical config (not env vars)
561572
self.groq_service_tier = groq_service_tier
@@ -694,6 +705,7 @@ def __init__(
694705
base_url=self.base_url,
695706
model=self.model,
696707
reasoning_effort=self.reasoning_effort,
708+
timeout=self.timeout,
697709
groq_service_tier=self.groq_service_tier,
698710
openai_service_tier=self.openai_service_tier,
699711
bedrock_service_tier=self.bedrock_service_tier,
@@ -1178,6 +1190,7 @@ def from_env(cls) -> "LLMProvider":
11781190
DEFAULT_LLM_PROMPT_CACHE_ENABLED,
11791191
DEFAULT_LLM_PROVIDER,
11801192
DEFAULT_LLM_REASONING_EFFORT,
1193+
DEFAULT_LLM_TIMEOUT,
11811194
ENV_LLM_API_KEY,
11821195
ENV_LLM_BASE_URL,
11831196
ENV_LLM_BEDROCK_SERVICE_TIER,
@@ -1192,6 +1205,7 @@ def from_env(cls) -> "LLMProvider":
11921205
ENV_LLM_PROMPT_CACHE_ENABLED,
11931206
ENV_LLM_PROVIDER,
11941207
ENV_LLM_REASONING_EFFORT,
1208+
ENV_LLM_TIMEOUT,
11951209
ENV_LLM_VERTEXAI_PROJECT_ID,
11961210
ENV_LLM_VERTEXAI_REGION,
11971211
ENV_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY,
@@ -1229,6 +1243,7 @@ def from_env(cls) -> "LLMProvider":
12291243
base_url=base_url,
12301244
model=model,
12311245
reasoning_effort=os.getenv(ENV_LLM_REASONING_EFFORT, DEFAULT_LLM_REASONING_EFFORT),
1246+
timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
12321247
extra_body=extra_body,
12331248
default_headers=default_headers,
12341249
groq_service_tier=os.getenv(ENV_LLM_GROQ_SERVICE_TIER, DEFAULT_LLM_GROQ_SERVICE_TIER),

hindsight-api-slim/hindsight_api/engine/memory_engine.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ def _member_to_llm(member: "LLMMemberConfig", config: HindsightConfig) -> LLMCon
406406
base_url=member.base_url,
407407
model=member.model,
408408
reasoning_effort=member.reasoning_effort or config.llm_reasoning_effort,
409+
timeout=config.llm_timeout,
409410
extra_body=member.extra_body,
410411
default_headers=member.default_headers or config.llm_default_headers,
411412
bedrock_service_tier=member.bedrock_service_tier,
@@ -1030,6 +1031,7 @@ def __init__(
10301031
base_url=memory_llm_base_url,
10311032
model=memory_llm_model,
10321033
reasoning_effort=config.llm_reasoning_effort,
1034+
timeout=config.llm_timeout,
10331035
extra_body=config.llm_extra_body,
10341036
default_headers=config.llm_default_headers,
10351037
litellmrouter_config=config.llm_litellmrouter_config,
@@ -1073,6 +1075,7 @@ def __init__(
10731075
base_url=retain_base_url,
10741076
model=retain_model,
10751077
reasoning_effort=config.llm_reasoning_effort,
1078+
timeout=config.retain_llm_timeout if config.retain_llm_timeout is not None else config.llm_timeout,
10761079
extra_body=config.llm_extra_body,
10771080
default_headers=config.llm_default_headers,
10781081
litellmrouter_config=config.retain_llm_litellmrouter_config or config.llm_litellmrouter_config,
@@ -1110,6 +1113,7 @@ def __init__(
11101113
base_url=reflect_base_url,
11111114
model=reflect_model,
11121115
reasoning_effort=config.llm_reasoning_effort,
1116+
timeout=config.reflect_llm_timeout if config.reflect_llm_timeout is not None else config.llm_timeout,
11131117
extra_body=config.llm_extra_body,
11141118
default_headers=config.llm_default_headers,
11151119
litellmrouter_config=config.reflect_llm_litellmrouter_config or config.llm_litellmrouter_config,
@@ -1147,6 +1151,9 @@ def __init__(
11471151
base_url=consolidation_base_url,
11481152
model=consolidation_model,
11491153
reasoning_effort=config.llm_reasoning_effort,
1154+
timeout=config.consolidation_llm_timeout
1155+
if config.consolidation_llm_timeout is not None
1156+
else config.llm_timeout,
11501157
extra_body=config.llm_extra_body,
11511158
default_headers=config.llm_default_headers,
11521159
litellmrouter_config=config.consolidation_llm_litellmrouter_config or config.llm_litellmrouter_config,
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
"""Plumbing tests for the per-scope LLM timeout (HINDSIGHT_API_{RETAIN,REFLECT,CONSOLIDATION}_LLM_TIMEOUT).
2+
3+
These assert the *wiring* — that a configured timeout actually reaches the
4+
constructed provider — rather than just that the env var parses into config
5+
(covered by test_config_validation.py). The value is threaded
6+
config -> LLMProvider -> create_llm_provider -> LiteLLMLLM, and LiteLLMLLM wraps
7+
each request in ``asyncio.wait_for(timeout)`` (see test_litellm_timeout.py).
8+
9+
Before this wiring, the per-op timeouts were resolved into config but never passed
10+
to the provider, so ``retain``/``reflect``/``consolidation`` calls silently fell
11+
back to the global ``HINDSIGHT_API_LLM_TIMEOUT`` default and ignored the
12+
operator-set value (issue #2452). If MemoryEngine ever stops passing ``timeout``
13+
through, the per-op knob goes inert again — these checks guard that bridge.
14+
"""
15+
16+
from hindsight_api.config import DEFAULT_LLM_TIMEOUT, ENV_LLM_TIMEOUT
17+
from hindsight_api.engine.llm_wrapper import LLMConfig, create_llm_provider
18+
19+
_LITELLM_MODEL = "litellm_proxy/test-model"
20+
21+
22+
def test_llm_config_threads_timeout_to_provider_impl():
23+
"""End-to-end: LLMConfig -> create_llm_provider -> LiteLLMLLM carries the timeout."""
24+
llm = LLMConfig(
25+
provider="litellm",
26+
api_key="k",
27+
base_url="",
28+
model=_LITELLM_MODEL,
29+
timeout=300.0,
30+
)
31+
assert llm._provider_impl.timeout == 300.0
32+
33+
34+
def test_create_llm_provider_threads_timeout():
35+
"""The factory forwards ``timeout`` into the LiteLLM provider."""
36+
impl = create_llm_provider(
37+
provider="litellm",
38+
api_key="k",
39+
base_url="",
40+
model=_LITELLM_MODEL,
41+
reasoning_effort="low",
42+
timeout=42.0,
43+
)
44+
assert impl.timeout == 42.0
45+
46+
47+
def test_bedrock_alias_threads_timeout():
48+
"""The ``bedrock/`` LiteLLM alias path also carries the timeout."""
49+
llm = LLMConfig(
50+
provider="bedrock",
51+
api_key="",
52+
base_url="",
53+
model="us.amazon.nova-2-lite-v1:0",
54+
timeout=77.0,
55+
)
56+
assert llm._provider_impl.timeout == 77.0
57+
58+
59+
def test_unset_timeout_falls_back_to_default(monkeypatch):
60+
"""``None`` (no per-op or global override) resolves to the finite default,
61+
never ``None`` — preserving the existing hard-timeout backstop."""
62+
monkeypatch.delenv(ENV_LLM_TIMEOUT, raising=False)
63+
llm = LLMConfig(
64+
provider="litellm",
65+
api_key="k",
66+
base_url="",
67+
model=_LITELLM_MODEL,
68+
)
69+
assert llm._provider_impl.timeout == DEFAULT_LLM_TIMEOUT

0 commit comments

Comments
 (0)