Skip to content

Commit 229a3f5

Browse files
committed
review: harden against malformed Usage shapes (litellm / non-OpenAI providers)
If a provider returns a ModelResponse with a Usage shape the OpenAI Agents SDK didn't fully normalize — missing input_tokens_details, missing usage entirely, None token values — we want to record what we can and skip the rest, and never crash the caller.

- Move requests.add outside the usage-extraction try block so the success counter still fires when accessing usage raises (e.g., usage is missing or None).
- Add three tests covering: a response whose .usage property raises, a Usage missing input_tokens_details, and a Usage with all-None token values.
1 parent 65d2e81 commit 229a3f5

2 files changed

Lines changed: 90 additions & 1 deletion

File tree

src/agentex/lib/core/observability/llm_metrics_hooks.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,18 @@ async def on_llm_end(
2727
del context # part of the RunHooks contract; unused here
2828
m = get_llm_metrics()
2929
attrs = {"model": str(agent.model) if agent.model else "unknown"}
30+
# Request counter only depends on agent.model, so emit it first and
31+
# outside the usage-extraction try block. Token counters reach into
32+
# nested optional fields and are best-effort: a non-OpenAI provider
33+
# (litellm-routed Anthropic, etc.) may return a Usage shape missing
34+
# input_tokens_details / output_tokens_details — we emit zeros where
35+
# we can and skip the rest rather than crash the caller.
3036
try:
31-
usage = response.usage
3237
m.requests.add(1, {**attrs, "status": "success"})
38+
except Exception:
39+
pass
40+
try:
41+
usage = response.usage
3342
m.input_tokens.add(usage.input_tokens or 0, attrs)
3443
m.output_tokens.add(usage.output_tokens or 0, attrs)
3544
m.cached_input_tokens.add(usage.input_tokens_details.cached_tokens or 0, attrs)

src/agentex/lib/core/observability/tests/test_llm_metrics_hooks.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,86 @@ async def test_swallows_exporter_failure(self, monkeypatch):
113113
response=_mock_response(),
114114
)
115115

116+
@pytest.mark.asyncio
async def test_missing_usage_still_emits_request_counter(self, monkeypatch):
    """A response whose ``usage`` access raises must not crash the hook.

    The success request counter is still expected exactly once, while the
    input/output token counters must remain untouched.
    """

    class _UsageRaises:
        # Stand-in for a provider response that exposes no usable `usage`.
        @property
        def usage(self):
            raise AttributeError("no usage")

    metrics = MagicMock()
    monkeypatch.setattr(hooks_module, "get_llm_metrics", lambda: metrics)

    hooks = LLMMetricsHooks()
    await hooks.on_llm_end(
        context=MagicMock(),
        agent=_mock_agent(),
        response=_UsageRaises(),  # type: ignore[arg-type]
    )

    success_attrs = {"model": "gpt-5", "status": "success"}
    metrics.requests.add.assert_called_once_with(1, success_attrs)
    for token_counter in (metrics.input_tokens, metrics.output_tokens):
        token_counter.add.assert_not_called()
137+
138+
@pytest.mark.asyncio
async def test_missing_token_details_skips_those_counters(self, monkeypatch):
    """Usage without ``input_tokens_details`` still emits top-level counts.

    Some litellm wrappers / non-OpenAI providers return a Usage object with
    no nested details. The request counter and both top-level token
    counters must fire; the nested cached/reasoning counters are skipped.
    """
    m = MagicMock()
    monkeypatch.setattr(hooks_module, "get_llm_metrics", lambda: m)

    class _Usage:
        input_tokens = 100
        output_tokens = 50

        @property
        def input_tokens_details(self):
            # Simulates a provider Usage shape that lacks the nested details.
            raise AttributeError("no details")

    class _Response:
        usage = _Usage()

    await LLMMetricsHooks().on_llm_end(
        context=MagicMock(),
        agent=_mock_agent(),
        response=_Response(),  # type: ignore[arg-type]
    )

    attrs = {"model": "gpt-5"}
    # Request counter still fires (it's outside the usage-extraction try).
    m.requests.add.assert_called_once_with(1, {**attrs, "status": "success"})
    # Both top-level counters fire before the nested attribute access, so
    # pin output_tokens as well as input_tokens.
    m.input_tokens.add.assert_called_once_with(100, attrs)
    m.output_tokens.add.assert_called_once_with(50, attrs)
    # cached_input_tokens / reasoning_tokens skipped — the AttributeError
    # bailed before they could be called.
    m.cached_input_tokens.add.assert_not_called()
    m.reasoning_tokens.add.assert_not_called()
171+
172+
@pytest.mark.asyncio
async def test_none_token_values_emit_as_zero(self, monkeypatch):
    """None token values reported by a provider are recorded as 0."""
    metrics = MagicMock()
    monkeypatch.setattr(hooks_module, "get_llm_metrics", lambda: metrics)

    # Every token field the test sets is None, both top-level and nested.
    response = MagicMock()
    usage = response.usage
    usage.input_tokens = None
    usage.output_tokens = None
    usage.input_tokens_details.cached_tokens = None
    usage.output_tokens_details.reasoning_tokens = None

    hooks = LLMMetricsHooks()
    await hooks.on_llm_end(
        context=MagicMock(),
        agent=_mock_agent(),
        response=response,
    )

    expected_attrs = {"model": "gpt-5"}
    token_counters = (
        metrics.input_tokens,
        metrics.output_tokens,
        metrics.cached_input_tokens,
        metrics.reasoning_tokens,
    )
    for counter in token_counters:
        counter.add.assert_called_once_with(0, expected_attrs)
195+
116196

117197
class TestRecordLLMFailure:
118198
def test_emits_classified_status(self, monkeypatch):

0 commit comments

Comments
 (0)