Commit 7e71e51

fix(langchain): handle prompt_tokens_details as dict in _parse_usage_model
When a LiteLLM proxy or OpenAI returns prompt_tokens_details as a dict (e.g. {"cached_tokens": 12000}), _parse_usage_model only handled the Vertex AI list format and silently dropped the dict via the isinstance(v, int) filter on line 1318. Cached token counts were therefore lost and input costs inflated in Langfuse, since prompt_tokens was never adjusted for cache hits.

Add dict handling for prompt_tokens_details, mirroring the existing input_token_details pattern: flatten each key as input_{key} and subtract its value from the input total. The existing Vertex AI list handling is preserved via elif.

Closes langfuse/langfuse#13024
1 parent 3e530af commit 7e71e51

2 files changed

Lines changed: 80 additions & 1 deletion
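To make the behavior concrete, here is a minimal standalone sketch of the dict handling the fix adds. This is an illustration, not the library code, and it assumes prompt_tokens has already been normalized to the "input" key earlier in the parser, as the diff's use of usage_model["input"] suggests. The sample numbers mirror the new tests.

def flatten_prompt_tokens_details(usage_model: dict) -> dict:
    """Flatten a dict-shaped prompt_tokens_details into input_* keys
    and subtract each count from the input total."""
    details = usage_model.pop("prompt_tokens_details", None)
    if isinstance(details, dict):
        for key, value in details.items():
            if not isinstance(value, int):
                continue  # non-integer entries are skipped, as in the fix
            usage_model[f"input_{key}"] = value
            if "input" in usage_model:
                usage_model["input"] = max(0, usage_model["input"] - value)
    return usage_model

# LiteLLM/OpenAI-style payload, post-normalization:
usage = {"input": 15000, "output": 500, "total": 15500,
         "prompt_tokens_details": {"cached_tokens": 12000}}
print(flatten_prompt_tokens_details(usage))
# {'input': 3000, 'output': 500, 'total': 15500, 'input_cached_tokens': 12000}

Before the fix, this payload kept input at 15000, so the 12000 cache-hit tokens were priced at the full input rate.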


langfuse/langchain/CallbackHandler.py

Lines changed: 18 additions & 1 deletion
@@ -1256,8 +1256,25 @@ def _parse_usage_model(usage: Union[pydantic.BaseModel, dict]) -> Any:
         if "output" in usage_model:
             usage_model["output"] = max(0, usage_model["output"] - value)
 
-    # Vertex AI
+    # OpenAI / LiteLLM — prompt_tokens_details as dict
+    # e.g. {"cached_tokens": 12000}
     if "prompt_tokens_details" in usage_model and isinstance(
+        usage_model["prompt_tokens_details"], dict
+    ):
+        prompt_tokens_details = usage_model.pop("prompt_tokens_details")
+
+        for key, value in prompt_tokens_details.items():
+            if not isinstance(value, int):
+                continue
+
+            usage_model[f"input_{key}"] = value
+
+            if "input" in usage_model:
+                usage_model["input"] = max(0, usage_model["input"] - value)
+
+    # Vertex AI — prompt_tokens_details as list
+    # e.g. [{"modality": "text", "token_count": N}]
+    elif "prompt_tokens_details" in usage_model and isinstance(
         usage_model["prompt_tokens_details"], list
     ):
         prompt_tokens_details = usage_model.pop("prompt_tokens_details")
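The hunk cuts off after the pop in the list branch. For context, here is a sketch of how that Vertex AI branch plausibly continues; the input_modality_{modality} key names and the subtraction are inferred from the assertions in the tests below, not taken from the library source.

def flatten_modality_details(usage_model: dict, details: list) -> dict:
    """Flatten Vertex AI-style modality entries into input_modality_* keys
    and subtract each token_count from the input total (inferred behavior)."""
    for detail in details:  # e.g. {"modality": "text", "token_count": 800}
        modality = detail.get("modality")
        token_count = detail.get("token_count")
        if modality is None or not isinstance(token_count, int):
            continue  # skip malformed entries
        usage_model[f"input_modality_{modality}"] = token_count
        if "input" in usage_model:
            usage_model["input"] = max(0, usage_model["input"] - token_count)
    return usage_model

usage = {"input": 1000, "output": 200, "total": 1200}
print(flatten_modality_details(usage, [
    {"modality": "text", "token_count": 800},
    {"modality": "image", "token_count": 200},
]))
# {'input': 0, 'output': 200, 'total': 1200,
#  'input_modality_text': 800, 'input_modality_image': 200}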

tests/test_parse_usage_model.py

Lines changed: 62 additions & 0 deletions
@@ -16,6 +16,68 @@ def test_standard_tier_input_token_details():
     assert result["total"] == 14
 
 
+def test_prompt_tokens_details_dict_cached_tokens():
+    """OpenAI/LiteLLM: prompt_tokens_details as dict with cached_tokens."""
+    usage = {
+        "prompt_tokens": 15000,
+        "completion_tokens": 500,
+        "total_tokens": 15500,
+        "prompt_tokens_details": {"cached_tokens": 12000},
+    }
+    result = _parse_usage_model(usage)
+    assert result["input"] == 3000  # 15000 - 12000
+    assert result["output"] == 500
+    assert result["total"] == 15500
+    assert result["input_cached_tokens"] == 12000
+
+
+def test_prompt_tokens_details_dict_with_cache_creation():
+    """OpenAI/LiteLLM: prompt_tokens_details dict + top-level cache_creation."""
+    usage = {
+        "prompt_tokens": 15000,
+        "completion_tokens": 500,
+        "total_tokens": 15500,
+        "prompt_tokens_details": {"cached_tokens": 12000},
+        "cache_creation_input_tokens": 3000,
+    }
+    result = _parse_usage_model(usage)
+    assert result["input"] == 3000  # 15000 - 12000 (cached_tokens only subtracted here)
+    assert result["input_cached_tokens"] == 12000
+    assert result["cache_creation_input_tokens"] == 3000
+
+
+def test_prompt_tokens_details_list_vertex_ai():
+    """Vertex AI: prompt_tokens_details as list — existing behavior preserved."""
+    usage = {
+        "prompt_token_count": 1000,
+        "candidates_token_count": 200,
+        "total_token_count": 1200,
+        "prompt_tokens_details": [
+            {"modality": "text", "token_count": 800},
+            {"modality": "image", "token_count": 200},
+        ],
+    }
+    result = _parse_usage_model(usage)
+    assert result["input"] == 0  # 1000 - 800 - 200
+    assert result["output"] == 200
+    assert result["total"] == 1200
+    assert result["input_modality_text"] == 800
+    assert result["input_modality_image"] == 200
+
+
+def test_prompt_tokens_details_dict_empty():
+    """Empty dict prompt_tokens_details — no crash, input unchanged."""
+    usage = {
+        "prompt_tokens": 5000,
+        "completion_tokens": 100,
+        "total_tokens": 5100,
+        "prompt_tokens_details": {},
+    }
+    result = _parse_usage_model(usage)
+    assert result["input"] == 5000
+    assert result["output"] == 100
+
+
 def test_priority_tier_not_subtracted():
     """Priority tier: 'priority' and 'priority_*' keys must NOT be subtracted."""
     usage = {
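Assuming the suite runs under pytest (the bare-assert style and test_* naming suggest it does), the new cases can be exercised in isolation with pytest tests/test_parse_usage_model.py -k prompt_tokens_details.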
