Skip to content

Commit dc9c17c

Browse files
authored
feat: support token usage extraction for llama.cpp (#7358)
* feat: support token usage extraction for llama.cpp
* chore: ruff format
1 parent 77d5d5c commit dc9c17c

File tree

2 files changed

+93
-6
lines changed

2 files changed

+93
-6
lines changed

astrbot/core/provider/sources/openai_source.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -532,19 +532,18 @@ async def _query_stream(
532532
**payloads,
533533
stream=True,
534534
extra_body=extra_body,
535+
stream_options={"include_usage": True},
535536
)
536537

537538
llm_response = LLMResponse("assistant", is_chunk=True)
538539

539540
state = ChatCompletionStreamState()
540541

541542
async for chunk in stream:
542-
if not chunk.choices:
543-
continue
544-
choice = chunk.choices[0]
545-
delta = choice.delta
543+
choice = chunk.choices[0] if chunk.choices else None
544+
delta = choice.delta if choice else None
546545

547-
if dtcs := delta.tool_calls:
546+
if delta and (dtcs := delta.tool_calls):
548547
for idx, tc in enumerate(dtcs):
549548
# siliconflow workaround
550549
if tc.function and tc.function.arguments:
@@ -574,7 +573,7 @@ async def _query_stream(
574573
_y = True
575574
if chunk.usage:
576575
llm_response.usage = self._extract_usage(chunk.usage)
577-
elif choice_usage := getattr(choice, "usage", None):
576+
elif choice and (choice_usage := getattr(choice, "usage", None)):
578577
# Workaround for some providers that only return usage in choices[].usage, e.g. MoonshotAI
579578
# See https://github.com/AstrBotDevs/AstrBot/issues/6614
580579
llm_response.usage = self._extract_usage(choice_usage)

tests/test_openai_source.py

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import pytest
44
from openai.types.chat.chat_completion import ChatCompletion
5+
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
56
from PIL import Image as PILImage
67

78
from astrbot.core.exceptions import EmptyModelOutputError
@@ -1175,6 +1176,93 @@ async def test_parse_openai_completion_raises_empty_model_output_error():
11751176
await provider.terminate()
11761177

11771178

1179+
@pytest.mark.asyncio
async def test_query_stream_extracts_usage_from_empty_choices_chunk(monkeypatch):
    """Check that usage on a chunk with empty ``choices`` reaches the final response.

    The faked stream yields a content chunk, a ``finish_reason="stop"`` chunk,
    and finally a chunk whose ``choices`` list is empty and which carries only
    ``usage``. The last response produced by ``_query_stream`` must expose both
    the accumulated text and the token usage taken from that trailing chunk.
    """
    provider = _make_provider()
    try:
        # Fields shared by every chunk of the simulated stream.
        envelope = {
            "id": "chatcmpl-stream",
            "object": "chat.completion.chunk",
            "created": 0,
            "model": "gpt-4o-mini",
        }
        content_choice = {
            "index": 0,
            "delta": {
                "role": "assistant",
                "content": "ok",
            },
            "finish_reason": None,
        }
        stop_choice = {
            "index": 0,
            "delta": {},
            "finish_reason": "stop",
        }
        token_usage = {
            "prompt_tokens": 2550,
            "completion_tokens": 125,
            "total_tokens": 2675,
            "prompt_tokens_details": {
                "cached_tokens": 2488,
            },
        }
        raw_chunks = (
            {**envelope, "choices": [content_choice]},
            {**envelope, "choices": [stop_choice]},
            # Usage-only chunk: no choices at all.
            {**envelope, "choices": [], "usage": token_usage},
        )
        parsed_chunks = [
            ChatCompletionChunk.model_validate(raw) for raw in raw_chunks
        ]

        async def replay_chunks():
            for item in parsed_chunks:
                yield item

        async def create_stub(**kwargs):
            # Stands in for the client's ``create`` call; arguments are ignored.
            return replay_chunks()

        monkeypatch.setattr(provider.client.chat.completions, "create", create_stub)

        collected = []
        async for partial in provider._query_stream(
            payloads={
                "model": "gpt-4o-mini",
                "messages": [{"role": "user", "content": "hello"}],
            },
            tools=None,
        ):
            collected.append(partial)

        last = collected[-1]
        assert last.completion_text == "ok"
        assert last.usage is not None
        # 62 matches prompt_tokens (2550) minus cached_tokens (2488).
        assert last.usage.input_other == 62
        assert last.usage.input_cached == 2488
        assert last.usage.output == 125
    finally:
        await provider.terminate()
1264+
1265+
11781266
@pytest.mark.asyncio
11791267
async def test_query_filters_empty_assistant_message_without_tool_calls(monkeypatch):
11801268
"""Test that empty assistant messages without tool_calls are filtered out."""

0 commit comments

Comments
 (0)