Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions tensorrt_llm/serve/postprocess_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,9 +509,9 @@ def completion_stream_post_processor(rsp: DetokenizedGenerationResultBase,
cached_tokens=rsp.cached_tokens),
)

- final_usage_chunk = ChatCompletionStreamResponse(choices=[],
- model=args.model,
- usage=final_usage)
+ final_usage_chunk = CompletionStreamResponse(choices=[],
+ model=args.model,
+ usage=final_usage)
final_usage_data = final_usage_chunk.model_dump_json()
res.append(f"data: {final_usage_data}\n\n")
args.first_iteration = False
Expand Down
4 changes: 4 additions & 0 deletions tests/unittest/llmapi/apps/_test_openai_completions.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -279,11 +279,13 @@ async def test_completion_stream_options(async_client: openai.AsyncOpenAI,
False,
})
async for chunk in stream:
+ assert chunk.object == "text_completion"
if chunk.choices[0].finish_reason is None:
assert chunk.usage is None
else:
assert chunk.usage is None
final_chunk = await stream.__anext__()
+ assert final_chunk.object == "text_completion"
assert final_chunk.usage is not None
assert final_chunk.usage.prompt_tokens > 0
assert final_chunk.usage.completion_tokens > 0
Expand All @@ -306,13 +308,15 @@ async def test_completion_stream_options(async_client: openai.AsyncOpenAI,
True,
})
async for chunk in stream:
+ assert chunk.object == "text_completion"
assert chunk.usage is not None
assert chunk.usage.prompt_tokens > 0
assert chunk.usage.completion_tokens > 0
assert chunk.usage.total_tokens == (chunk.usage.prompt_tokens +
chunk.usage.completion_tokens)
if chunk.choices[0].finish_reason is not None:
final_chunk = await stream.__anext__()
+ assert final_chunk.object == "text_completion"
assert final_chunk.usage is not None
assert final_chunk.usage.prompt_tokens > 0
assert final_chunk.usage.completion_tokens > 0
Expand Down
Loading