diff --git a/tensorrt_llm/serve/postprocess_handlers.py b/tensorrt_llm/serve/postprocess_handlers.py index 75e918fe55d..ea5a4d26d92 100644 --- a/tensorrt_llm/serve/postprocess_handlers.py +++ b/tensorrt_llm/serve/postprocess_handlers.py @@ -509,9 +509,9 @@ def completion_stream_post_processor(rsp: DetokenizedGenerationResultBase, cached_tokens=rsp.cached_tokens), ) - final_usage_chunk = ChatCompletionStreamResponse(choices=[], - model=args.model, - usage=final_usage) + final_usage_chunk = CompletionStreamResponse(choices=[], + model=args.model, + usage=final_usage) final_usage_data = final_usage_chunk.model_dump_json() res.append(f"data: {final_usage_data}\n\n") args.first_iteration = False diff --git a/tests/unittest/llmapi/apps/_test_openai_completions.py b/tests/unittest/llmapi/apps/_test_openai_completions.py old mode 100644 new mode 100755 index 03f08a5c6b8..ff411be19ef --- a/tests/unittest/llmapi/apps/_test_openai_completions.py +++ b/tests/unittest/llmapi/apps/_test_openai_completions.py @@ -279,11 +279,13 @@ async def test_completion_stream_options(async_client: openai.AsyncOpenAI, False, }) async for chunk in stream: + assert chunk.object == "text_completion" if chunk.choices[0].finish_reason is None: assert chunk.usage is None else: assert chunk.usage is None final_chunk = await stream.__anext__() + assert final_chunk.object == "text_completion" assert final_chunk.usage is not None assert final_chunk.usage.prompt_tokens > 0 assert final_chunk.usage.completion_tokens > 0 @@ -306,6 +308,7 @@ async def test_completion_stream_options(async_client: openai.AsyncOpenAI, True, }) async for chunk in stream: + assert chunk.object == "text_completion" assert chunk.usage is not None assert chunk.usage.prompt_tokens > 0 assert chunk.usage.completion_tokens > 0 @@ -313,6 +316,7 @@ async def test_completion_stream_options(async_client: openai.AsyncOpenAI, chunk.usage.completion_tokens) if chunk.choices[0].finish_reason is not None: final_chunk = await stream.__anext__() + assert final_chunk.object == "text_completion" assert final_chunk.usage is not None assert final_chunk.usage.prompt_tokens > 0 assert final_chunk.usage.completion_tokens > 0