NVIDIA · jhaotingc · Apr 4, 2026 · Apr 4, 2026
@@ -509,9 +509,9 @@ def completion_stream_post_processor(rsp: DetokenizedGenerationResultBase,
                 cached_tokens=rsp.cached_tokens),
         )
 
-        final_usage_chunk = ChatCompletionStreamResponse(choices=[],
-                                                         model=args.model,
-                                                         usage=final_usage)
+        final_usage_chunk = CompletionStreamResponse(choices=[],
+                                                     model=args.model,
+                                                     usage=final_usage)
         final_usage_data = final_usage_chunk.model_dump_json()
         res.append(f"data: {final_usage_data}\n\n")
     args.first_iteration = False

diff --git a/tests/unittest/llmapi/apps/_test_openai_completions.py b/tests/unittest/llmapi/apps/_test_openai_completions.py
@@ -279,11 +279,13 @@ async def test_completion_stream_options(async_client: openai.AsyncOpenAI,
                                                        False,
                                                    })
     async for chunk in stream:
+        assert chunk.object == "text_completion"
         if chunk.choices[0].finish_reason is None:
             assert chunk.usage is None
         else:
             assert chunk.usage is None
             final_chunk = await stream.__anext__()
+            assert final_chunk.object == "text_completion"
             assert final_chunk.usage is not None
             assert final_chunk.usage.prompt_tokens > 0
             assert final_chunk.usage.completion_tokens > 0
@@ -306,13 +308,15 @@ async def test_completion_stream_options(async_client: openai.AsyncOpenAI,
                                                        True,
                                                    })
     async for chunk in stream:
+        assert chunk.object == "text_completion"
         assert chunk.usage is not None
         assert chunk.usage.prompt_tokens > 0
         assert chunk.usage.completion_tokens > 0
         assert chunk.usage.total_tokens == (chunk.usage.prompt_tokens +
                                             chunk.usage.completion_tokens)
         if chunk.choices[0].finish_reason is not None:
             final_chunk = await stream.__anext__()
+            assert final_chunk.object == "text_completion"
             assert final_chunk.usage is not None
             assert final_chunk.usage.prompt_tokens > 0
             assert final_chunk.usage.completion_tokens > 0