@@ -649,10 +649,17 @@ std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::EncodedRes
649649 usage.promptTokens = results.perf_metrics .get_num_input_tokens ();
650650 usage.completionTokens = results.perf_metrics .get_num_generated_tokens ();
651651 std::vector<ParsedOutput> parsedOutputs;
652+ ov::genai::GenerationFinishReason responsesFinishReason = ov::genai::GenerationFinishReason::STOP;
652653 for (const auto & tokens : results.tokens ) {
653654 parsedOutputs.push_back (parseOutputIfNeeded (tokens));
654655 }
655- return serializeUnaryResponseImpl (parsedOutputs);
656+ for (const auto & finishReason : results.finish_reasons ) {
657+ if (finishReason == ov::genai::GenerationFinishReason::LENGTH) {
658+ responsesFinishReason = ov::genai::GenerationFinishReason::LENGTH;
659+ break ;
660+ }
661+ }
662+ return serializeUnaryResponseImpl (parsedOutputs, responsesFinishReason);
656663}
657664
658665std::string OpenAIResponsesHandler::serializeUnaryResponse (ov::genai::VLMDecodedResults& results) {
@@ -661,6 +668,7 @@ std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::VLMDecoded
661668 usage.completionTokens = results.perf_metrics .get_num_generated_tokens ();
662669 // Usage is already correctly set from perf_metrics above — no need for updateUsage.
663670 std::vector<ParsedOutput> parsedOutputs;
671+ ov::genai::GenerationFinishReason responsesFinishReason = ov::genai::GenerationFinishReason::STOP;
664672 for (const std::string& text : results.texts ) {
665673 if (outputParser != nullptr ) {
666674 // Same workaround as in chat completions
@@ -673,7 +681,13 @@ std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::VLMDecoded
673681 parsedOutputs.push_back (std::move (output));
674682 }
675683 }
676- return serializeUnaryResponseImpl (parsedOutputs);
684+ for (const auto & finishReason : results.finish_reasons ) {
685+ if (finishReason == ov::genai::GenerationFinishReason::LENGTH) {
686+ responsesFinishReason = ov::genai::GenerationFinishReason::LENGTH;
687+ break ;
688+ }
689+ }
690+ return serializeUnaryResponseImpl (parsedOutputs, responsesFinishReason);
677691}
678692
679693// --- Streaming event building blocks ---
0 commit comments