@@ -409,17 +409,18 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(ov::genai::Enco
409409
410410 // choices: array of size N, where N is related to n request parameter
411411 jsonResponse.StartArray (" choices" );
412- int index = 0 ;
413- for (int i = 0 ; i < results.tokens .size (); i++) {
412+ for (size_t i = 0 ; i < results.tokens .size (); ++i) {
414413 const std::vector<int64_t >& tokens = results.tokens [i];
415414 SPDLOG_LOGGER_TRACE (llm_calculator_logger, " Generated tokens: {}" , tokens);
416415 ParsedOutput parsedOutput = parseOutputIfNeeded (tokens);
417416 jsonResponse.StartObject ();
418417 // finish_reason: "stop" in regular scenario, "tool_calls" if output contains tool calls
419- auto finishReason = mapFinishReason (ov::genai::GenerationFinishReason::STOP, !parsedOutput.toolCalls .empty ());
418+ const ov::genai::GenerationFinishReason finishReasonRaw =
419+ (i < results.finish_reasons .size ()) ? results.finish_reasons [i] : ov::genai::GenerationFinishReason::STOP;
420+ auto finishReason = mapFinishReason (finishReasonRaw, !parsedOutput.toolCalls .empty ());
420421 jsonResponse.FinishReason (finishReason.value_or (" unknown" ));
421422 // index: integer; Choice index, only n=1 supported anyway
422- jsonResponse.Index (index++ );
423+ jsonResponse.Index (static_cast < int >(i) );
423424
424425 if (endpoint == Endpoint::CHAT_COMPLETIONS) {
425426 jsonResponse.MessageObject (parsedOutput);
@@ -462,7 +463,9 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(ov::genai::VLMD
462463 OVMS_PROFILE_FUNCTION ();
463464 usage.promptTokens = results.perf_metrics .get_num_input_tokens ();
464465 usage.completionTokens = results.perf_metrics .get_num_generated_tokens ();
465-
466+ if (results.finish_reasons .empty ()) {
467+ throw std::runtime_error (" Missing finish reason in unary VLM responses generation result" );
468+ }
466469 OpenAiJsonResponse jsonResponse;
467470 jsonResponse.StartObject ();
468471
@@ -480,8 +483,14 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(ov::genai::VLMD
480483 SPDLOG_LOGGER_TRACE (llm_calculator_logger, " Generated tokens: {}" , generatedTokens);
481484 ParsedOutput parsedOutput = parseOutputIfNeeded (generatedTokens);
482485 jsonResponse.StartObject ();
483- // finish_reason: "stop" in regular scenario, "tool_calls" if output contains tool calls
484- auto finishReason = mapFinishReason (ov::genai::GenerationFinishReason::STOP, !parsedOutput.toolCalls .empty ());
486+ ov::genai::GenerationFinishReason responsesFinishReason = ov::genai::GenerationFinishReason::STOP;
487+ for (const auto & finishReason : results.finish_reasons ) {
488+ if (finishReason == ov::genai::GenerationFinishReason::LENGTH) {
489+ responsesFinishReason = ov::genai::GenerationFinishReason::LENGTH;
490+ break ;
491+ }
492+ }
493+ auto finishReason = mapFinishReason (responsesFinishReason, !parsedOutput.toolCalls .empty ());
485494 jsonResponse.FinishReason (finishReason.value_or (" unknown" ));
486495 // index: integer; Choice index, only n=1 supported anyway
487496 jsonResponse.Index (index++);
0 commit comments