Skip to content

Commit 31f3036

Browse files
author
Michal Kulakowski
committed
fix
1 parent af13512 commit 31f3036

1 file changed

Lines changed: 16 additions & 2 deletions

File tree

src/llm/apis/openai_responses.cpp

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -649,10 +649,17 @@ std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::EncodedRes
649649
usage.promptTokens = results.perf_metrics.get_num_input_tokens();
650650
usage.completionTokens = results.perf_metrics.get_num_generated_tokens();
651651
std::vector<ParsedOutput> parsedOutputs;
652+
ov::genai::GenerationFinishReason responsesFinishReason = ov::genai::GenerationFinishReason::STOP;
652653
for (const auto& tokens : results.tokens) {
653654
parsedOutputs.push_back(parseOutputIfNeeded(tokens));
654655
}
655-
return serializeUnaryResponseImpl(parsedOutputs);
656+
for (const auto& finishReason : results.finish_reasons) {
657+
if (finishReason == ov::genai::GenerationFinishReason::LENGTH) {
658+
responsesFinishReason = ov::genai::GenerationFinishReason::LENGTH;
659+
break;
660+
}
661+
}
662+
return serializeUnaryResponseImpl(parsedOutputs, responsesFinishReason);
656663
}
657664

658665
std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::VLMDecodedResults& results) {
@@ -661,6 +668,7 @@ std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::VLMDecoded
661668
usage.completionTokens = results.perf_metrics.get_num_generated_tokens();
662669
// Usage is already correctly set from perf_metrics above — no need for updateUsage.
663670
std::vector<ParsedOutput> parsedOutputs;
671+
ov::genai::GenerationFinishReason responsesFinishReason = ov::genai::GenerationFinishReason::STOP;
664672
for (const std::string& text : results.texts) {
665673
if (outputParser != nullptr) {
666674
// Same workaround as in chat completions
@@ -673,7 +681,13 @@ std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::VLMDecoded
673681
parsedOutputs.push_back(std::move(output));
674682
}
675683
}
676-
return serializeUnaryResponseImpl(parsedOutputs);
684+
for (const auto& finishReason : results.finish_reasons) {
685+
if (finishReason == ov::genai::GenerationFinishReason::LENGTH) {
686+
responsesFinishReason = ov::genai::GenerationFinishReason::LENGTH;
687+
break;
688+
}
689+
}
690+
return serializeUnaryResponseImpl(parsedOutputs, responsesFinishReason);
677691
}
678692

679693
// --- Streaming event building blocks ---

0 commit comments

Comments (0)