
Commit fbd018c

Michal Kulakowski (michalkulakowski) authored and committed
Support finish reason in legacy pipelines
1 parent 7112cae commit fbd018c

5 files changed

Lines changed: 35 additions & 10 deletions


src/llm/apis/openai_completions.cpp

Lines changed: 8 additions & 5 deletions
@@ -409,17 +409,18 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(ov::genai::Enco
 
     // choices: array of size N, where N is related to n request parameter
     jsonResponse.StartArray("choices");
-    int index = 0;
-    for (int i = 0; i < results.tokens.size(); i++) {
+    for (size_t i = 0; i < results.tokens.size(); ++i) {
         const std::vector<int64_t>& tokens = results.tokens[i];
         SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Generated tokens: {}", tokens);
         ParsedOutput parsedOutput = parseOutputIfNeeded(tokens);
         jsonResponse.StartObject();
         // finish_reason: "stop" in regular scenario, "tool_calls" if output contains tool calls
-        auto finishReason = mapFinishReason(ov::genai::GenerationFinishReason::STOP, !parsedOutput.toolCalls.empty());
+        const ov::genai::GenerationFinishReason finishReasonRaw =
+            (!results.finish_reasons.empty()) ? results.finish_reasons[0] : ov::genai::GenerationFinishReason::STOP;
+        auto finishReason = mapFinishReason(finishReasonRaw, !parsedOutput.toolCalls.empty());
         jsonResponse.FinishReason(finishReason.value_or("unknown"));
         // index: integer; Choice index, only n=1 supported anyway
-        jsonResponse.Index(index++);
+        jsonResponse.Index(static_cast<int>(i));
 
         if (endpoint == Endpoint::CHAT_COMPLETIONS) {
             jsonResponse.MessageObject(parsedOutput);

@@ -481,7 +482,9 @@ std::string OpenAIChatCompletionsHandler::serializeUnaryResponse(ov::genai::VLMD
     ParsedOutput parsedOutput = parseOutputIfNeeded(generatedTokens);
     jsonResponse.StartObject();
     // finish_reason: "stop" in regular scenario, "tool_calls" if output contains tool calls
-    auto finishReason = mapFinishReason(ov::genai::GenerationFinishReason::STOP, !parsedOutput.toolCalls.empty());
+    const ov::genai::GenerationFinishReason finishReasonRaw =
+        (!results.finish_reasons.empty()) ? results.finish_reasons[0] : ov::genai::GenerationFinishReason::STOP;
+    auto finishReason = mapFinishReason(finishReasonRaw, !parsedOutput.toolCalls.empty());
    jsonResponse.FinishReason(finishReason.value_or("unknown"));
    // index: integer; Choice index, only n=1 supported anyway
    jsonResponse.Index(index++);
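Reader's note: the diff feeds the first reported finish reason into the existing mapFinishReason helper, whose body is not part of this commit. A minimal sketch of plausible behavior, assuming ov::genai::GenerationFinishReason exposes NONE/STOP/LENGTH and the handler emits the OpenAI strings "stop", "length", and "tool_calls":

#include <optional>
#include <string>

// Hypothetical sketch only -- the real mapFinishReason lives elsewhere in
// openai_completions.cpp and may differ. Tool calls win over the raw reason,
// matching the comment in the diff above.
static std::optional<std::string> mapFinishReasonSketch(
    ov::genai::GenerationFinishReason reason, bool hasToolCalls) {
    if (hasToolCalls)
        return "tool_calls";
    switch (reason) {
    case ov::genai::GenerationFinishReason::STOP:
        return "stop";
    case ov::genai::GenerationFinishReason::LENGTH:
        return "length";
    default:
        return std::nullopt;  // serialized as "unknown" via value_or above
    }
}

Note that only finish_reasons[0] is consulted even though the loop iterates over all choices; since only n=1 is supported (per the index comment in the diff), the first entry covers the single choice.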

src/llm/apis/openai_responses.cpp

Lines changed: 16 additions & 2 deletions
@@ -649,10 +649,17 @@ std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::EncodedRes
     usage.promptTokens = results.perf_metrics.get_num_input_tokens();
     usage.completionTokens = results.perf_metrics.get_num_generated_tokens();
     std::vector<ParsedOutput> parsedOutputs;
+    ov::genai::GenerationFinishReason responsesFinishReason = ov::genai::GenerationFinishReason::STOP;
     for (const auto& tokens : results.tokens) {
         parsedOutputs.push_back(parseOutputIfNeeded(tokens));
     }
-    return serializeUnaryResponseImpl(parsedOutputs);
+    for (const auto& finishReason : results.finish_reasons) {
+        if (finishReason == ov::genai::GenerationFinishReason::LENGTH) {
+            responsesFinishReason = ov::genai::GenerationFinishReason::LENGTH;
+            break;
+        }
+    }
+    return serializeUnaryResponseImpl(parsedOutputs, responsesFinishReason);
 }
 
 std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::VLMDecodedResults& results, const std::string& textResponse) {

@@ -673,7 +680,14 @@ std::string OpenAIResponsesHandler::serializeUnaryResponse(ov::genai::VLMDecoded
             parsedOutputs.push_back(std::move(output));
         }
     }
-    return serializeUnaryResponseImpl(parsedOutputs);
+    ov::genai::GenerationFinishReason responsesFinishReason = ov::genai::GenerationFinishReason::STOP;
+    for (const auto& finishReason : results.finish_reasons) {
+        if (finishReason == ov::genai::GenerationFinishReason::LENGTH) {
+            responsesFinishReason = ov::genai::GenerationFinishReason::LENGTH;
+            break;
+        }
+    }
+    return serializeUnaryResponseImpl(parsedOutputs, responsesFinishReason);
 }
 
 // --- Streaming event building blocks ---
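The LENGTH-precedence scan is duplicated verbatim in both serializeUnaryResponse overloads. A hypothetical refactor (not part of this commit) that both call sites could share:

#include <algorithm>
#include <vector>

// Hypothetical helper: collapse per-choice finish reasons into a single
// reason for the Responses API. LENGTH (token budget exhausted) takes
// precedence over STOP; an empty vector falls back to STOP.
static ov::genai::GenerationFinishReason aggregateFinishReasons(
    const std::vector<ov::genai::GenerationFinishReason>& reasons) {
    const bool anyLength = std::any_of(reasons.begin(), reasons.end(),
        [](ov::genai::GenerationFinishReason r) {
            return r == ov::genai::GenerationFinishReason::LENGTH;
        });
    return anyLength ? ov::genai::GenerationFinishReason::LENGTH
                     : ov::genai::GenerationFinishReason::STOP;
}

Both overloads would then reduce to: return serializeUnaryResponseImpl(parsedOutputs, aggregateFinishReasons(results.finish_reasons));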

src/llm/language_model/legacy/servable.cpp

Lines changed: 5 additions & 1 deletion
@@ -229,7 +229,11 @@ absl::Status LegacyServable::preparePartialResponse(std::shared_ptr<GenAiServabl
     if (!executionContext->lastStreamerCallbackOutput.empty()) {
         lastTextChunk = lastTextChunk + executionContext->lastStreamerCallbackOutput;
     }
-    std::string serializedChunk = executionContext->apiHandler->serializeStreamingChunk(lastTextChunk, ov::genai::GenerationFinishReason::STOP);
+    ov::genai::GenerationFinishReason finishReason = ov::genai::GenerationFinishReason::STOP;
+    if (!legacyExecutionContext->results.finish_reasons.empty()) {
+        finishReason = legacyExecutionContext->results.finish_reasons[0];
+    }
+    std::string serializedChunk = executionContext->apiHandler->serializeStreamingChunk(lastTextChunk, finishReason);
     if (!serializedChunk.empty()) {
         executionContext->response = wrapTextInServerSideEventMessage(serializedChunk);
     }
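Previously the final streaming chunk always hard-coded STOP; now the first reported finish reason is forwarded, so a truncated generation can surface as "length". The same first-or-STOP fallback appears again in the visual language model servable below; a hypothetical shared helper (not in the commit) illustrating the pattern:

#include <vector>

// Hypothetical sketch: legacy pipelines serve a single choice (n=1), so the
// first finish reason covers it; an empty vector means the pipeline reported
// nothing, and STOP is the safe default.
inline ov::genai::GenerationFinishReason firstFinishReasonOrStop(
    const std::vector<ov::genai::GenerationFinishReason>& reasons) {
    return reasons.empty() ? ov::genai::GenerationFinishReason::STOP
                           : reasons.front();
}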

src/llm/visual_language_model/legacy/servable.cpp

Lines changed: 5 additions & 1 deletion
@@ -245,7 +245,11 @@ absl::Status VisualLanguageModelLegacyServable::preparePartialResponse(std::shar
     if (!executionContext->lastStreamerCallbackOutput.empty()) {
         lastTextChunk = lastTextChunk + executionContext->lastStreamerCallbackOutput;
     }
-    std::string serializedChunk = executionContext->apiHandler->serializeStreamingChunk(lastTextChunk, ov::genai::GenerationFinishReason::STOP);
+    ov::genai::GenerationFinishReason finishReason = ov::genai::GenerationFinishReason::STOP;
+    if (!legacyExecutionContext->results.finish_reasons.empty()) {
+        finishReason = legacyExecutionContext->results.finish_reasons[0];
+    }
+    std::string serializedChunk = executionContext->apiHandler->serializeStreamingChunk(lastTextChunk, finishReason);
     if (!serializedChunk.empty()) {
         executionContext->response = wrapTextInServerSideEventMessage(serializedChunk);
     }
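This hunk mirrors the text-only LegacyServable change above line for line; both call sites could use the firstFinishReasonOrStop sketch shown there if the duplication is ever factored out.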

src/test/llm/llmnode_test.cpp

Lines changed: 1 addition & 1 deletion
@@ -2688,7 +2688,7 @@ INSTANTIATE_TEST_SUITE_P(
     ::testing::Values(
         // params: model name, generate expected output, check logprobs, check finish reason, test speculative decoding, supports empty handshake msg
         TestParameters{"lm_cb_regular", true, true, true, false, true},
-        TestParameters{"lm_legacy_regular", false, false, false, false, false},
+        TestParameters{"lm_legacy_regular", false, false, true, false, false},
         TestParameters{"vlm_cb_regular", false, true, true, false, true},
         TestParameters{"vlm_legacy_regular", false, false, false, false, false}));
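The flipped boolean is the fourth parameter, "check finish reason" (per the comment above), so the suite now verifies finish reasons for lm_legacy_regular; vlm_legacy_regular keeps the check disabled. A hypothetical sketch of how such a flag typically gates an assertion in the test body (the body itself is not shown in this diff):

// Hypothetical, assuming TestParameters carries a checkFinishReason flag
// and the response was parsed with rapidjson:
if (params.checkFinishReason) {
    ASSERT_TRUE(response["choices"][0].HasMember("finish_reason"));
    EXPECT_STREQ("stop", response["choices"][0]["finish_reason"].GetString());
}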
