@@ -1715,6 +1715,173 @@ TEST_F(HttpOpenAIHandlerParsingTest, serializeUnaryResponseForResponsesCompleted
17151715 ASSERT_NE (serialized.find (" \" metadata\" :{}" ), std::string::npos) << serialized;
17161716}
17171717
1718+ TEST_F (HttpOpenAIHandlerParsingTest, serializeUnaryResponseForResponsesEncodedResultsIncompleteOnLength) {
1719+ std::string json = R"( {
1720+ "model": "llama",
1721+ "input": "What is OpenVINO?",
1722+ "max_output_tokens": 5
1723+ })" ;
1724+ doc.Parse (json.c_str ());
1725+ ASSERT_FALSE (doc.HasParseError ());
1726+
1727+ auto apiHandler = std::make_shared<ovms::OpenAIResponsesHandler>(doc, ovms::Endpoint::RESPONSES , std::chrono::system_clock::now (), *tokenizer);
1728+ std::optional<uint32_t > maxTokensLimit;
1729+ uint32_t bestOfLimit = 0 ;
1730+ std::optional<uint32_t > maxModelLength;
1731+ ASSERT_EQ (apiHandler->parseRequest (maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus ());
1732+
1733+ ov::genai::EncodedResults results;
1734+ ov::Tensor outputIds = tokenizer->encode (" OVMS" , ov::genai::add_special_tokens (false )).input_ids ;
1735+ const auto & shape = outputIds.get_shape ();
1736+ ASSERT_EQ (shape.size (), 2 );
1737+ ASSERT_EQ (shape[0 ], 1 );
1738+ ASSERT_EQ (outputIds.get_element_type (), ov::element::i64 );
1739+ int64_t * outputIdsData = reinterpret_cast <int64_t *>(outputIds.data ());
1740+ results.tokens = {std::vector<int64_t >(outputIdsData, outputIdsData + shape[1 ])};
1741+ results.finish_reasons = {ov::genai::GenerationFinishReason::LENGTH };
1742+
1743+ std::string serialized = apiHandler->serializeUnaryResponse (results);
1744+
1745+ ASSERT_NE (serialized.find (" \" status\" :\" incomplete\" " ), std::string::npos) << serialized;
1746+ ASSERT_NE (serialized.find (" \" incomplete_details\" " ), std::string::npos) << serialized;
1747+ ASSERT_NE (serialized.find (" \" reason\" :\" max_tokens\" " ), std::string::npos) << serialized;
1748+ ASSERT_EQ (serialized.find (" \" completed_at\" " ), std::string::npos) << serialized;
1749+ ASSERT_EQ (serialized.find (" \" status\" :\" completed\" " ), std::string::npos) << serialized;
1750+ }
1751+
1752+ TEST_F (HttpOpenAIHandlerParsingTest, serializeUnaryResponseForResponsesEncodedResultsCompletedOnStop) {
1753+ std::string json = R"( {
1754+ "model": "llama",
1755+ "input": "What is OpenVINO?",
1756+ "max_output_tokens": 5
1757+ })" ;
1758+ doc.Parse (json.c_str ());
1759+ ASSERT_FALSE (doc.HasParseError ());
1760+
1761+ auto apiHandler = std::make_shared<ovms::OpenAIResponsesHandler>(doc, ovms::Endpoint::RESPONSES , std::chrono::system_clock::now (), *tokenizer);
1762+ std::optional<uint32_t > maxTokensLimit;
1763+ uint32_t bestOfLimit = 0 ;
1764+ std::optional<uint32_t > maxModelLength;
1765+ ASSERT_EQ (apiHandler->parseRequest (maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus ());
1766+
1767+ ov::genai::EncodedResults results;
1768+ ov::Tensor outputIds = tokenizer->encode (" OVMS" , ov::genai::add_special_tokens (false )).input_ids ;
1769+ int64_t * outputIdsData = reinterpret_cast <int64_t *>(outputIds.data ());
1770+ results.tokens = {std::vector<int64_t >(outputIdsData, outputIdsData + outputIds.get_shape ()[1 ])};
1771+ results.finish_reasons = {ov::genai::GenerationFinishReason::STOP };
1772+
1773+ std::string serialized = apiHandler->serializeUnaryResponse (results);
1774+
1775+ ASSERT_NE (serialized.find (" \" status\" :\" completed\" " ), std::string::npos) << serialized;
1776+ ASSERT_NE (serialized.find (" \" completed_at\" " ), std::string::npos) << serialized;
1777+ ASSERT_EQ (serialized.find (" \" incomplete_details\" " ), std::string::npos) << serialized;
1778+ }
1779+
1780+ TEST_F (HttpOpenAIHandlerParsingTest, serializeUnaryResponseForResponsesVLMDecodedResultsIncompleteOnLength) {
1781+ std::string json = R"( {
1782+ "model": "llama",
1783+ "input": "What is OpenVINO?",
1784+ "max_output_tokens": 5
1785+ })" ;
1786+ doc.Parse (json.c_str ());
1787+ ASSERT_FALSE (doc.HasParseError ());
1788+
1789+ auto apiHandler = std::make_shared<ovms::OpenAIResponsesHandler>(doc, ovms::Endpoint::RESPONSES , std::chrono::system_clock::now (), *tokenizer);
1790+ std::optional<uint32_t > maxTokensLimit;
1791+ uint32_t bestOfLimit = 0 ;
1792+ std::optional<uint32_t > maxModelLength;
1793+ ASSERT_EQ (apiHandler->parseRequest (maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus ());
1794+
1795+ ov::genai::VLMDecodedResults results;
1796+ std::string text = " OVMS" ;
1797+ results.texts = {text};
1798+ results.finish_reasons = {ov::genai::GenerationFinishReason::LENGTH };
1799+
1800+ std::string serialized = apiHandler->serializeUnaryResponse (results, text);
1801+
1802+ ASSERT_NE (serialized.find (" \" status\" :\" incomplete\" " ), std::string::npos) << serialized;
1803+ ASSERT_NE (serialized.find (" \" incomplete_details\" " ), std::string::npos) << serialized;
1804+ ASSERT_NE (serialized.find (" \" reason\" :\" max_tokens\" " ), std::string::npos) << serialized;
1805+ ASSERT_EQ (serialized.find (" \" completed_at\" " ), std::string::npos) << serialized;
1806+ ASSERT_EQ (serialized.find (" \" status\" :\" completed\" " ), std::string::npos) << serialized;
1807+ }
1808+
1809+ TEST_F (HttpOpenAIHandlerParsingTest, serializeUnaryResponseForResponsesVLMDecodedResultsCompletedOnStop) {
1810+ std::string json = R"( {
1811+ "model": "llama",
1812+ "input": "What is OpenVINO?",
1813+ "max_output_tokens": 5
1814+ })" ;
1815+ doc.Parse (json.c_str ());
1816+ ASSERT_FALSE (doc.HasParseError ());
1817+
1818+ auto apiHandler = std::make_shared<ovms::OpenAIResponsesHandler>(doc, ovms::Endpoint::RESPONSES , std::chrono::system_clock::now (), *tokenizer);
1819+ std::optional<uint32_t > maxTokensLimit;
1820+ uint32_t bestOfLimit = 0 ;
1821+ std::optional<uint32_t > maxModelLength;
1822+ ASSERT_EQ (apiHandler->parseRequest (maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus ());
1823+
1824+ ov::genai::VLMDecodedResults results;
1825+ std::string text = " OVMS" ;
1826+ results.texts = {text};
1827+ results.finish_reasons = {ov::genai::GenerationFinishReason::STOP };
1828+
1829+ std::string serialized = apiHandler->serializeUnaryResponse (results, text);
1830+
1831+ ASSERT_NE (serialized.find (" \" status\" :\" completed\" " ), std::string::npos) << serialized;
1832+ ASSERT_NE (serialized.find (" \" completed_at\" " ), std::string::npos) << serialized;
1833+ ASSERT_EQ (serialized.find (" \" incomplete_details\" " ), std::string::npos) << serialized;
1834+ }
1835+
1836+ TEST_F (HttpOpenAIHandlerParsingTest, serializeUnaryResponseChatCompletionsEncodedResultsLengthFinishReason) {
1837+ std::string json = R"( {
1838+ "model": "llama",
1839+ "stream": false,
1840+ "messages": [{"role": "user", "content": "What is OpenVINO?"}]
1841+ })" ;
1842+ doc.Parse (json.c_str ());
1843+ ASSERT_FALSE (doc.HasParseError ());
1844+
1845+ auto apiHandler = std::make_shared<ovms::OpenAIChatCompletionsHandler>(doc, ovms::Endpoint::CHAT_COMPLETIONS , std::chrono::system_clock::now (), *tokenizer);
1846+ uint32_t maxTokensLimit = 100 ;
1847+ uint32_t bestOfLimit = 0 ;
1848+ std::optional<uint32_t > maxModelLength;
1849+ ASSERT_EQ (apiHandler->parseRequest (maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus ());
1850+
1851+ ov::genai::EncodedResults results;
1852+ ov::Tensor outputIds = tokenizer->encode (" OVMS" , ov::genai::add_special_tokens (false )).input_ids ;
1853+ int64_t * outputIdsData = reinterpret_cast <int64_t *>(outputIds.data ());
1854+ results.tokens = {std::vector<int64_t >(outputIdsData, outputIdsData + outputIds.get_shape ()[1 ])};
1855+ results.finish_reasons = {ov::genai::GenerationFinishReason::LENGTH };
1856+
1857+ std::string serialized = apiHandler->serializeUnaryResponse (results);
1858+ ASSERT_NE (serialized.find (" \" finish_reason\" :\" length\" " ), std::string::npos) << serialized;
1859+ }
1860+
1861+ TEST_F (HttpOpenAIHandlerParsingTest, serializeUnaryResponseChatCompletionsVLMDecodedResultsLengthFinishReason) {
1862+ std::string json = R"( {
1863+ "model": "llama",
1864+ "stream": false,
1865+ "messages": [{"role": "user", "content": "What is OpenVINO?"}]
1866+ })" ;
1867+ doc.Parse (json.c_str ());
1868+ ASSERT_FALSE (doc.HasParseError ());
1869+
1870+ auto apiHandler = std::make_shared<ovms::OpenAIChatCompletionsHandler>(doc, ovms::Endpoint::CHAT_COMPLETIONS , std::chrono::system_clock::now (), *tokenizer);
1871+ uint32_t maxTokensLimit = 100 ;
1872+ uint32_t bestOfLimit = 0 ;
1873+ std::optional<uint32_t > maxModelLength;
1874+ ASSERT_EQ (apiHandler->parseRequest (maxTokensLimit, bestOfLimit, maxModelLength), absl::OkStatus ());
1875+
1876+ ov::genai::VLMDecodedResults results;
1877+ std::string text = " OVMS" ;
1878+ results.texts = {text};
1879+ results.finish_reasons = {ov::genai::GenerationFinishReason::LENGTH };
1880+
1881+ std::string serialized = apiHandler->serializeUnaryResponse (results, text);
1882+ ASSERT_NE (serialized.find (" \" finish_reason\" :\" length\" " ), std::string::npos) << serialized;
1883+ }
1884+
17181885TEST_F (HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsBase64) {
17191886 std::string json = R"( {
17201887 "model": "llama",
0 commit comments