@@ -144,7 +144,9 @@ class AIAdapter {
144144
// Base-class fallback for building a multimodal embedding request.
// All parameters are unnamed and unused; the call always returns
// Status::NotSupported with a message formatted from _config.provider_type.
// The change shown here inserts media_content_types between media_urls and
// request_body, so every override must carry the same four-parameter signature.
145145 virtual Status build_multimodal_embedding_request (
146146 const std::vector<MultimodalType>& /* media_types*/ ,
147- const std::vector<std::string>& /* media_urls*/ , std::string& /* request_body*/ ) const {
147+ const std::vector<std::string>& /* media_urls*/ ,
148+ const std::vector<std::string>& /* media_content_types*/ ,
149+ std::string& /* request_body*/ ) const {
148150 return Status::NotSupported (" {} does not support multimodal Embed feature." ,
149151 _config.provider_type );
150152 }
@@ -180,24 +182,19 @@ class AIAdapter {
180182 --end;
181183 }
182184
183- if (begin < end && text[begin] == ' [' && text[end - 1 ] == ' ]' && end - begin >= 4 &&
184- (text[begin + 1 ] == ' "' || text[begin + 1 ] == ' \' ' )) {
185+ if (begin < end && text[begin] == ' [' && text[end - 1 ] == ' ]' ) {
185186 rapidjson::Document doc;
186187 doc.Parse (text.data () + begin, end - begin);
187- if (doc.HasParseError ()) {
188- return Status::InternalError (" Invalid batch result format: {}" , std::string (text));
189- }
190- if (!doc.IsArray ()) {
191- return Status::InternalError (" Invalid batch result format: {}" , std::string (text));
192- }
193- for (rapidjson::SizeType i = 0 ; i < doc.Size (); ++i) {
194- if (!doc[i].IsString ()) {
195- return Status::InternalError (
196- " Invalid batch result format, array element {} is not a string" , i);
188+ if (!doc.HasParseError () && doc.IsArray ()) {
189+ for (rapidjson::SizeType i = 0 ; i < doc.Size (); ++i) {
190+ if (!doc[i].IsString ()) {
191+ return Status::InternalError (
192+ " Invalid batch result format, array element {} is not a string" , i);
193+ }
194+ results.emplace_back (doc[i].GetString (), doc[i].GetStringLength ());
197195 }
198- results. emplace_back (doc[i]. GetString () );
196+ return Status::OK ( );
199197 }
200- return Status::OK ();
201198 }
202199
203200 results.emplace_back (text.data (), text.size ());
@@ -276,6 +273,7 @@ class VoyageAIAdapter : public AIAdapter {
276273 }
277274
278275 Status build_embedding_request (const std::vector<std::string>& inputs,
276+ const std::vector<std::string>& /* media_content_types*/ ,
279277 std::string& request_body) const override {
280278 rapidjson::Document doc;
281279 doc.SetObject ();
@@ -307,9 +305,11 @@ class VoyageAIAdapter : public AIAdapter {
307305 return Status::OK ();
308306 }
309307
310- Status build_multimodal_embedding_request (const std::vector<MultimodalType>& media_types,
311- const std::vector<std::string>& media_urls,
312- std::string& request_body) const override {
308+ Status build_multimodal_embedding_request (
309+ const std::vector<MultimodalType>& media_types,
310+ const std::vector<std::string>& media_urls,
311+ const std::vector<std::string>& /* media_content_types*/ ,
312+ std::string& request_body) const override {
313313 RETURN_IF_ERROR (validate_multimodal_embedding_inputs (
314314 " VoyageAI" , media_types, media_urls,
315315 {MultimodalType::IMAGE, MultimodalType::VIDEO}));
@@ -503,6 +503,7 @@ class LocalAdapter : public AIAdapter {
503503 }
504504
505505 Status build_embedding_request (const std::vector<std::string>& inputs,
506+ const std::vector<std::string>& /* media_content_types*/ ,
506507 std::string& request_body) const override {
507508 rapidjson::Document doc;
508509 doc.SetObject ();
@@ -529,9 +530,11 @@ class LocalAdapter : public AIAdapter {
529530 return Status::OK ();
530531 }
531532
// LocalAdapter override: ignores every argument and always returns
// Status::NotSupported formatted with _config.provider_type.
// This hunk only reflows the signature and adds the unused
// media_content_types parameter; the body is unchanged.
532- Status build_multimodal_embedding_request (const std::vector<MultimodalType>& /* media_types*/ ,
533- const std::vector<std::string>& /* media_urls*/ ,
534- std::string& /* request_body*/ ) const override {
533+ Status build_multimodal_embedding_request (
534+ const std::vector<MultimodalType>& /* media_types*/ ,
535+ const std::vector<std::string>& /* media_urls*/ ,
536+ const std::vector<std::string>& /* media_content_types*/ ,
537+ std::string& /* request_body*/ ) const override {
535538 return Status::NotSupported (" {} does not support multimodal Embed feature." ,
536539 _config.provider_type );
537540 }
@@ -886,9 +889,11 @@ class OpenAIAdapter : public VoyageAIAdapter {
886889 return Status::OK ();
887890 }
888891
// OpenAIAdapter override: replaces the version inherited from VoyageAIAdapter
// with one that ignores every argument and returns Status::NotSupported
// formatted with _config.provider_type.
// This hunk only reflows the signature and adds the unused
// media_content_types parameter; the body is unchanged.
889- Status build_multimodal_embedding_request (const std::vector<MultimodalType>& /* media_types*/ ,
890- const std::vector<std::string>& /* media_urls*/ ,
891- std::string& /* request_body*/ ) const override {
892+ Status build_multimodal_embedding_request (
893+ const std::vector<MultimodalType>& /* media_types*/ ,
894+ const std::vector<std::string>& /* media_urls*/ ,
895+ const std::vector<std::string>& /* media_content_types*/ ,
896+ std::string& /* request_body*/ ) const override {
892897 return Status::NotSupported (" {} does not support multimodal Embed feature." ,
893898 _config.provider_type );
894899 }
@@ -904,6 +909,7 @@ class OpenAIAdapter : public VoyageAIAdapter {
904909class DeepSeekAdapter : public OpenAIAdapter {
905910public:
// DeepSeekAdapter override: returns whatever embed_not_supported_status()
// yields and never reads inputs or writes request_body.
// NOTE(review): embed_not_supported_status is defined outside this view;
// presumably it is a NotSupported status - confirm.
// The added media_content_types parameter is unnamed and unused.
906911 Status build_embedding_request (const std::vector<std::string>& inputs,
912+ const std::vector<std::string>& /* media_content_types*/ ,
907913 std::string& request_body) const override {
908914 return embed_not_supported_status ();
909915 }
@@ -917,6 +923,7 @@ class DeepSeekAdapter : public OpenAIAdapter {
917923class MoonShotAdapter : public OpenAIAdapter {
918924public:
// MoonShotAdapter override: returns whatever embed_not_supported_status()
// yields and never reads inputs or writes request_body.
// NOTE(review): embed_not_supported_status is defined outside this view;
// presumably it is a NotSupported status - confirm.
// The added media_content_types parameter is unnamed and unused.
919925 Status build_embedding_request (const std::vector<std::string>& inputs,
926+ const std::vector<std::string>& /* media_content_types*/ ,
920927 std::string& request_body) const override {
921928 return embed_not_supported_status ();
922929 }
@@ -930,6 +937,7 @@ class MoonShotAdapter : public OpenAIAdapter {
930937class MinimaxAdapter : public OpenAIAdapter {
931938public:
932939 Status build_embedding_request (const std::vector<std::string>& inputs,
940+ const std::vector<std::string>& /* media_content_types*/ ,
933941 std::string& request_body) const override {
934942 rapidjson::Document doc;
935943 doc.SetObject ();
@@ -966,9 +974,11 @@ class ZhipuAdapter : public OpenAIAdapter {
966974
967975class QwenAdapter : public OpenAIAdapter {
968976public:
969- Status build_multimodal_embedding_request (const std::vector<MultimodalType>& media_types,
970- const std::vector<std::string>& media_urls,
971- std::string& request_body) const override {
977+ Status build_multimodal_embedding_request (
978+ const std::vector<MultimodalType>& media_types,
979+ const std::vector<std::string>& media_urls,
980+ const std::vector<std::string>& /* media_content_types*/ ,
981+ std::string& request_body) const override {
972982 RETURN_IF_ERROR (validate_multimodal_embedding_inputs (
973983 " QWEN" , media_types, media_urls, {MultimodalType::IMAGE, MultimodalType::VIDEO}));
974984
@@ -1074,9 +1084,11 @@ class QwenAdapter : public OpenAIAdapter {
10741084
10751085class JinaAdapter : public VoyageAIAdapter {
10761086public:
1077- Status build_multimodal_embedding_request (const std::vector<MultimodalType>& media_types,
1078- const std::vector<std::string>& media_urls,
1079- std::string& request_body) const override {
1087+ Status build_multimodal_embedding_request (
1088+ const std::vector<MultimodalType>& media_types,
1089+ const std::vector<std::string>& media_urls,
1090+ const std::vector<std::string>& /* media_content_types*/ ,
1091+ std::string& request_body) const override {
10801092 RETURN_IF_ERROR (validate_multimodal_embedding_inputs (
10811093 " JINA" , media_types, media_urls, {MultimodalType::IMAGE, MultimodalType::VIDEO}));
10821094
@@ -1257,6 +1269,7 @@ class GeminiAdapter : public AIAdapter {
12571269 }
12581270
12591271 Status build_embedding_request (const std::vector<std::string>& inputs,
1272+ const std::vector<std::string>& /* media_content_types*/ ,
12601273 std::string& request_body) const override {
12611274 rapidjson::Document doc;
12621275 doc.SetObject ();
@@ -1322,10 +1335,17 @@ class GeminiAdapter : public AIAdapter {
13221335
13231336 Status build_multimodal_embedding_request (const std::vector<MultimodalType>& media_types,
13241337 const std::vector<std::string>& media_urls,
1338+ const std::vector<std::string>& media_content_types,
13251339 std::string& request_body) const override {
13261340 RETURN_IF_ERROR (validate_multimodal_embedding_inputs (
13271341 " Gemini" , media_types, media_urls,
13281342 {MultimodalType::IMAGE, MultimodalType::AUDIO, MultimodalType::VIDEO}));
1343+ if (media_content_types.size () != media_urls.size ()) {
1344+ return Status::InvalidArgument (
1345+ " Gemini multimodal embed input size mismatch, media_content_types={}, "
1346+ " media_urls={}" ,
1347+ media_content_types.size (), media_urls.size ());
1348+ }
13291349
13301350 rapidjson::Document doc;
13311351 doc.SetObject ();
@@ -1337,7 +1357,7 @@ class GeminiAdapter : public AIAdapter {
13371357 "model": "models/gemini-embedding-2-preview",
13381358 "content": {
13391359 "parts": [
1340- {"file_data": {"mime_type": "image/png ", "file_uri": "<url>"}}
1360+ {"file_data": {"mime_type": "<original content_type> ", "file_uri": "<url>"}}
13411361 ]
13421362 },
13431363 "outputDimensionality": 768
@@ -1346,7 +1366,7 @@ class GeminiAdapter : public AIAdapter {
13461366 "model": "models/gemini-embedding-2-preview",
13471367 "content": {
13481368 "parts": [
1349- {"file_data": {"mime_type": "video/mp4 ", "file_uri": "<url>"}}
1369+ {"file_data": {"mime_type": "<original content_type> ", "file_uri": "<url>"}}
13501370 ]
13511371 },
13521372 "outputDimensionality": 768
@@ -1369,7 +1389,7 @@ class GeminiAdapter : public AIAdapter {
13691389 rapidjson::Value part (rapidjson::kObjectType );
13701390 rapidjson::Value file_data (rapidjson::kObjectType );
13711391 file_data.AddMember (" mime_type" ,
1372- rapidjson::Value (_gemini_mime_type (media_types [i]), allocator),
1392+ rapidjson::Value (media_content_types [i]. c_str ( ), allocator),
13731393 allocator);
13741394 file_data.AddMember (" file_uri" , rapidjson::Value (media_urls[i].c_str (), allocator),
13751395 allocator);
@@ -1447,19 +1467,6 @@ class GeminiAdapter : public AIAdapter {
14471467 }
14481468
// Returns the fixed key name this adapter reports for the embedding dimension.
// The sample request documented in this class carries the same key with the
// value 768. NOTE(review): how callers consume this name is not visible here.
14471469 std::string get_dimension_param_name () const override { return " outputDimensionality" ; }
1450-
1451- private:
// Removed by this change. This helper mapped a MultimodalType to one fixed
// MIME string per kind (IMAGE, AUDIO, VIDEO) with an octet-stream fallback
// after the switch. The Gemini request builder in this diff now writes
// media_content_types[i] into mime_type instead of calling this helper.
1452- static const char * _gemini_mime_type (MultimodalType media_type) {
1453- switch (media_type) {
1454- case MultimodalType::IMAGE:
1455- return " image/png" ;
1456- case MultimodalType::AUDIO:
1457- return " audio/mpeg" ;
1458- case MultimodalType::VIDEO:
1459- return " video/mp4" ;
1460- }
1461- return " application/octet-stream" ;
1462- }
14631470};
14641471
14651472class AnthropicAdapter : public VoyageAIAdapter {
@@ -1585,9 +1592,11 @@ class MockAdapter : public AIAdapter {
15851592 return Status::OK ();
15861593 }
15871594
// MockAdapter override: ignores every argument, leaves request_body untouched,
// and always returns Status::OK.
// This hunk only reflows the signature and adds the unused
// media_content_types parameter; the body is unchanged.
1588- Status build_multimodal_embedding_request (const std::vector<MultimodalType>& /* media_types*/ ,
1589- const std::vector<std::string>& /* media_urls*/ ,
1590- std::string& /* request_body*/ ) const override {
1595+ Status build_multimodal_embedding_request (
1596+ const std::vector<MultimodalType>& /* media_types*/ ,
1597+ const std::vector<std::string>& /* media_urls*/ ,
1598+ const std::vector<std::string>& /* media_content_types*/ ,
1599+ std::string& /* request_body*/ ) const override {
15911600 return Status::OK ();
15921601 }
15931602
0 commit comments