@@ -180,7 +180,8 @@ class AIAdapter {
180180 --end;
181181 }
182182
183- if (begin < end && text[begin] == ' [' && text[end - 1 ] == ' ]' ) {
183+ if (begin < end && text[begin] == ' [' && text[end - 1 ] == ' ]' && end - begin >= 4 &&
184+ (text[begin + 1 ] == ' "' || text[begin + 1 ] == ' \' ' )) {
184185 rapidjson::Document doc;
185186 doc.Parse (text.data () + begin, end - begin);
186187 if (doc.HasParseError ()) {
@@ -217,6 +218,50 @@ class AIAdapter {
217218 doc.AddMember (name, _config.dimensions , allocator);
218219 }
219220 }
221+
222+ // Validates common multimodal embedding request invariants shared by providers.
//
// Parameters:
//   provider_name   - label substituted into every error message; it has no other use here.
//   media_types     - one MultimodalType per input; each is checked against supported_types.
//   media_urls      - only emptiness and element count are inspected, never the contents.
//   supported_types - the set of media types the calling provider accepts.
//
// Returns Status::InvalidArgument when media_urls is empty, when media_types and
// media_urls differ in size, or at the first media type that is not listed in
// supported_types. Returns Status::OK() when every check passes.
223+ Status validate_multimodal_embedding_inputs (
224+ std::string_view provider_name, const std::vector<MultimodalType>& media_types,
225+ const std::vector<std::string>& media_urls,
226+ std::initializer_list<MultimodalType> supported_types) const {
// An empty batch is rejected before any per-item check runs.
227+ if (media_urls.empty ()) {
228+ return Status::InvalidArgument (" {} multimodal embed inputs can not be empty" ,
229+ provider_name);
230+ }
// Types and URLs must match in count; both sizes are reported in the error.
231+ if (media_types.size () != media_urls.size ()) {
232+ return Status::InvalidArgument (
233+ " {} multimodal embed input size mismatch, media_types={}, media_urls={}" ,
234+ provider_name, media_types.size (), media_urls.size ());
235+ }
236+ for (MultimodalType media_type : media_types) {
// Linear membership scan over supported_types; stops at the first match.
237+ bool supported = false ;
238+ for (MultimodalType supported_type : supported_types) {
239+ if (media_type == supported_type) {
240+ supported = true ;
241+ break ;
242+ }
243+ }
// Fail on the first unsupported type; the message names the accepted types
// (via supported_multimodal_types_to_string) and the offending one.
244+ if (!supported) [[unlikely]] {
245+ return Status::InvalidArgument (
246+ " {} only supports {} multimodal embed, got {}" , provider_name,
247+ supported_multimodal_types_to_string (supported_types),
248+ multimodal_type_to_string (media_type));
249+ }
250+ }
251+ return Status::OK ();
252+ }
253+
// Builds one display string from supported_types: each entry is converted with
// multimodal_type_to_string and the pieces are concatenated in list order with a
// slash separator between them. An empty list yields an empty string.
// Used by validate_multimodal_embedding_inputs to describe the accepted types in
// its "only supports ..." error message.
254+ static std::string supported_multimodal_types_to_string (
255+ std::initializer_list<MultimodalType> supported_types) {
256+ std::string result;
257+ for (MultimodalType type : supported_types) {
// Append the separator only once something has been written, so the
// result never starts with a separator.
258+ if (!result.empty ()) {
259+ result += " /" ;
260+ }
261+ result += multimodal_type_to_string (type);
262+ }
263+ return result;
264+ }
220265};
221266
222267// Most LLM-providers' Embedding formats are based on VoyageAI.
@@ -265,22 +310,9 @@ class VoyageAIAdapter : public AIAdapter {
265310 Status build_multimodal_embedding_request (const std::vector<MultimodalType>& media_types,
266311 const std::vector<std::string>& media_urls,
267312 std::string& request_body) const override {
268- if (media_urls.empty ()) {
269- return Status::InvalidArgument (" VoyageAI multimodal embed inputs can not be empty" );
270- }
271- if (media_types.size () != media_urls.size ()) {
272- return Status::InvalidArgument (
273- " VoyageAI multimodal embed input size mismatch, media_types={}, media_urls={}" ,
274- media_types.size (), media_urls.size ());
275- }
276- for (MultimodalType media_type : media_types) {
277- if (media_type != MultimodalType::IMAGE && media_type != MultimodalType::VIDEO)
278- [[unlikely]] {
279- return Status::InvalidArgument (
280- " VoyageAI only supports image/video multimodal embed, got {}" ,
281- multimodal_type_to_string (media_type));
282- }
283- }
313+ RETURN_IF_ERROR (validate_multimodal_embedding_inputs (
314+ " VoyageAI" , media_types, media_urls,
315+ {MultimodalType::IMAGE, MultimodalType::VIDEO}));
284316 if (_config.dimensions != -1 ) {
285317 LOG (WARNING) << " VoyageAI multimodal embedding currently ignores dimensions parameter, "
286318 << " model=" << _config.model_name << " , dimensions=" << _config.dimensions ;
@@ -937,21 +969,8 @@ class QwenAdapter : public OpenAIAdapter {
937969 Status build_multimodal_embedding_request (const std::vector<MultimodalType>& media_types,
938970 const std::vector<std::string>& media_urls,
939971 std::string& request_body) const override {
940- if (media_urls.empty ()) {
941- return Status::InvalidArgument (" QWEN multimodal embed inputs can not be empty" );
942- }
943- if (media_types.size () != media_urls.size ()) {
944- return Status::InvalidArgument (
945- " QWEN multimodal embed input size mismatch, media_types={}, media_urls={}" ,
946- media_types.size (), media_urls.size ());
947- }
948- for (MultimodalType media_type : media_types) {
949- if (media_type != MultimodalType::IMAGE && media_type != MultimodalType::VIDEO) {
950- return Status::InvalidArgument (
951- " QWEN only supports image/video multimodal embed, got {}" ,
952- multimodal_type_to_string (media_type));
953- }
954- }
972+ RETURN_IF_ERROR (validate_multimodal_embedding_inputs (
973+ " QWEN" , media_types, media_urls, {MultimodalType::IMAGE, MultimodalType::VIDEO}));
955974
956975 rapidjson::Document doc;
957976 doc.SetObject ();
@@ -1058,22 +1077,8 @@ class JinaAdapter : public VoyageAIAdapter {
10581077 Status build_multimodal_embedding_request (const std::vector<MultimodalType>& media_types,
10591078 const std::vector<std::string>& media_urls,
10601079 std::string& request_body) const override {
1061- if (media_urls.empty ()) {
1062- return Status::InvalidArgument (" JINA multimodal embed inputs can not be empty" );
1063- }
1064- if (media_types.size () != media_urls.size ()) {
1065- return Status::InvalidArgument (
1066- " JINA multimodal embed input size mismatch, media_types={}, media_urls={}" ,
1067- media_types.size (), media_urls.size ());
1068- }
1069- for (MultimodalType media_type : media_types) {
1070- if (media_type != MultimodalType::IMAGE && media_type != MultimodalType::VIDEO)
1071- [[unlikely]] {
1072- return Status::InvalidArgument (
1073- " JINA only supports image/video multimodal embed, got {}" ,
1074- multimodal_type_to_string (media_type));
1075- }
1076- }
1080+ RETURN_IF_ERROR (validate_multimodal_embedding_inputs (
1081+ " JINA" , media_types, media_urls, {MultimodalType::IMAGE, MultimodalType::VIDEO}));
10771082
10781083 rapidjson::Document doc;
10791084 doc.SetObject ();
@@ -1318,14 +1323,9 @@ class GeminiAdapter : public AIAdapter {
13181323 Status build_multimodal_embedding_request (const std::vector<MultimodalType>& media_types,
13191324 const std::vector<std::string>& media_urls,
13201325 std::string& request_body) const override {
1321- if (media_urls.empty ()) {
1322- return Status::InvalidArgument (" Gemini multimodal embed inputs can not be empty" );
1323- }
1324- if (media_types.size () != media_urls.size ()) {
1325- return Status::InvalidArgument (
1326- " Gemini multimodal embed input size mismatch, media_types={}, media_urls={}" ,
1327- media_types.size (), media_urls.size ());
1328- }
1326+ RETURN_IF_ERROR (validate_multimodal_embedding_inputs (
1327+ " Gemini" , media_types, media_urls,
1328+ {MultimodalType::IMAGE, MultimodalType::AUDIO, MultimodalType::VIDEO}));
13291329
13301330 rapidjson::Document doc;
13311331 doc.SetObject ();
0 commit comments