Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 69 additions & 3 deletions relay/channel/vertex/adaptor.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package vertex

import (
"bytes"
"encoding/json"
"errors"
"fmt"
Expand All @@ -10,6 +11,7 @@ import (

"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/relay/channel"
"github.com/QuantumNous/new-api/relay/channel/claude"
"github.com/QuantumNous/new-api/relay/channel/gemini"
Expand Down Expand Up @@ -190,7 +192,7 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
suffix = "generateContent"
}

if strings.HasPrefix(info.UpstreamModelName, "imagen") {
if strings.HasPrefix(info.UpstreamModelName, "imagen") || strings.Contains(info.UpstreamModelName, "embedding") {
suffix = "predict"
}
return a.getRequestUrl(info, info.UpstreamModelName, suffix)
Expand Down Expand Up @@ -314,8 +316,8 @@ func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dt
}

// ConvertEmbeddingRequest converts an OpenAI-compatible embedding request for
// Vertex by delegating to the Gemini adaptor, which shares the same request
// schema. (Removes the stale "not implemented" lines left above the
// delegation, which made the real implementation unreachable.)
func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
	geminiAdaptor := gemini.Adaptor{}
	return geminiAdaptor.ConvertEmbeddingRequest(c, info, request)
}

func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
Expand All @@ -324,6 +326,67 @@ func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommo
}

// DoRequest sends the relay request upstream. For Gemini-mode embedding
// requests (URL path contains "embed") it first rewrites the Gemini-native
// embedding body into the Vertex AI :predict format:
// {"instances": [{"content": ..., "task_type": ..., "title": ...}, ...],
//  "parameters": {"outputDimensionality": ...}}.
// Parse and marshal failures are returned to the caller instead of being
// swallowed, so a malformed body does not surface as a confusing upstream
// 400 ("Should provide instances for text model prediction").
func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
	if a.RequestMode == RequestModeGemini && strings.Contains(c.Request.URL.Path, "embed") {
		bodyBytes, err := io.ReadAll(requestBody)
		if err != nil {
			return nil, err
		}

		vertexReq := make(map[string]interface{})
		instances := make([]interface{}, 0)

		if info.IsGeminiBatchEmbedding {
			var req dto.GeminiBatchEmbeddingRequest
			// Surface the parse error: previously a failed unmarshal fell
			// through and sent {"instances":[]} upstream.
			if err := common.Unmarshal(bodyBytes, &req); err != nil {
				return nil, fmt.Errorf("failed to parse gemini batch embedding request: %w", err)
			}
			for _, r := range req.Requests {
				instance := make(map[string]interface{})
				// Concatenate all text parts into a single content string.
				content := ""
				for _, part := range r.Content.Parts {
					if part.Text != "" {
						content += part.Text
					}
				}
				instance["content"] = content
				if r.TaskType != "" {
					instance["task_type"] = r.TaskType
				}
				if r.Title != "" {
					instance["title"] = r.Title
				}
				instances = append(instances, instance)
			}
		} else {
			var req dto.GeminiEmbeddingRequest
			if err := common.Unmarshal(bodyBytes, &req); err != nil {
				return nil, fmt.Errorf("failed to parse gemini embedding request: %w", err)
			}
			instance := make(map[string]interface{})
			content := ""
			for _, part := range req.Content.Parts {
				if part.Text != "" {
					content += part.Text
				}
			}
			instance["content"] = content
			if req.TaskType != "" {
				instance["task_type"] = req.TaskType
			}
			if req.Title != "" {
				instance["title"] = req.Title
			}
			instances = append(instances, instance)

			// Only the single-request form carries outputDimensionality.
			if req.OutputDimensionality > 0 {
				vertexReq["parameters"] = map[string]interface{}{
					"outputDimensionality": req.OutputDimensionality,
				}
			}
		}
		vertexReq["instances"] = instances
		newBodyBytes, err := common.Marshal(vertexReq)
		if err != nil {
			return nil, fmt.Errorf("failed to marshal vertex embedding request: %w", err)
		}
		requestBody = bytes.NewReader(newBodyBytes)
		logger.LogDebug(c, "Vertex Embedding request body: "+string(newBodyBytes))
	}
	return channel.DoApiRequest(a, c, info, requestBody)
}
Comment on lines 328 to 391
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

Surface body-conversion errors instead of swallowing them.

The new embedding rewrite path silently ignores both unmarshal and marshal failures:

  • Lines 340 and 361: if err := common.Unmarshal(bodyBytes, &req); err == nil { ... } — when parsing fails, control just falls through and the code sends {"instances":[]} to Vertex. The client then sees a confusing upstream 400 INVALID_ARGUMENT: Should provide instances for text model prediction while the real parse error is gone.
  • Line 386: newBodyBytes, _ := common.Marshal(vertexReq) — if marshalling fails, an empty/nil body is forwarded silently.

Please return the error in both cases so misuse / schema drift is observable.

🛠️ Suggested fix
 		if info.IsGeminiBatchEmbedding {
 			var req dto.GeminiBatchEmbeddingRequest
-			if err := common.Unmarshal(bodyBytes, &req); err == nil {
-				for _, r := range req.Requests {
-					instance := make(map[string]interface{})
-					content := ""
-					for _, part := range r.Content.Parts {
-						if part.Text != "" {
-							content += part.Text
-						}
-					}
-					instance["content"] = content
-					if r.TaskType != "" {
-						instance["task_type"] = r.TaskType
-					}
-					if r.Title != "" {
-						instance["title"] = r.Title
-					}
-					instances = append(instances, instance)
-				}
-			}
+			if err := common.Unmarshal(bodyBytes, &req); err != nil {
+				return nil, fmt.Errorf("failed to parse gemini batch embedding request: %w", err)
+			}
+			for _, r := range req.Requests {
+				instance := make(map[string]interface{})
+				content := ""
+				for _, part := range r.Content.Parts {
+					if part.Text != "" {
+						content += part.Text
+					}
+				}
+				instance["content"] = content
+				if r.TaskType != "" {
+					instance["task_type"] = r.TaskType
+				}
+				if r.Title != "" {
+					instance["title"] = r.Title
+				}
+				instances = append(instances, instance)
+			}
 		} else {
 			var req dto.GeminiEmbeddingRequest
-			if err := common.Unmarshal(bodyBytes, &req); err == nil {
-				instance := make(map[string]interface{})
-				content := ""
-				for _, part := range req.Content.Parts {
-					if part.Text != "" {
-						content += part.Text
-					}
-				}
-				instance["content"] = content
-				if req.TaskType != "" {
-					instance["task_type"] = req.TaskType
-				}
-				if req.Title != "" {
-					instance["title"] = req.Title
-				}
-				instances = append(instances, instance)
-
-				if req.OutputDimensionality > 0 {
-					vertexReq["parameters"] = map[string]interface{}{
-						"outputDimensionality": req.OutputDimensionality,
-					}
-				}
-			}
+			if err := common.Unmarshal(bodyBytes, &req); err != nil {
+				return nil, fmt.Errorf("failed to parse gemini embedding request: %w", err)
+			}
+			instance := make(map[string]interface{})
+			content := ""
+			for _, part := range req.Content.Parts {
+				if part.Text != "" {
+					content += part.Text
+				}
+			}
+			instance["content"] = content
+			if req.TaskType != "" {
+				instance["task_type"] = req.TaskType
+			}
+			if req.Title != "" {
+				instance["title"] = req.Title
+			}
+			instances = append(instances, instance)
+
+			if req.OutputDimensionality > 0 {
+				vertexReq["parameters"] = map[string]interface{}{
+					"outputDimensionality": req.OutputDimensionality,
+				}
+			}
 		}
 		vertexReq["instances"] = instances
-		newBodyBytes, _ := common.Marshal(vertexReq)
+		newBodyBytes, err := common.Marshal(vertexReq)
+		if err != nil {
+			return nil, fmt.Errorf("failed to marshal vertex embedding request: %w", err)
+		}
 		requestBody = bytes.NewReader(newBodyBytes)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@relay/channel/vertex/adaptor.go` around lines 328 - 391, The DoRequest
embedding-rewrite silently swallows Unmarshal and Marshal errors, causing
confusing upstream 400s; update the blocks in DoRequest (function
Adaptor.DoRequest) so that when common.Unmarshal(bodyBytes, &req) returns a
non-nil error you immediately return nil and that error (or a wrapped error with
context like "gemini embedding unmarshal"), and likewise check the error from
common.Marshal(vertexReq) and return it instead of ignoring it; reference the
Unmarshal locations that handle dto.GeminiBatchEmbeddingRequest and
dto.GeminiEmbeddingRequest and the Marshal call that produces newBodyBytes to
make the changes.


Expand All @@ -347,6 +410,9 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
case RequestModeClaude:
return claudeAdaptor.DoResponse(c, resp, info)
case RequestModeGemini:
if isVertexEmbedding(info) {
return vertexEmbeddingHandler(c, resp, info)
}
if info.RelayMode == constant.RelayModeGemini {
return gemini.GeminiTextGenerationHandler(c, info, resp)
} else {
Expand Down
94 changes: 93 additions & 1 deletion relay/channel/vertex/relay-vertex.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,33 @@
package vertex

import "github.com/QuantumNous/new-api/common"
import (
"io"
"net/http"
"strings"

"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/logger"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/types"

"github.com/gin-gonic/gin"
)

// isVertexEmbedding decides whether to route the response through the Vertex
// embedding handler. Both the OpenAI-compatible /v1/embeddings path and the
// Gemini-native :embedContent / :batchEmbedContents paths land here, plus
// embedding model names regardless of relay mode.
// isVertexEmbedding reports whether a response should be routed through the
// Vertex embedding handler. It matches the OpenAI-compatible /v1/embeddings
// path and the Gemini-native :embedContent / :batchEmbedContents paths, as
// well as known embedding model-name prefixes regardless of relay mode.
func isVertexEmbedding(info *relaycommon.RelayInfo) bool {
	if strings.Contains(info.RequestURLPath, "embed") {
		return true
	}
	embeddingPrefixes := []string{
		"gemini-embedding",
		"text-embedding",
		"text-multilingual-embedding",
	}
	for _, prefix := range embeddingPrefixes {
		if strings.HasPrefix(info.UpstreamModelName, prefix) {
			return true
		}
	}
	return false
}

func GetModelRegion(other string, localModelName string) string {
// if other is json string
Expand All @@ -20,3 +47,68 @@ func GetModelRegion(other string, localModelName string) string {
}
return other
}

// VertexEmbeddingResponse models the Vertex AI ":predict" response body for
// text embedding models. Only the fields read by vertexEmbeddingHandler are
// declared.
type VertexEmbeddingResponse struct {
	// Predictions holds one entry per input instance.
	Predictions []struct {
		Embeddings struct {
			// Statistics carries per-input token accounting used for billing.
			Statistics struct {
				TokenCount int `json:"token_count"`
			} `json:"statistics"`
			// Values is the embedding vector for the instance.
			Values []float64 `json:"values"`
		} `json:"embeddings"`
	} `json:"predictions"`
	// Metadata is response-level accounting; note Vertex uses camelCase here,
	// unlike the snake_case token_count above.
	Metadata struct {
		BillableCharacterCount int `json:"billableCharacterCount"`
	} `json:"metadata"`
}

// vertexEmbeddingHandler converts a Vertex AI :predict embedding response
// into the OpenAI embeddings format, writes it to the client, and returns
// token usage derived from the per-prediction statistics.
//
// Non-2xx upstream responses are delegated to the shared relay error handler
// instead of being unmarshalled: a Vertex error body ({"error":{...}})
// decodes into an empty Predictions slice, which would otherwise be sent to
// the client as a 200 with an empty embedding list and zero billed tokens.
func vertexEmbeddingHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.Usage, *types.NewAPIError) {
	defer service.CloseResponseBodyGracefully(resp)

	// Propagate upstream failures before touching the body, mirroring the
	// pattern used by the other relay handlers.
	if resp.StatusCode != http.StatusOK {
		return nil, service.RelayErrorHandler(c.Request.Context(), resp, false)
	}

	responseBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
	}

	if common.DebugEnabled {
		logger.LogDebug(c, "Vertex Embedding response body: "+string(responseBody))
	}

	var vertexResponse VertexEmbeddingResponse
	if err := common.Unmarshal(responseBody, &vertexResponse); err != nil {
		return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
	}

	openAIResponse := dto.OpenAIEmbeddingResponse{
		Object: "list",
		Data:   make([]dto.OpenAIEmbeddingResponseItem, 0, len(vertexResponse.Predictions)),
		Model:  info.UpstreamModelName,
	}

	// Sum per-prediction token counts; Vertex reports usage per instance.
	tokenCount := 0
	for i, prediction := range vertexResponse.Predictions {
		openAIResponse.Data = append(openAIResponse.Data, dto.OpenAIEmbeddingResponseItem{
			Object:    "embedding",
			Embedding: prediction.Embeddings.Values,
			Index:     i,
		})
		tokenCount += prediction.Embeddings.Statistics.TokenCount
	}

	usage := &dto.Usage{
		PromptTokens: tokenCount,
		TotalTokens:  tokenCount,
	}
	openAIResponse.Usage = *usage

	jsonResponse, err := common.Marshal(openAIResponse)
	if err != nil {
		return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
	}

	c.Writer.Header().Set("Content-Type", "application/json")
	c.Writer.WriteHeader(http.StatusOK)
	_, _ = c.Writer.Write(jsonResponse)

	return usage, nil
}
Comment on lines +65 to +114
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check whether other handlers in this repo guard on resp.StatusCode and how relay dispatches DoResponse.
rg -nP -C3 '\bresp\.StatusCode\b' relay/channel/vertex relay/channel/gemini
rg -nP -C5 'DoResponse\(' relay/relay_adaptor.go relay/relay_text.go relay/relay_embedding.go 2>/dev/null
fd -t f -e go . relay | xargs rg -nP -C2 'StatusCode\s*!=\s*http\.StatusOK' | head -n 80

Repository: QuantumNous/new-api

Length of output: 6210


Add status code check to vertexEmbeddingHandler.

vertexEmbeddingHandler unconditionally unmarshals resp.Body without checking resp.StatusCode. When Vertex returns an error (4xx/5xx), the body contains {"error":{...}} which silently unmarshals into a struct with empty Predictions. The client receives a 200 OK with an empty embedding list while the billing layer records 0 tokens for what was actually an upstream failure.

Add an early return on non-2xx status, mirroring the pattern used throughout the relay framework (e.g., relay/embedding_handler.go:72, relay/claude_handler.go:196):

if resp.StatusCode != http.StatusOK {
    newAPIError := service.RelayErrorHandler(c.Request.Context(), resp, false)
    return nil, newAPIError
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@relay/channel/vertex/relay-vertex.go` around lines 65 - 114,
vertexEmbeddingHandler currently unmarshals resp.Body even for non-2xx upstream
responses, producing empty Predictions and returning 200; add an early status
check after reading the body (or immediately after defer) to mirror the relay
pattern: if resp.StatusCode != http.StatusOK call
service.RelayErrorHandler(c.Request.Context(), resp, false) and return its
result as the *types.NewAPIError so the handler stops and propagates the proper
error; update vertexEmbeddingHandler (and ensure this behavior applies before
unmarshalling into VertexEmbeddingResponse and before constructing the
OpenAIEmbeddingResponse/usage).