Skip to content

Commit 93de196

Browse files
authored
Merge pull request #3359 from alfadb/fix/glm-effort-mapping
修复 GLM 推理强度映射
2 parents bed12b7 + 89cfe24 commit 93de196

4 files changed

Lines changed: 170 additions & 0 deletions

File tree

backend/internal/service/gateway_request.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,53 @@ func ApplyThinkingEnabledFallback(effort *string, body []byte, mappedModel strin
12691269
return DefaultEffortForThinkingEnabled(mappedModel)
12701270
}
12711271

1272+
// NormalizeGLMOpenAIReasoningEffort rewrites OpenAI Chat Completions
1273+
// reasoning_effort values to the GLM native scale used by z.ai: high/max.
1274+
// It only applies to glm-* mapped models and leaves all other providers untouched.
1275+
func NormalizeGLMOpenAIReasoningEffort(body []byte, mappedModel string) ([]byte, bool) {
1276+
if !strings.HasPrefix(strings.ToLower(strings.TrimSpace(mappedModel)), "glm-") {
1277+
return body, false
1278+
}
1279+
1280+
path := "reasoning.effort"
1281+
raw := strings.TrimSpace(gjson.GetBytes(body, path).String())
1282+
if raw == "" {
1283+
path = "reasoning_effort"
1284+
raw = strings.TrimSpace(gjson.GetBytes(body, path).String())
1285+
}
1286+
if raw == "" {
1287+
return body, false
1288+
}
1289+
1290+
mapped := normalizeGLMOpenAIReasoningEffort(raw)
1291+
if mapped == "" || mapped == raw {
1292+
return body, false
1293+
}
1294+
1295+
modified, err := sjson.SetBytes(body, path, mapped)
1296+
if err != nil {
1297+
return body, false
1298+
}
1299+
return modified, true
1300+
}
1301+
1302+
// normalizeGLMOpenAIReasoningEffort maps a client-supplied effort label onto
// one of the two GLM levels, "high" or "max".
//
// Matching is case-insensitive, ignores surrounding whitespace and drops any
// '-', '_' or ' ' separators, so "x-high", "X_High" and "xhigh" are treated
// alike. An empty string is returned for blank or unrecognized input.
func normalizeGLMOpenAIReasoningEffort(raw string) string {
	folded := strings.ToLower(strings.TrimSpace(raw))

	// Strip separator characters so spelling variants collapse to one key.
	key := strings.Map(func(r rune) rune {
		if r == '-' || r == '_' || r == ' ' {
			return -1
		}
		return r
	}, folded)

	if key == "low" || key == "medium" || key == "high" {
		return "high"
	}
	if key == "xhigh" || key == "extrahigh" || key == "max" || key == "ultracode" {
		return "max"
	}
	return ""
}
1318+
12721319
// =========================
12731320
// Thinking Budget Rectifier
12741321
// =========================

backend/internal/service/gateway_request_test.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,3 +1512,96 @@ func TestApplyThinkingEnabledFallback(t *testing.T) {
15121512
})
15131513
}
15141514
}
1515+
1516+
func TestNormalizeGLMOpenAIReasoningEffort(t *testing.T) {
1517+
tests := []struct {
1518+
name string
1519+
model string
1520+
input string
1521+
wantApplied bool
1522+
wantPath string
1523+
wantValue string
1524+
wantUnchanged bool
1525+
}{
1526+
{
1527+
name: "flat xhigh maps to max",
1528+
model: "glm-5.2",
1529+
input: `{"model":"glm-5.2","reasoning_effort":"xhigh","messages":[]}`,
1530+
wantApplied: true,
1531+
wantPath: "reasoning_effort",
1532+
wantValue: "max",
1533+
},
1534+
{
1535+
name: "flat x-high maps to max",
1536+
model: "GLM-5.2",
1537+
input: `{"model":"glm-5.2","reasoning_effort":"x-high","messages":[]}`,
1538+
wantApplied: true,
1539+
wantPath: "reasoning_effort",
1540+
wantValue: "max",
1541+
},
1542+
{
1543+
name: "flat ultracode maps to max",
1544+
model: "glm-5.2",
1545+
input: `{"model":"glm-5.2","reasoning_effort":"ultracode","messages":[]}`,
1546+
wantApplied: true,
1547+
wantPath: "reasoning_effort",
1548+
wantValue: "max",
1549+
},
1550+
{
1551+
name: "flat medium maps to high",
1552+
model: "glm-5.2",
1553+
input: `{"model":"glm-5.2","reasoning_effort":"medium","messages":[]}`,
1554+
wantApplied: true,
1555+
wantPath: "reasoning_effort",
1556+
wantValue: "high",
1557+
},
1558+
{
1559+
name: "nested high case-normalizes",
1560+
model: "glm-5.2",
1561+
input: `{"model":"glm-5.2","reasoning":{"effort":"HIGH"},"messages":[]}`,
1562+
wantApplied: true,
1563+
wantPath: "reasoning.effort",
1564+
wantValue: "high",
1565+
},
1566+
{
1567+
name: "native max unchanged",
1568+
model: "glm-5.2",
1569+
input: `{"model":"glm-5.2","reasoning_effort":"max","messages":[]}`,
1570+
wantApplied: false,
1571+
wantUnchanged: true,
1572+
},
1573+
{
1574+
name: "non glm unchanged",
1575+
model: "deepseek-v4-pro",
1576+
input: `{"model":"deepseek-v4-pro","reasoning_effort":"xhigh","messages":[]}`,
1577+
wantApplied: false,
1578+
wantUnchanged: true,
1579+
},
1580+
{
1581+
name: "missing effort unchanged",
1582+
model: "glm-5.2",
1583+
input: `{"model":"glm-5.2","messages":[]}`,
1584+
wantApplied: false,
1585+
wantUnchanged: true,
1586+
},
1587+
{
1588+
name: "unknown effort unchanged",
1589+
model: "glm-5.2",
1590+
input: `{"model":"glm-5.2","reasoning_effort":"banana","messages":[]}`,
1591+
wantApplied: false,
1592+
wantUnchanged: true,
1593+
},
1594+
}
1595+
1596+
for _, tt := range tests {
1597+
t.Run(tt.name, func(t *testing.T) {
1598+
got, applied := NormalizeGLMOpenAIReasoningEffort([]byte(tt.input), tt.model)
1599+
require.Equal(t, tt.wantApplied, applied)
1600+
if tt.wantUnchanged {
1601+
require.Equal(t, tt.input, string(got))
1602+
return
1603+
}
1604+
require.Equal(t, tt.wantValue, gjson.GetBytes(got, tt.wantPath).String())
1605+
})
1606+
}
1607+
}

backend/internal/service/openai_gateway_chat_completions_raw.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ func (s *OpenAIGatewayService) forwardAsRawChatCompletions(
8989
if upstreamModel != originalModel {
9090
upstreamBody = ReplaceModelInBody(body, upstreamModel)
9191
}
92+
if normalizedBody, normalized := NormalizeGLMOpenAIReasoningEffort(upstreamBody, upstreamModel); normalized {
93+
upstreamBody = normalizedBody
94+
}
9295

9396
// 4. Apply OpenAI fast policy on the CC body
9497
updatedBody, policyErr := s.applyOpenAIFastPolicyToBody(ctx, account, upstreamModel, upstreamBody)

backend/internal/service/openai_gateway_chat_completions_raw_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,33 @@ func TestForwardAsRawChatCompletions_PreservesDeepSeekReasoningContentInRequest(
224224
require.Equal(t, "get_weather", gjson.GetBytes(upstream.lastBody, "messages.1.tool_calls.0.function.name").String())
225225
}
226226

227+
func TestForwardAsRawChatCompletions_NormalizesGLMReasoningEffortForUpstream(t *testing.T) {
228+
gin.SetMode(gin.TestMode)
229+
230+
body := []byte(`{"model":"glm-5.2","messages":[{"role":"user","content":"hello"}],"reasoning_effort":"xhigh","stream":false}`)
231+
rec := httptest.NewRecorder()
232+
c, _ := gin.CreateTestContext(rec)
233+
c.Request = httptest.NewRequest(http.MethodPost, "/v1/chat/completions", bytes.NewReader(body))
234+
c.Request.Header.Set("Content-Type", "application/json")
235+
236+
upstream := &httpUpstreamRecorder{resp: &http.Response{
237+
StatusCode: http.StatusOK,
238+
Header: http.Header{"Content-Type": []string{"application/json"}, "x-request-id": []string{"rid_glm_effort"}},
239+
Body: io.NopCloser(strings.NewReader(`{"id":"chatcmpl_glm","object":"chat.completion","model":"glm-5.2","choices":[{"index":0,"message":{"role":"assistant","content":"ok"},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3}}`)),
240+
}}
241+
242+
svc := &OpenAIGatewayService{
243+
cfg: rawChatCompletionsTestConfig(),
244+
httpUpstream: upstream,
245+
}
246+
account := rawChatCompletionsTestAccount()
247+
248+
result, err := svc.forwardAsRawChatCompletions(context.Background(), c, account, body, "")
249+
require.NoError(t, err)
250+
require.NotNil(t, result)
251+
require.Equal(t, "max", gjson.GetBytes(upstream.lastBody, "reasoning_effort").String())
252+
}
253+
227254
func TestForwardAsRawChatCompletions_SilentRefusalTriggersFailover(t *testing.T) {
228255
gin.SetMode(gin.TestMode)
229256

0 commit comments

Comments
 (0)