Skip to content

Commit a838645

Browse files
authored
Merge pull request #696 from Yumiue/codex/gateway-plan-approval-rpc
修复 max_turn_exceeded 中断提示与错误透传
2 parents 5c8c3cd + 7372b00 commit a838645

14 files changed

Lines changed: 497 additions & 15 deletions

docs/gateway-error-catalog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
| `missing_required_field` | 200 | -32602 | 缺失必填字段(如 `params.session_id`、`params.request_id`、`payload.run_id`)。 | 直接失败,补齐字段。 |
1111
| `unsupported_action` | 200 | -32601 | 方法不存在或当前版本未实现。 | 降级到兼容方法,或提示版本不支持。 |
1212
| `internal_error` | 200 | -32603 | 网关内部异常、运行时不可用、不可归类的执行失败。 | 可短暂重试;持续失败需告警。 |
13+
| `max_turn_exceeded` | 200 | -32602 | Runtime 达到 `runtime.max_turns` 后受控停止;异步 `gateway.run` 会通过 `run_error.stop_reason=max_turn_exceeded` 透传。 | 提示用户可继续发送消息、拆分任务或调高 `runtime.max_turns`,不要按网关内部错误告警。 |
1314
| `timeout` | 200 | -32603 | Gateway 调用 runtime 超过操作超时窗口。 | 可重试并增加客户端超时预算;必要时调用 `gateway.cancel`。 |
1415
| `unauthorized` | 401 | -32602 | 未提供有效 token 或连接未完成认证。 | 刷新凭据并重新认证,不建议盲重试。 |
1516
| `access_denied` | 403 | -32602 | 已认证但 ACL/主体权限不允许当前动作或资源访问。 | 直接失败,提示权限不足。 |

docs/reference/gateway-error-catalog.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
本文档用于第三方客户端实现统一异常处理策略,覆盖 Gateway 稳定错误码集合:
44

5-
`invalid_frame`、`invalid_action`、`invalid_multimodal_payload`、`missing_required_field`、`unsupported_action`、`internal_error`、`timeout`、`unauthorized`、`access_denied`、`resource_not_found`。
5+
`invalid_frame`、`invalid_action`、`invalid_multimodal_payload`、`missing_required_field`、`unsupported_action`、`internal_error`、`max_turn_exceeded`、`timeout`、`unauthorized`、`access_denied`、`resource_not_found`。
66

77
## 1. 错误码对照表
88

@@ -14,6 +14,7 @@
1414
| `missing_required_field` | `200` | `-32600` / `-32602` | 缺失必填字段。请求层字段缺失多映射为 `-32600`,方法参数层字段缺失多映射为 `-32602`。 | 缺失 `id`;缺失 `params`;`cancel` 缺失 `run_id`。 | 调整参数补齐必填项再重试。 |
1515
| `unsupported_action` | `200` | `-32601` | 方法未注册或不被网关识别。 | 调用不存在的方法名。 | 客户端按能力探测降级,或升级服务端版本。 |
1616
| `internal_error` | `200` | `-32603` | 网关内部异常或未分类下游异常。 | 结果编码失败;runtime port 不可用;未知运行时错误。 | 采用指数退避重试;持续失败时告警。 |
17+
| `max_turn_exceeded` | `200` | `-32602` | Runtime 达到 `runtime.max_turns` 后受控停止。 | 异步 `gateway.run` 通过 `run_error` 返回 `stop_reason=max_turn_exceeded`。 | 提示用户继续发送消息、拆分任务或调高 `runtime.max_turns`;不要按网关内部错误告警。 |
1718
| `timeout` | `200` | `-32603` | 网关调用 runtime 超时(`context.DeadlineExceeded`)。 | `run/compact/cancel/loadSession/resolvePermission` 下游调用超时。 | 可重试且建议带幂等键(如固定 `run_id`)。 |
1819
| `unauthorized` | `401`(仅 /rpc) | `-32602` | 请求未通过认证。 | 未携带 token;token 非法;连接未先 `authenticate`。 | 先刷新凭证并重新认证,认证成功后再发业务请求。 |
1920
| `access_denied` | `403`(仅 /rpc) | `-32602` | 已认证但不具备该方法或资源权限。 | ACL 拒绝当前来源调用该方法;runtime 返回 access denied。 | 终止当前请求并提示授权不足,不要盲重试。 |

internal/cli/gateway_runtime_bridge.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,9 @@ func (b *gatewayRuntimePortBridge) Run(ctx context.Context, input gateway.RunInp
314314
return err
315315
}
316316
err := b.runtime.Submit(ctx, convertGatewayRunInput(input))
317+
if agentruntime.IsMaxTurnLimitError(err) {
318+
return gateway.NewRuntimeMaxTurnExceededError(err.Error())
319+
}
317320
if err != nil && isRuntimeNotFoundError(err) {
318321
sessionID := strings.TrimSpace(input.SessionID)
319322
if sessionID == "" {
@@ -326,7 +329,11 @@ func (b *gatewayRuntimePortBridge) Run(ctx context.Context, input gateway.RunInp
326329
if _, createErr := creator.CreateSession(ctx, sessionID); createErr != nil {
327330
return err
328331
}
329-
return b.runtime.Submit(ctx, convertGatewayRunInput(input))
332+
retryErr := b.runtime.Submit(ctx, convertGatewayRunInput(input))
333+
if agentruntime.IsMaxTurnLimitError(retryErr) {
334+
return gateway.NewRuntimeMaxTurnExceededError(retryErr.Error())
335+
}
336+
return retryErr
330337
}
331338
return err
332339
}

internal/gateway/bootstrap.go

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -523,15 +523,19 @@ func dispatchRunFrameWithSubjectID(
523523
)
524524
}
525525
if relayExists && relay != nil {
526-
errorCode := "INTERNAL_ERROR"
526+
errorCode := ErrorCodeInternalError.String()
527527
errorMessage := "run failed"
528+
stopReason := ""
528529
if failedFrame.Error != nil {
529-
if normalizedCode := strings.ToUpper(strings.TrimSpace(failedFrame.Error.Code)); normalizedCode != "" {
530+
if normalizedCode := strings.TrimSpace(failedFrame.Error.Code); normalizedCode != "" {
530531
errorCode = normalizedCode
531532
}
532533
if normalizedMessage := strings.TrimSpace(failedFrame.Error.Message); normalizedMessage != "" {
533534
errorMessage = normalizedMessage
534535
}
536+
if strings.TrimSpace(failedFrame.Error.Code) == ErrorCodeMaxTurnExceeded.String() {
537+
stopReason = ErrorCodeMaxTurnExceeded.String()
538+
}
535539
}
536540
fallbackSessionID := strings.TrimSpace(frameSnapshot.SessionID)
537541
if fallbackSessionID == "" {
@@ -542,14 +546,18 @@ func dispatchRunFrameWithSubjectID(
542546
fallbackRunID = strings.TrimSpace(inputSnapshot.RunID)
543547
}
544548
if fallbackSessionID != "" {
549+
payload := map[string]any{
550+
"code": errorCode,
551+
"message": errorMessage,
552+
}
553+
if stopReason != "" {
554+
payload["stop_reason"] = stopReason
555+
}
545556
relay.PublishRuntimeEvent(RuntimeEvent{
546557
Type: RuntimeEventTypeRunError,
547558
SessionID: fallbackSessionID,
548559
RunID: fallbackRunID,
549-
Payload: map[string]any{
550-
"code": errorCode,
551-
"message": errorMessage,
552-
},
560+
Payload: payload,
553561
})
554562
}
555563
}
@@ -1842,6 +1850,13 @@ func runtimeCallFailedFrame(ctx context.Context, frame MessageFrame, err error,
18421850
case errors.Is(err, ErrRuntimeResourceNotFound):
18431851
errorCode = ErrorCodeResourceNotFound
18441852
message = fmt.Sprintf("%s target not found", normalizedOperation)
1853+
case errors.Is(err, ErrRuntimeMaxTurnExceeded):
1854+
errorCode = ErrorCodeMaxTurnExceeded
1855+
if detail := RuntimeMaxTurnExceededDetail(err); detail != "" {
1856+
message = detail
1857+
} else {
1858+
message = fmt.Sprintf("%s max turn exceeded", normalizedOperation)
1859+
}
18451860
case errors.Is(err, ErrRuntimeInvalidAction):
18461861
errorCode = ErrorCodeInvalidAction
18471862
message = fmt.Sprintf("%s invalid action", normalizedOperation)

internal/gateway/bootstrap_test.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2057,6 +2057,95 @@ ASSERT:
20572057
}
20582058
}
20592059

2060+
// TestDispatchRequestFrameRunMaxTurnFailurePublishesStopReason checks the run
// path when the runtime stub's run function returns a max-turn error: the
// request must still be answered with an ack frame, and a run_error gateway
// event must reach the bound connection with code and stop_reason equal to
// ErrorCodeMaxTurnExceeded and the runtime's detail text as the message.
func TestDispatchRequestFrameRunMaxTurnFailurePublishesStopReason(t *testing.T) {
2061+
relay := NewStreamRelay(StreamRelayOptions{})
2062+
ctx, cancel := context.WithCancel(context.Background())
2063+
defer cancel()
2064+
2065+
connectionID := NewConnectionID()
2066+
connectionCtx := WithConnectionID(ctx, connectionID)
2067+
connectionCtx = WithStreamRelay(connectionCtx, relay)
2068+
2069+
// Buffered channel that captures every message the relay writes to this connection.
messageCh := make(chan RelayMessage, 8)
2070+
if err := relay.RegisterConnection(ConnectionRegistration{
2071+
ConnectionID: connectionID,
2072+
Channel: StreamChannelIPC,
2073+
Context: connectionCtx,
2074+
Cancel: cancel,
2075+
Write: func(message RelayMessage) error {
2076+
messageCh <- message
2077+
return nil
2078+
},
2079+
Close: func() {},
2080+
}); err != nil {
2081+
t.Fatalf("register connection: %v", err)
2082+
}
2083+
defer relay.dropConnection(connectionID)
2084+
2085+
// Bind the connection to the same session/run IDs that the request below carries.
if err := relay.BindConnection(connectionID, StreamBinding{
2086+
SessionID: "run-session-max-turn",
2087+
RunID: "run-max-turn",
2088+
Channel: StreamChannelIPC,
2089+
Role: StreamRoleNone,
2090+
Explicit: true,
2091+
}); err != nil {
2092+
t.Fatalf("bind connection: %v", err)
2093+
}
2094+
2095+
// Runtime stub whose run function always reports the max-turn error.
runtime := &bootstrapRuntimeStub{
2096+
runFn: func(_ context.Context, _ RunInput) error {
2097+
return NewRuntimeMaxTurnExceededError("runtime: max turn limit reached (40)")
2098+
},
2099+
}
2100+
response := dispatchRequestFrame(connectionCtx, MessageFrame{
2101+
Type: FrameTypeRequest,
2102+
Action: FrameActionRun,
2103+
RequestID: "req-run-max-turn",
2104+
SessionID: "run-session-max-turn",
2105+
RunID: "run-max-turn",
2106+
InputText: "hello",
2107+
}, runtime)
// The dispatch result itself must be an ack; the failure is asserted on the relayed event below.
2108+
if response.Type != FrameTypeAck {
2109+
t.Fatalf("response type = %q, want %q", response.Type, FrameTypeAck)
2110+
}
2111+
2112+
// Allow the run_error event up to two seconds to show up on the connection.
deadline := time.After(2 * time.Second)
2113+
for {
2114+
select {
2115+
case message := <-messageCh:
2116+
// Skip anything that is not a gateway event notification.
notification, ok := message.Payload.(protocol.JSONRPCNotification)
2117+
if !ok || notification.Method != protocol.MethodGatewayEvent {
2118+
continue
2119+
}
2120+
// Round-trip the notification params through JSON to decode them into a MessageFrame.
eventFrame := MessageFrame{}
2121+
raw, err := json.Marshal(notification.Params)
2122+
if err != nil {
2123+
t.Fatalf("marshal payload params: %v", err)
2124+
}
2125+
if err := json.Unmarshal(raw, &eventFrame); err != nil {
2126+
t.Fatalf("unmarshal event frame: %v", err)
2127+
}
2128+
payloadMap, _ := eventFrame.Payload.(map[string]any)
// Ignore events whose event_type is not run_error.
2129+
if strings.TrimSpace(fmt.Sprint(payloadMap["event_type"])) != string(RuntimeEventTypeRunError) {
2130+
continue
2131+
}
2132+
// The nested "payload" map holds the code, stop_reason and message fields under test.
envelope, _ := payloadMap["payload"].(map[string]any)
2133+
if got := strings.TrimSpace(fmt.Sprint(envelope["code"])); got != ErrorCodeMaxTurnExceeded.String() {
2134+
t.Fatalf("payload.code = %q, want %q", got, ErrorCodeMaxTurnExceeded.String())
2135+
}
2136+
if got := strings.TrimSpace(fmt.Sprint(envelope["stop_reason"])); got != ErrorCodeMaxTurnExceeded.String() {
2137+
t.Fatalf("payload.stop_reason = %q, want %q", got, ErrorCodeMaxTurnExceeded.String())
2138+
}
2139+
if got := strings.TrimSpace(fmt.Sprint(envelope["message"])); got != "runtime: max turn limit reached (40)" {
2140+
t.Fatalf("payload.message = %q, want max turn detail", got)
2141+
}
2142+
return
2143+
case <-deadline:
2144+
t.Fatal("expected max-turn run_error event")
2145+
}
2146+
}
2147+
}
2148+
20602149
func TestRuntimeCallFailedFrameSanitizesErrorAndMapsCode(t *testing.T) {
20612150
var buf bytes.Buffer
20622151
ctx := WithGatewayLogger(context.Background(), log.New(&buf, "", 0))
@@ -2108,6 +2197,19 @@ func TestRuntimeCallFailedFrameSanitizesErrorAndMapsCode(t *testing.T) {
21082197
if invalidActionErr.Error.Message != "approve_plan invalid action" {
21092198
t.Fatalf("invalid action message = %q, want %q", invalidActionErr.Error.Message, "approve_plan invalid action")
21102199
}
2200+
2201+
maxTurnErr := runtimeCallFailedFrame(
2202+
context.Background(),
2203+
frame,
2204+
NewRuntimeMaxTurnExceededError("runtime: max turn limit reached (40)"),
2205+
"run",
2206+
)
2207+
if maxTurnErr.Error == nil || maxTurnErr.Error.Code != ErrorCodeMaxTurnExceeded.String() {
2208+
t.Fatalf("max turn error payload = %#v, want max_turn_exceeded", maxTurnErr.Error)
2209+
}
2210+
if maxTurnErr.Error.Message != "runtime: max turn limit reached (40)" {
2211+
t.Fatalf("max turn message = %q, want runtime detail", maxTurnErr.Error.Message)
2212+
}
21112213
}
21122214

21132215
func TestNormalizeRunID(t *testing.T) {

internal/gateway/errors.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ const (
1818
ErrorCodeUnsupportedAction ErrorCode = "unsupported_action"
1919
// ErrorCodeInternalError 表示网关内部错误。
2020
ErrorCodeInternalError ErrorCode = "internal_error"
21+
// ErrorCodeMaxTurnExceeded 表示 runtime 达到单次运行最大轮数后受控停止。
22+
ErrorCodeMaxTurnExceeded ErrorCode = "max_turn_exceeded"
2123
// ErrorCodeTimeout 表示网关下游调用超时。
2224
ErrorCodeTimeout ErrorCode = "timeout"
2325
// ErrorCodeUnauthorized 表示请求未通过认证校验。
@@ -41,6 +43,7 @@ var stableErrorCodes = map[string]struct{}{
4143
string(ErrorCodeMissingRequiredField): {},
4244
string(ErrorCodeUnsupportedAction): {},
4345
string(ErrorCodeInternalError): {},
46+
string(ErrorCodeMaxTurnExceeded): {},
4447
string(ErrorCodeTimeout): {},
4548
string(ErrorCodeUnauthorized): {},
4649
string(ErrorCodeAccessDenied): {},

internal/gateway/errors_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,14 @@ func TestStableErrorCodes(t *testing.T) {
1010
ErrorCodeMissingRequiredField,
1111
ErrorCodeUnsupportedAction,
1212
ErrorCodeInternalError,
13+
ErrorCodeMaxTurnExceeded,
1314
ErrorCodeTimeout,
1415
ErrorCodeUnauthorized,
1516
ErrorCodeAccessDenied,
17+
ErrorCodeResourceNotFound,
18+
ErrorCodeRunnerOffline,
19+
ErrorCodeCapabilityDenied,
20+
ErrorCodeToolExecutionFailed,
1621
}
1722

1823
for _, code := range codes {

internal/gateway/protocol/jsonrpc.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ const (
140140
GatewayCodeUnsupportedAction = "unsupported_action"
141141
// GatewayCodeInternalError 表示网关内部错误。
142142
GatewayCodeInternalError = "internal_error"
143+
// GatewayCodeMaxTurnExceeded 表示 runtime 达到单次运行最大轮数后受控停止。
144+
GatewayCodeMaxTurnExceeded = "max_turn_exceeded"
143145
// GatewayCodeTimeout 表示网关处理请求时发生超时。
144146
GatewayCodeTimeout = "timeout"
145147
// GatewayCodeUnsafePath 表示路径存在安全风险。
@@ -1201,7 +1203,8 @@ func MapGatewayCodeToJSONRPCCode(gatewayCode string) int {
12011203
GatewayCodeUnsafePath,
12021204
GatewayCodeUnauthorized,
12031205
GatewayCodeAccessDenied,
1204-
GatewayCodeResourceNotFound:
1206+
GatewayCodeResourceNotFound,
1207+
GatewayCodeMaxTurnExceeded:
12051208
return JSONRPCCodeInvalidParams
12061209
case GatewayCodeInternalError:
12071210
return JSONRPCCodeInternalError

internal/gateway/runtime_errors.go

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package gateway
22

3-
import "errors"
3+
import (
4+
"errors"
5+
"strings"
6+
)
47

58
var (
69
// ErrRuntimeAccessDenied 表示运行时拒绝当前主体访问目标资源。
@@ -9,4 +12,42 @@ var (
912
ErrRuntimeResourceNotFound = errors.New("runtime resource not found")
1013
// ErrRuntimeInvalidAction 表示运行时拒绝了语义非法或已过期的动作。
1114
ErrRuntimeInvalidAction = errors.New("runtime invalid action")
15+
// ErrRuntimeMaxTurnExceeded 表示运行时达到 runtime.max_turns 后受控停止。
16+
ErrRuntimeMaxTurnExceeded = errors.New("runtime max turn exceeded")
1217
)
18+
19+
// RuntimeMaxTurnExceededError 携带 runtime 原始 max_turns 停止说明,供 Gateway 对外展示。
20+
type RuntimeMaxTurnExceededError struct {
21+
Detail string
22+
}
23+
24+
// Error 返回可展示的 max_turns 停止说明。
25+
func (e RuntimeMaxTurnExceededError) Error() string {
26+
detail := strings.TrimSpace(e.Detail)
27+
if detail != "" {
28+
return detail
29+
}
30+
return ErrRuntimeMaxTurnExceeded.Error()
31+
}
32+
33+
// Unwrap 保留稳定哨兵错误,便于 errors.Is 做语义判断。
34+
func (e RuntimeMaxTurnExceededError) Unwrap() error {
35+
return ErrRuntimeMaxTurnExceeded
36+
}
37+
38+
// NewRuntimeMaxTurnExceededError 创建带细节的 max_turns 受控停止错误。
39+
func NewRuntimeMaxTurnExceededError(detail string) error {
40+
return RuntimeMaxTurnExceededError{Detail: detail}
41+
}
42+
43+
// RuntimeMaxTurnExceededDetail 提取 max_turns 受控停止错误中的展示文本。
44+
func RuntimeMaxTurnExceededDetail(err error) string {
45+
var target RuntimeMaxTurnExceededError
46+
if errors.As(err, &target) {
47+
return target.Error()
48+
}
49+
if errors.Is(err, ErrRuntimeMaxTurnExceeded) {
50+
return ErrRuntimeMaxTurnExceeded.Error()
51+
}
52+
return ""
53+
}

internal/runtime/errors_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,16 @@ func TestHandleRunErrorProviderErrorDoesNotWriteStdLog(t *testing.T) {
4040
}
4141

4242
}
43+
44+
func TestIsMaxTurnLimitError(t *testing.T) {
45+
err := newMaxTurnLimitError(40)
46+
if !IsMaxTurnLimitError(err) {
47+
t.Fatal("expected direct max turn error to be recognized")
48+
}
49+
if !IsMaxTurnLimitError(errors.Join(errors.New("outer"), err)) {
50+
t.Fatal("expected joined max turn error to be recognized")
51+
}
52+
if IsMaxTurnLimitError(errors.New("runtime: max turn limit reached (40)")) {
53+
t.Fatal("plain text error should not be treated as max turn error")
54+
}
55+
}

0 commit comments

Comments
 (0)