|
| 1 | +//go:build !windows |
| 2 | + |
| 3 | +package ptyproxy |
| 4 | + |
| 5 | +import ( |
| 6 | + "context" |
| 7 | + "crypto/sha256" |
| 8 | + "encoding/hex" |
| 9 | + "encoding/json" |
| 10 | + "errors" |
| 11 | + "fmt" |
| 12 | + "io" |
| 13 | + "runtime" |
| 14 | + "strings" |
| 15 | + "sync" |
| 16 | + "time" |
| 17 | + |
| 18 | + "neo-code/internal/gateway" |
| 19 | + gatewayclient "neo-code/internal/gateway/client" |
| 20 | + "neo-code/internal/gateway/protocol" |
| 21 | + "neo-code/internal/tools" |
| 22 | +) |
| 23 | + |
const (
	// diagnosisCacheTTL is how long a stored diagnosis outcome stays valid;
	// storeCacheLocked adds it to the current time to compute the expiry.
	diagnosisCacheTTL = 5 * time.Minute
	// diagnosisCacheMaxEntries is the maximum length of the cache order list
	// before the oldest fingerprints are evicted.
	diagnosisCacheMaxEntries = 64
	// diagnosisAutoDedupeTTL is the window in which a repeated automatic
	// diagnosis with the same fingerprint is dropped by shouldDropAuto.
	diagnosisAutoDedupeTTL = 10 * time.Second
	// diagnosisQuickMaxConfidence is the confidence value assigned to every
	// quick hint built by quickHint.
	diagnosisQuickMaxConfidence = 0.55
)
| 30 | + |
// preparedDiagnosisRequest is the output of prepareDiagnoseRequest: the
// encoded tool arguments plus the sanitized inputs they were built from.
type preparedDiagnosisRequest struct {
	// Payload is the JSON-encoded diagnoseToolArgs sent as the tool arguments.
	Payload []byte
	// Fingerprint is the hex SHA-256 digest derived from the sanitized
	// command, the exit code and the sanitized error log.
	Fingerprint string
	// SanitizedErrorLog is the sanitized terminal output (or a placeholder
	// when nothing was captured).
	SanitizedErrorLog string
	// SanitizedCommand is the sanitized text of the triggering command.
	SanitizedCommand string
}
| 37 | + |
// diagnosisOutcome pairs the result of one diagnose execution with its error
// so both can be cached and handed to waiting callers as a single value.
type diagnosisOutcome struct {
	// Result is the tool result returned by the execute callback.
	Result tools.ToolResult
	// Err is the error returned by the execute callback, or by waiting on it.
	Err error
}
| 42 | + |
// diagnosisFlight tracks one in-progress diagnosis that other callers with
// the same fingerprint can wait on.
type diagnosisFlight struct {
	// done is closed by run once outcome has been written.
	done chan struct{}
	// outcome is read by waiters only after done is closed.
	outcome diagnosisOutcome
}
| 47 | + |
// diagnosisCacheEntry is a cached diagnosis outcome with its expiry time.
type diagnosisCacheEntry struct {
	// outcome is the stored result/error pair.
	outcome diagnosisOutcome
	// expiresAt is the instant after which cached treats the entry as stale.
	expiresAt time.Time
}
| 52 | + |
// diagnosisCoordinator handles de-duplication of diagnosis requests,
// short-term caching of results, and debouncing of automatic diagnoses.
type diagnosisCoordinator struct {
	// mu is held by the methods below while they touch the maps and cacheOrder.
	mu sync.Mutex
	// inFlight maps a fingerprint to the diagnosis currently executing for it.
	inFlight map[string]*diagnosisFlight
	// cache maps a fingerprint to its most recent successful outcome.
	cache map[string]diagnosisCacheEntry
	// cacheOrder lists cached fingerprints oldest-first for eviction.
	cacheOrder []string
	// recentAuto records when each fingerprint last passed shouldDropAuto.
	recentAuto map[string]time.Time
	// now is the clock used by currentTime; when nil, time.Now is used.
	now func() time.Time
}
| 62 | + |
| 63 | +// newDiagnosisCoordinator 创建一次 shell 会话内复用的诊断调度器。 |
| 64 | +func newDiagnosisCoordinator() *diagnosisCoordinator { |
| 65 | + return &diagnosisCoordinator{ |
| 66 | + inFlight: make(map[string]*diagnosisFlight), |
| 67 | + cache: make(map[string]diagnosisCacheEntry), |
| 68 | + recentAuto: make(map[string]time.Time), |
| 69 | + now: time.Now, |
| 70 | + } |
| 71 | +} |
| 72 | + |
| 73 | +// prepareDiagnoseRequest 统一构建脱敏后的 diagnose payload 与 fingerprint。 |
| 74 | +func prepareDiagnoseRequest( |
| 75 | + buffer *UTF8RingBuffer, |
| 76 | + options ManualShellOptions, |
| 77 | + socketPath string, |
| 78 | + trigger diagnoseTrigger, |
| 79 | +) (preparedDiagnosisRequest, error) { |
| 80 | + if buffer == nil { |
| 81 | + buffer = NewUTF8RingBuffer(DefaultRingBufferCapacity) |
| 82 | + } |
| 83 | + logSnapshot := buffer.SnapshotString() |
| 84 | + if strings.TrimSpace(trigger.OutputText) != "" { |
| 85 | + logSnapshot = trigger.OutputText |
| 86 | + } |
| 87 | + sanitizedErrorLog := SanitizeDiagnosisText(logSnapshot, defaultDiagnosisPayloadMaxBytes) |
| 88 | + if strings.TrimSpace(sanitizedErrorLog) == "" { |
| 89 | + sanitizedErrorLog = "no terminal output captured" |
| 90 | + } |
| 91 | + sanitizedCommand := SanitizeDiagnosisText(trigger.CommandText, 1024) |
| 92 | + |
| 93 | + requestArgs := diagnoseToolArgs{ |
| 94 | + ErrorLog: sanitizedErrorLog, |
| 95 | + OSEnv: map[string]string{ |
| 96 | + "os": runtime.GOOS, |
| 97 | + "shell": resolveShellPath(options.Shell), |
| 98 | + "cwd": options.Workdir, |
| 99 | + "socket": socketPath, |
| 100 | + }, |
| 101 | + CommandText: sanitizedCommand, |
| 102 | + ExitCode: trigger.ExitCode, |
| 103 | + } |
| 104 | + requestPayload, err := json.Marshal(requestArgs) |
| 105 | + if err != nil { |
| 106 | + return preparedDiagnosisRequest{}, err |
| 107 | + } |
| 108 | + return preparedDiagnosisRequest{ |
| 109 | + Payload: requestPayload, |
| 110 | + Fingerprint: fingerprintDiagnosisRequest(sanitizedCommand, trigger.ExitCode, sanitizedErrorLog), |
| 111 | + SanitizedErrorLog: sanitizedErrorLog, |
| 112 | + SanitizedCommand: sanitizedCommand, |
| 113 | + }, nil |
| 114 | +} |
| 115 | + |
// fingerprintDiagnosisRequest produces a stable fingerprint for sanitized
// diagnosis input: the lowercase hex SHA-256 digest of the trimmed command,
// the decimal exit code and the trimmed error log, separated by NUL bytes.
func fingerprintDiagnosisRequest(command string, exitCode int, errorLog string) string {
	hasher := sha256.New()
	// hash.Hash.Write never returns an error, so the Fprintf result is ignored.
	_, _ = fmt.Fprintf(
		hasher,
		"%s\x00%d\x00%s",
		strings.TrimSpace(command),
		exitCode,
		strings.TrimSpace(errorLog),
	)
	return hex.EncodeToString(hasher.Sum(nil))
}
| 125 | + |
| 126 | +// executePreparedDiagnoseToolWithTimeout 执行已构建好的 diagnose payload。 |
| 127 | +func executePreparedDiagnoseToolWithTimeout( |
| 128 | + rpcClient *gatewayclient.GatewayRPCClient, |
| 129 | + options ManualShellOptions, |
| 130 | + prepared preparedDiagnosisRequest, |
| 131 | + timeout time.Duration, |
| 132 | +) (tools.ToolResult, error) { |
| 133 | + if rpcClient == nil { |
| 134 | + return tools.ToolResult{}, errors.New("诊断服务未就绪,请确认 gateway 已连接后重试") |
| 135 | + } |
| 136 | + |
| 137 | + callContext, cancel := context.WithTimeout(context.Background(), timeout) |
| 138 | + defer cancel() |
| 139 | + |
| 140 | + var frame gateway.MessageFrame |
| 141 | + if err := rpcClient.CallWithOptions( |
| 142 | + callContext, |
| 143 | + protocol.MethodGatewayExecuteSystemTool, |
| 144 | + protocol.ExecuteSystemToolParams{ |
| 145 | + Workdir: options.Workdir, |
| 146 | + ToolName: tools.ToolNameDiagnose, |
| 147 | + Arguments: prepared.Payload, |
| 148 | + }, |
| 149 | + &frame, |
| 150 | + gatewayclient.GatewayRPCCallOptions{ |
| 151 | + Timeout: timeout, |
| 152 | + Retries: 1, |
| 153 | + }, |
| 154 | + ); err != nil { |
| 155 | + if options.Stderr != nil { |
| 156 | + writeProxyf(options.Stderr, "neocode diag: executeSystemTool rpc failed: %v\n", err) |
| 157 | + } |
| 158 | + return tools.ToolResult{}, errors.New("诊断调用失败,请检查 gateway 连接后重试,或使用 `neocode diag -i` 继续排查") |
| 159 | + } |
| 160 | + |
| 161 | + if frame.Type == gateway.FrameTypeError && frame.Error != nil { |
| 162 | + if options.Stderr != nil { |
| 163 | + writeProxyf( |
| 164 | + options.Stderr, |
| 165 | + "neocode diag: gateway returned frame error code=%s message=%s\n", |
| 166 | + strings.TrimSpace(frame.Error.Code), |
| 167 | + strings.TrimSpace(frame.Error.Message), |
| 168 | + ) |
| 169 | + } |
| 170 | + return tools.ToolResult{}, errors.New("诊断服务暂不可用,请稍后重试,或使用 `neocode diag -i` 继续排查") |
| 171 | + } |
| 172 | + if frame.Type != gateway.FrameTypeAck { |
| 173 | + if options.Stderr != nil { |
| 174 | + writeProxyf(options.Stderr, "neocode diag: unexpected gateway frame type: %s\n", frame.Type) |
| 175 | + } |
| 176 | + return tools.ToolResult{}, errors.New("诊断服务返回异常响应,请稍后重试") |
| 177 | + } |
| 178 | + |
| 179 | + toolResult, err := decodeToolResult(frame.Payload) |
| 180 | + if err != nil { |
| 181 | + if options.Stderr != nil { |
| 182 | + writeProxyf(options.Stderr, "neocode diag: decode diagnose payload failed: %v\n", err) |
| 183 | + } |
| 184 | + return tools.ToolResult{}, errors.New("诊断结果解析失败,请重试或更新 NeoCode") |
| 185 | + } |
| 186 | + return toolResult, nil |
| 187 | +} |
| 188 | + |
| 189 | +// shouldDropAuto 判断自动诊断是否命中短窗口去抖。 |
| 190 | +func (c *diagnosisCoordinator) shouldDropAuto(fingerprint string) bool { |
| 191 | + if c == nil || strings.TrimSpace(fingerprint) == "" { |
| 192 | + return false |
| 193 | + } |
| 194 | + c.mu.Lock() |
| 195 | + defer c.mu.Unlock() |
| 196 | + now := c.currentTime() |
| 197 | + for key, seenAt := range c.recentAuto { |
| 198 | + if now.Sub(seenAt) > diagnosisAutoDedupeTTL { |
| 199 | + delete(c.recentAuto, key) |
| 200 | + } |
| 201 | + } |
| 202 | + if seenAt, ok := c.recentAuto[fingerprint]; ok && now.Sub(seenAt) <= diagnosisAutoDedupeTTL { |
| 203 | + return true |
| 204 | + } |
| 205 | + c.recentAuto[fingerprint] = now |
| 206 | + return false |
| 207 | +} |
| 208 | + |
| 209 | +// cached 返回仍在有效期内的缓存诊断结果。 |
| 210 | +func (c *diagnosisCoordinator) cached(fingerprint string) (diagnosisOutcome, bool) { |
| 211 | + if c == nil || strings.TrimSpace(fingerprint) == "" || !IsDiagCacheEnabledFromEnv() { |
| 212 | + return diagnosisOutcome{}, false |
| 213 | + } |
| 214 | + c.mu.Lock() |
| 215 | + defer c.mu.Unlock() |
| 216 | + entry, ok := c.cache[fingerprint] |
| 217 | + if !ok { |
| 218 | + return diagnosisOutcome{}, false |
| 219 | + } |
| 220 | + if c.currentTime().After(entry.expiresAt) { |
| 221 | + delete(c.cache, fingerprint) |
| 222 | + return diagnosisOutcome{}, false |
| 223 | + } |
| 224 | + return entry.outcome, true |
| 225 | +} |
| 226 | + |
| 227 | +// run 执行或复用一次诊断请求,成功结果会进入短期缓存。 |
| 228 | +func (c *diagnosisCoordinator) run( |
| 229 | + ctx context.Context, |
| 230 | + fingerprint string, |
| 231 | + execute func() (tools.ToolResult, error), |
| 232 | +) diagnosisOutcome { |
| 233 | + if c == nil || strings.TrimSpace(fingerprint) == "" || !IsDiagCacheEnabledFromEnv() { |
| 234 | + result, err := execute() |
| 235 | + return diagnosisOutcome{Result: result, Err: err} |
| 236 | + } |
| 237 | + if cached, ok := c.cached(fingerprint); ok { |
| 238 | + return cached |
| 239 | + } |
| 240 | + |
| 241 | + c.mu.Lock() |
| 242 | + if flight, ok := c.inFlight[fingerprint]; ok { |
| 243 | + c.mu.Unlock() |
| 244 | + return waitDiagnosisFlight(ctx, flight) |
| 245 | + } |
| 246 | + flight := &diagnosisFlight{done: make(chan struct{})} |
| 247 | + c.inFlight[fingerprint] = flight |
| 248 | + c.mu.Unlock() |
| 249 | + |
| 250 | + result, err := execute() |
| 251 | + outcome := diagnosisOutcome{Result: result, Err: err} |
| 252 | + |
| 253 | + c.mu.Lock() |
| 254 | + flight.outcome = outcome |
| 255 | + delete(c.inFlight, fingerprint) |
| 256 | + if err == nil { |
| 257 | + c.storeCacheLocked(fingerprint, outcome) |
| 258 | + } |
| 259 | + close(flight.done) |
| 260 | + c.mu.Unlock() |
| 261 | + return outcome |
| 262 | +} |
| 263 | + |
| 264 | +// waitDiagnosisFlight 等待已存在的同指纹诊断完成。 |
| 265 | +func waitDiagnosisFlight(ctx context.Context, flight *diagnosisFlight) diagnosisOutcome { |
| 266 | + if flight == nil { |
| 267 | + return diagnosisOutcome{Err: errors.New("diagnosis flight is nil")} |
| 268 | + } |
| 269 | + select { |
| 270 | + case <-ctx.Done(): |
| 271 | + return diagnosisOutcome{Err: ctx.Err()} |
| 272 | + case <-flight.done: |
| 273 | + return flight.outcome |
| 274 | + } |
| 275 | +} |
| 276 | + |
| 277 | +// storeCacheLocked 在持锁状态下写入缓存并维护容量上限。 |
| 278 | +func (c *diagnosisCoordinator) storeCacheLocked(fingerprint string, outcome diagnosisOutcome) { |
| 279 | + if c == nil || strings.TrimSpace(fingerprint) == "" { |
| 280 | + return |
| 281 | + } |
| 282 | + if _, exists := c.cache[fingerprint]; !exists { |
| 283 | + c.cacheOrder = append(c.cacheOrder, fingerprint) |
| 284 | + } |
| 285 | + c.cache[fingerprint] = diagnosisCacheEntry{ |
| 286 | + outcome: outcome, |
| 287 | + expiresAt: c.currentTime().Add(diagnosisCacheTTL), |
| 288 | + } |
| 289 | + for len(c.cacheOrder) > diagnosisCacheMaxEntries { |
| 290 | + oldest := c.cacheOrder[0] |
| 291 | + c.cacheOrder = c.cacheOrder[1:] |
| 292 | + delete(c.cache, oldest) |
| 293 | + } |
| 294 | +} |
| 295 | + |
| 296 | +// currentTime 返回可在测试中替换的当前时间。 |
| 297 | +func (c *diagnosisCoordinator) currentTime() time.Time { |
| 298 | + if c != nil && c.now != nil { |
| 299 | + return c.now() |
| 300 | + } |
| 301 | + return time.Now() |
| 302 | +} |
| 303 | + |
| 304 | +// renderDiagnosisInitialFeedback 输出诊断快速首响或低干扰预判。 |
| 305 | +func renderDiagnosisInitialFeedback(output io.Writer, prepared preparedDiagnosisRequest, isAuto bool) { |
| 306 | + if output == nil || !IsDiagFastResponseEnabledFromEnv() { |
| 307 | + return |
| 308 | + } |
| 309 | + hint, ok := buildDiagnosisQuickHint(prepared) |
| 310 | + if isAuto && !ok { |
| 311 | + return |
| 312 | + } |
| 313 | + if isAuto { |
| 314 | + writeProxyLine(output, "\n\033[36m[NeoCode Diagnosis]\033[0m 快速预判(低置信度,完整诊断稍后返回)") |
| 315 | + } else { |
| 316 | + writeProxyLine(output, "\n\033[36m[NeoCode Diagnosis]\033[0m 正在诊断,完整结果稍后返回。") |
| 317 | + if !ok { |
| 318 | + return |
| 319 | + } |
| 320 | + writeProxyLine(output, "快速预判(低置信度):") |
| 321 | + } |
| 322 | + if !ok { |
| 323 | + return |
| 324 | + } |
| 325 | + writeProxyf(output, "置信度: %.2f\n", hint.Confidence) |
| 326 | + writeProxyf(output, "可能根因: %s\n", strings.TrimSpace(hint.RootCause)) |
| 327 | + if len(hint.InvestigationCommands) > 0 { |
| 328 | + writeProxyLine(output, "建议先查:") |
| 329 | + for _, command := range hint.InvestigationCommands { |
| 330 | + writeProxyf(output, "- %s\n", strings.TrimSpace(command)) |
| 331 | + } |
| 332 | + } |
| 333 | +} |
| 334 | + |
| 335 | +// buildDiagnosisQuickHint 根据常见终端错误模式生成低置信度快速预判。 |
| 336 | +func buildDiagnosisQuickHint(prepared preparedDiagnosisRequest) (diagnoseToolResult, bool) { |
| 337 | + text := strings.ToLower(strings.TrimSpace(prepared.SanitizedErrorLog + "\n" + prepared.SanitizedCommand)) |
| 338 | + switch { |
| 339 | + case strings.Contains(text, "command not found") || strings.Contains(text, "not recognized as"): |
| 340 | + return quickHint("命令不存在或未加入 PATH。", []string{"which <command>", "echo $PATH"}), true |
| 341 | + case strings.Contains(text, "permission denied"): |
| 342 | + return quickHint("当前用户缺少执行或访问目标路径的权限。", []string{"ls -la", "id"}), true |
| 343 | + case strings.Contains(text, "no such file or directory") || strings.Contains(text, "cannot find the path"): |
| 344 | + return quickHint("路径或工作目录可能不正确,目标文件不存在。", []string{"pwd", "ls -la"}), true |
| 345 | + case strings.Contains(text, "address already in use") || strings.Contains(text, "port already in use"): |
| 346 | + return quickHint("端口已被其他进程占用。", []string{"lsof -i :<port>", "netstat -an | grep <port>"}), true |
| 347 | + case strings.Contains(text, "module not found") || strings.Contains(text, "cannot find module") || |
| 348 | + strings.Contains(text, "cannot find package") || strings.Contains(text, "undefined reference"): |
| 349 | + return quickHint("依赖缺失或链接配置不完整。", []string{"go env GOPATH", "go mod tidy"}), true |
| 350 | + case strings.Contains(text, "context deadline exceeded") || strings.Contains(text, "connection refused"): |
| 351 | + return quickHint("外部服务或网络连接暂不可用。", []string{"ping 127.0.0.1", "curl -v <url>"}), true |
| 352 | + default: |
| 353 | + return diagnoseToolResult{}, false |
| 354 | + } |
| 355 | +} |
| 356 | + |
| 357 | +// quickHint 统一限制快速预判的置信度上限。 |
| 358 | +func quickHint(rootCause string, investigation []string) diagnoseToolResult { |
| 359 | + return diagnoseToolResult{ |
| 360 | + Confidence: diagnosisQuickMaxConfidence, |
| 361 | + RootCause: rootCause, |
| 362 | + FixCommands: []string{}, |
| 363 | + InvestigationCommands: investigation, |
| 364 | + } |
| 365 | +} |
0 commit comments