Skip to content

Commit 0f3f9eb

Browse files
authored
Merge pull request #565 from pionxe/feature/diag-phase4-altscreen-guard
feat(Phase4):提升终端诊断与 IDM 模式可用性
2 parents 622abaf + 12a23f8 commit 0f3f9eb

12 files changed

Lines changed: 1733 additions & 99 deletions
Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
//go:build !windows
2+
3+
package ptyproxy
4+
5+
import (
6+
"context"
7+
"crypto/sha256"
8+
"encoding/hex"
9+
"encoding/json"
10+
"errors"
11+
"fmt"
12+
"io"
13+
"runtime"
14+
"strings"
15+
"sync"
16+
"time"
17+
18+
"neo-code/internal/gateway"
19+
gatewayclient "neo-code/internal/gateway/client"
20+
"neo-code/internal/gateway/protocol"
21+
"neo-code/internal/tools"
22+
)
23+
24+
// Tuning values for diagnosis caching, automatic-trigger de-duplication and
// quick hints.
const (
	// diagnosisCacheTTL is added to the current time to compute the expiry of
	// a stored diagnosis outcome.
	diagnosisCacheTTL = 5 * time.Minute
	// diagnosisCacheMaxEntries is the eviction threshold for the cache order
	// list; the oldest keys are dropped once it is exceeded.
	diagnosisCacheMaxEntries = 64
	// diagnosisAutoDedupeTTL is the window in which a repeated automatic
	// diagnosis with the same fingerprint is dropped.
	diagnosisAutoDedupeTTL = 10 * time.Second
	// diagnosisQuickMaxConfidence is the confidence reported by every quick
	// hint.
	diagnosisQuickMaxConfidence = 0.55
)
30+
31+
// preparedDiagnosisRequest carries a ready-to-send diagnose request together
// with the sanitized inputs it was derived from.
type preparedDiagnosisRequest struct {
	// Payload is the JSON-encoded diagnose tool arguments.
	Payload []byte
	// Fingerprint identifies the request by its sanitized command, exit code
	// and error log.
	Fingerprint string
	// SanitizedErrorLog is the sanitized terminal output sent for diagnosis.
	SanitizedErrorLog string
	// SanitizedCommand is the sanitized command text sent for diagnosis.
	SanitizedCommand string
}
37+
38+
type diagnosisOutcome struct {
39+
Result tools.ToolResult
40+
Err error
41+
}
42+
43+
type diagnosisFlight struct {
44+
done chan struct{}
45+
outcome diagnosisOutcome
46+
}
47+
48+
type diagnosisCacheEntry struct {
49+
outcome diagnosisOutcome
50+
expiresAt time.Time
51+
}
52+
53+
// diagnosisCoordinator 负责诊断请求去重、短期缓存与自动诊断去抖。
54+
type diagnosisCoordinator struct {
55+
mu sync.Mutex
56+
inFlight map[string]*diagnosisFlight
57+
cache map[string]diagnosisCacheEntry
58+
cacheOrder []string
59+
recentAuto map[string]time.Time
60+
now func() time.Time
61+
}
62+
63+
// newDiagnosisCoordinator 创建一次 shell 会话内复用的诊断调度器。
64+
func newDiagnosisCoordinator() *diagnosisCoordinator {
65+
return &diagnosisCoordinator{
66+
inFlight: make(map[string]*diagnosisFlight),
67+
cache: make(map[string]diagnosisCacheEntry),
68+
recentAuto: make(map[string]time.Time),
69+
now: time.Now,
70+
}
71+
}
72+
73+
// prepareDiagnoseRequest 统一构建脱敏后的 diagnose payload 与 fingerprint。
74+
func prepareDiagnoseRequest(
75+
buffer *UTF8RingBuffer,
76+
options ManualShellOptions,
77+
socketPath string,
78+
trigger diagnoseTrigger,
79+
) (preparedDiagnosisRequest, error) {
80+
if buffer == nil {
81+
buffer = NewUTF8RingBuffer(DefaultRingBufferCapacity)
82+
}
83+
logSnapshot := buffer.SnapshotString()
84+
if strings.TrimSpace(trigger.OutputText) != "" {
85+
logSnapshot = trigger.OutputText
86+
}
87+
sanitizedErrorLog := SanitizeDiagnosisText(logSnapshot, defaultDiagnosisPayloadMaxBytes)
88+
if strings.TrimSpace(sanitizedErrorLog) == "" {
89+
sanitizedErrorLog = "no terminal output captured"
90+
}
91+
sanitizedCommand := SanitizeDiagnosisText(trigger.CommandText, 1024)
92+
93+
requestArgs := diagnoseToolArgs{
94+
ErrorLog: sanitizedErrorLog,
95+
OSEnv: map[string]string{
96+
"os": runtime.GOOS,
97+
"shell": resolveShellPath(options.Shell),
98+
"cwd": options.Workdir,
99+
"socket": socketPath,
100+
},
101+
CommandText: sanitizedCommand,
102+
ExitCode: trigger.ExitCode,
103+
}
104+
requestPayload, err := json.Marshal(requestArgs)
105+
if err != nil {
106+
return preparedDiagnosisRequest{}, err
107+
}
108+
return preparedDiagnosisRequest{
109+
Payload: requestPayload,
110+
Fingerprint: fingerprintDiagnosisRequest(sanitizedCommand, trigger.ExitCode, sanitizedErrorLog),
111+
SanitizedErrorLog: sanitizedErrorLog,
112+
SanitizedCommand: sanitizedCommand,
113+
}, nil
114+
}
115+
116+
// fingerprintDiagnosisRequest returns a stable fingerprint for sanitized
// diagnosis input: the hex-encoded SHA-256 of the trimmed command, the decimal
// exit code and the trimmed error log, separated by NUL bytes.
func fingerprintDiagnosisRequest(command string, exitCode int, errorLog string) string {
	hasher := sha256.New()
	// Writes to a hash never fail, so the Fprintf result is not checked.
	fmt.Fprintf(hasher, "%s\x00%d\x00%s", strings.TrimSpace(command), exitCode, strings.TrimSpace(errorLog))
	return hex.EncodeToString(hasher.Sum(nil))
}
125+
126+
// executePreparedDiagnoseToolWithTimeout 执行已构建好的 diagnose payload。
127+
func executePreparedDiagnoseToolWithTimeout(
128+
rpcClient *gatewayclient.GatewayRPCClient,
129+
options ManualShellOptions,
130+
prepared preparedDiagnosisRequest,
131+
timeout time.Duration,
132+
) (tools.ToolResult, error) {
133+
if rpcClient == nil {
134+
return tools.ToolResult{}, errors.New("诊断服务未就绪,请确认 gateway 已连接后重试")
135+
}
136+
137+
callContext, cancel := context.WithTimeout(context.Background(), timeout)
138+
defer cancel()
139+
140+
var frame gateway.MessageFrame
141+
if err := rpcClient.CallWithOptions(
142+
callContext,
143+
protocol.MethodGatewayExecuteSystemTool,
144+
protocol.ExecuteSystemToolParams{
145+
Workdir: options.Workdir,
146+
ToolName: tools.ToolNameDiagnose,
147+
Arguments: prepared.Payload,
148+
},
149+
&frame,
150+
gatewayclient.GatewayRPCCallOptions{
151+
Timeout: timeout,
152+
Retries: 1,
153+
},
154+
); err != nil {
155+
if options.Stderr != nil {
156+
writeProxyf(options.Stderr, "neocode diag: executeSystemTool rpc failed: %v\n", err)
157+
}
158+
return tools.ToolResult{}, errors.New("诊断调用失败,请检查 gateway 连接后重试,或使用 `neocode diag -i` 继续排查")
159+
}
160+
161+
if frame.Type == gateway.FrameTypeError && frame.Error != nil {
162+
if options.Stderr != nil {
163+
writeProxyf(
164+
options.Stderr,
165+
"neocode diag: gateway returned frame error code=%s message=%s\n",
166+
strings.TrimSpace(frame.Error.Code),
167+
strings.TrimSpace(frame.Error.Message),
168+
)
169+
}
170+
return tools.ToolResult{}, errors.New("诊断服务暂不可用,请稍后重试,或使用 `neocode diag -i` 继续排查")
171+
}
172+
if frame.Type != gateway.FrameTypeAck {
173+
if options.Stderr != nil {
174+
writeProxyf(options.Stderr, "neocode diag: unexpected gateway frame type: %s\n", frame.Type)
175+
}
176+
return tools.ToolResult{}, errors.New("诊断服务返回异常响应,请稍后重试")
177+
}
178+
179+
toolResult, err := decodeToolResult(frame.Payload)
180+
if err != nil {
181+
if options.Stderr != nil {
182+
writeProxyf(options.Stderr, "neocode diag: decode diagnose payload failed: %v\n", err)
183+
}
184+
return tools.ToolResult{}, errors.New("诊断结果解析失败,请重试或更新 NeoCode")
185+
}
186+
return toolResult, nil
187+
}
188+
189+
// shouldDropAuto 判断自动诊断是否命中短窗口去抖。
190+
func (c *diagnosisCoordinator) shouldDropAuto(fingerprint string) bool {
191+
if c == nil || strings.TrimSpace(fingerprint) == "" {
192+
return false
193+
}
194+
c.mu.Lock()
195+
defer c.mu.Unlock()
196+
now := c.currentTime()
197+
for key, seenAt := range c.recentAuto {
198+
if now.Sub(seenAt) > diagnosisAutoDedupeTTL {
199+
delete(c.recentAuto, key)
200+
}
201+
}
202+
if seenAt, ok := c.recentAuto[fingerprint]; ok && now.Sub(seenAt) <= diagnosisAutoDedupeTTL {
203+
return true
204+
}
205+
c.recentAuto[fingerprint] = now
206+
return false
207+
}
208+
209+
// cached 返回仍在有效期内的缓存诊断结果。
210+
func (c *diagnosisCoordinator) cached(fingerprint string) (diagnosisOutcome, bool) {
211+
if c == nil || strings.TrimSpace(fingerprint) == "" || !IsDiagCacheEnabledFromEnv() {
212+
return diagnosisOutcome{}, false
213+
}
214+
c.mu.Lock()
215+
defer c.mu.Unlock()
216+
entry, ok := c.cache[fingerprint]
217+
if !ok {
218+
return diagnosisOutcome{}, false
219+
}
220+
if c.currentTime().After(entry.expiresAt) {
221+
delete(c.cache, fingerprint)
222+
return diagnosisOutcome{}, false
223+
}
224+
return entry.outcome, true
225+
}
226+
227+
// run 执行或复用一次诊断请求,成功结果会进入短期缓存。
228+
func (c *diagnosisCoordinator) run(
229+
ctx context.Context,
230+
fingerprint string,
231+
execute func() (tools.ToolResult, error),
232+
) diagnosisOutcome {
233+
if c == nil || strings.TrimSpace(fingerprint) == "" || !IsDiagCacheEnabledFromEnv() {
234+
result, err := execute()
235+
return diagnosisOutcome{Result: result, Err: err}
236+
}
237+
if cached, ok := c.cached(fingerprint); ok {
238+
return cached
239+
}
240+
241+
c.mu.Lock()
242+
if flight, ok := c.inFlight[fingerprint]; ok {
243+
c.mu.Unlock()
244+
return waitDiagnosisFlight(ctx, flight)
245+
}
246+
flight := &diagnosisFlight{done: make(chan struct{})}
247+
c.inFlight[fingerprint] = flight
248+
c.mu.Unlock()
249+
250+
result, err := execute()
251+
outcome := diagnosisOutcome{Result: result, Err: err}
252+
253+
c.mu.Lock()
254+
flight.outcome = outcome
255+
delete(c.inFlight, fingerprint)
256+
if err == nil {
257+
c.storeCacheLocked(fingerprint, outcome)
258+
}
259+
close(flight.done)
260+
c.mu.Unlock()
261+
return outcome
262+
}
263+
264+
// waitDiagnosisFlight 等待已存在的同指纹诊断完成。
265+
func waitDiagnosisFlight(ctx context.Context, flight *diagnosisFlight) diagnosisOutcome {
266+
if flight == nil {
267+
return diagnosisOutcome{Err: errors.New("diagnosis flight is nil")}
268+
}
269+
select {
270+
case <-ctx.Done():
271+
return diagnosisOutcome{Err: ctx.Err()}
272+
case <-flight.done:
273+
return flight.outcome
274+
}
275+
}
276+
277+
// storeCacheLocked 在持锁状态下写入缓存并维护容量上限。
278+
func (c *diagnosisCoordinator) storeCacheLocked(fingerprint string, outcome diagnosisOutcome) {
279+
if c == nil || strings.TrimSpace(fingerprint) == "" {
280+
return
281+
}
282+
if _, exists := c.cache[fingerprint]; !exists {
283+
c.cacheOrder = append(c.cacheOrder, fingerprint)
284+
}
285+
c.cache[fingerprint] = diagnosisCacheEntry{
286+
outcome: outcome,
287+
expiresAt: c.currentTime().Add(diagnosisCacheTTL),
288+
}
289+
for len(c.cacheOrder) > diagnosisCacheMaxEntries {
290+
oldest := c.cacheOrder[0]
291+
c.cacheOrder = c.cacheOrder[1:]
292+
delete(c.cache, oldest)
293+
}
294+
}
295+
296+
// currentTime 返回可在测试中替换的当前时间。
297+
func (c *diagnosisCoordinator) currentTime() time.Time {
298+
if c != nil && c.now != nil {
299+
return c.now()
300+
}
301+
return time.Now()
302+
}
303+
304+
// renderDiagnosisInitialFeedback 输出诊断快速首响或低干扰预判。
305+
func renderDiagnosisInitialFeedback(output io.Writer, prepared preparedDiagnosisRequest, isAuto bool) {
306+
if output == nil || !IsDiagFastResponseEnabledFromEnv() {
307+
return
308+
}
309+
hint, ok := buildDiagnosisQuickHint(prepared)
310+
if isAuto && !ok {
311+
return
312+
}
313+
if isAuto {
314+
writeProxyLine(output, "\n\033[36m[NeoCode Diagnosis]\033[0m 快速预判(低置信度,完整诊断稍后返回)")
315+
} else {
316+
writeProxyLine(output, "\n\033[36m[NeoCode Diagnosis]\033[0m 正在诊断,完整结果稍后返回。")
317+
if !ok {
318+
return
319+
}
320+
writeProxyLine(output, "快速预判(低置信度):")
321+
}
322+
if !ok {
323+
return
324+
}
325+
writeProxyf(output, "置信度: %.2f\n", hint.Confidence)
326+
writeProxyf(output, "可能根因: %s\n", strings.TrimSpace(hint.RootCause))
327+
if len(hint.InvestigationCommands) > 0 {
328+
writeProxyLine(output, "建议先查:")
329+
for _, command := range hint.InvestigationCommands {
330+
writeProxyf(output, "- %s\n", strings.TrimSpace(command))
331+
}
332+
}
333+
}
334+
335+
// buildDiagnosisQuickHint 根据常见终端错误模式生成低置信度快速预判。
336+
func buildDiagnosisQuickHint(prepared preparedDiagnosisRequest) (diagnoseToolResult, bool) {
337+
text := strings.ToLower(strings.TrimSpace(prepared.SanitizedErrorLog + "\n" + prepared.SanitizedCommand))
338+
switch {
339+
case strings.Contains(text, "command not found") || strings.Contains(text, "not recognized as"):
340+
return quickHint("命令不存在或未加入 PATH。", []string{"which <command>", "echo $PATH"}), true
341+
case strings.Contains(text, "permission denied"):
342+
return quickHint("当前用户缺少执行或访问目标路径的权限。", []string{"ls -la", "id"}), true
343+
case strings.Contains(text, "no such file or directory") || strings.Contains(text, "cannot find the path"):
344+
return quickHint("路径或工作目录可能不正确,目标文件不存在。", []string{"pwd", "ls -la"}), true
345+
case strings.Contains(text, "address already in use") || strings.Contains(text, "port already in use"):
346+
return quickHint("端口已被其他进程占用。", []string{"lsof -i :<port>", "netstat -an | grep <port>"}), true
347+
case strings.Contains(text, "module not found") || strings.Contains(text, "cannot find module") ||
348+
strings.Contains(text, "cannot find package") || strings.Contains(text, "undefined reference"):
349+
return quickHint("依赖缺失或链接配置不完整。", []string{"go env GOPATH", "go mod tidy"}), true
350+
case strings.Contains(text, "context deadline exceeded") || strings.Contains(text, "connection refused"):
351+
return quickHint("外部服务或网络连接暂不可用。", []string{"ping 127.0.0.1", "curl -v <url>"}), true
352+
default:
353+
return diagnoseToolResult{}, false
354+
}
355+
}
356+
357+
// quickHint 统一限制快速预判的置信度上限。
358+
func quickHint(rootCause string, investigation []string) diagnoseToolResult {
359+
return diagnoseToolResult{
360+
Confidence: diagnosisQuickMaxConfidence,
361+
RootCause: rootCause,
362+
FixCommands: []string{},
363+
InvestigationCommands: investigation,
364+
}
365+
}

0 commit comments

Comments
 (0)