Skip to content

Commit 47e9cb4

Browse files
committed
fix(workflow): code-implementation reliability + Windows compatibility
Several issues caused the Paper-to-Code pipeline to crash or hang on Windows. This commit addresses all of them.

workflows/code_implementation_workflow.py
workflows/code_implementation_workflow_index.py
- Import LoopDetector and ProgressTracker from utils.loop_detector. They were instantiated but never imported, raising NameError mid-run as soon as the implementation agent kicked in.

workflows/agent_orchestration_engine.py
- get_default_search_server() now falls back to 'filesystem' instead of the (now removed) 'brave' default; get_search_server_names() always appends 'fetch' so the agent has at least one network tool.
- Drop the brave_web_search example from the orchestrator system prompt.
- Replace a bare 'except:' with 'except Exception:' (ruff E722) when probing whether a tool result is JSON.

tools/command_executor.py (Windows hardening)
- Detect the host platform and expose _PLATFORM_HINT in the tool descriptions so the LLM knows which shell syntax is safe.
- Add _try_native_execute(): for the common file-tree commands (mkdir -p, touch, rm -rf, cp -r, mv) we now run them through pathlib / shutil natively, bypassing cmd.exe entirely. This fixes the bug where cmd.exe interpreted '-p' literally and produced directories named '-p' before deadlocking the MCP stdio channel.
- Subprocess fallback now passes encoding='utf-8', errors='replace' so non-ASCII output no longer raises UnicodeDecodeError on Windows.
- Strip Chinese strings from the human-readable summary to keep stdout safe under the default GBK code page.

Made-with: Cursor
1 parent 7918c09 commit 47e9cb4

2 files changed

Lines changed: 213 additions & 79 deletions

File tree

tools/command_executor.py

Lines changed: 197 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -6,37 +6,149 @@
66
Specialized in executing LLM-generated shell commands to create file tree structures
77
"""
88

9+
import platform
10+
import shlex
11+
import shutil
912
import subprocess
1013
from pathlib import Path
11-
from typing import List, Dict
14+
from typing import Dict, List, Optional, Tuple
1215
from mcp.server.models import InitializationOptions
1316
import mcp.types as types
1417
from mcp.server import NotificationOptions, Server
1518
import mcp.server.stdio
1619

17-
# 创建MCP服务器实例 / Create MCP server instance
20+
# True when the host operating system reported by ``platform.system()`` is
# Windows; evaluated once at import time.
IS_WINDOWS = platform.system() == "Windows"
21+
1822
# MCP server instance named "command-executor"; handlers such as the tool
# listing below register themselves on it through its decorators.
app = Server("command-executor")
1923

2024

25+
def _try_native_execute(command: str, cwd: Path) -> Optional[Tuple[int, str, str]]:
26+
"""Try to execute common file-tree commands natively (no shell).
27+
28+
Handles Unix-style commands so they work on Windows where cmd.exe would
29+
misinterpret flags like ``-p`` as directory names. Returns
30+
``(returncode, stdout, stderr)`` when the command is handled, otherwise
31+
``None`` so the caller can fall back to running through the system shell.
32+
"""
33+
try:
34+
tokens = shlex.split(command, posix=True)
35+
except ValueError:
36+
return None
37+
if not tokens:
38+
return None
39+
40+
cmd = tokens[0]
41+
args = tokens[1:]
42+
flags = [a for a in args if a.startswith("-") and a != "-"]
43+
paths = [a for a in args if not a.startswith("-")]
44+
45+
def _resolve(p: str) -> Path:
46+
pp = Path(p)
47+
return pp if pp.is_absolute() else (cwd / pp)
48+
49+
try:
50+
if cmd == "mkdir":
51+
for p in paths:
52+
_resolve(p).mkdir(parents=True, exist_ok=True)
53+
return 0, f"Created {len(paths)} directory/directories", ""
54+
55+
if cmd == "touch":
56+
count = 0
57+
for p in paths:
58+
target = _resolve(p)
59+
target.parent.mkdir(parents=True, exist_ok=True)
60+
target.touch(exist_ok=True)
61+
count += 1
62+
return 0, f"Touched {count} file(s)", ""
63+
64+
if cmd == "rm":
65+
recursive = any(("r" in f) or ("R" in f) for f in flags)
66+
force = any("f" in f for f in flags)
67+
removed = 0
68+
for p in paths:
69+
target = _resolve(p)
70+
if target.is_dir():
71+
if recursive:
72+
shutil.rmtree(target, ignore_errors=force)
73+
removed += 1
74+
else:
75+
if not force:
76+
return 1, "", f"rm: cannot remove '{p}': Is a directory"
77+
elif target.exists():
78+
target.unlink()
79+
removed += 1
80+
elif not force:
81+
return 1, "", f"rm: cannot remove '{p}': No such file or directory"
82+
return 0, f"Removed {removed} item(s)", ""
83+
84+
if cmd in ("cp", "copy"):
85+
recursive = any(("r" in f) or ("R" in f) for f in flags)
86+
if len(paths) < 2:
87+
return None
88+
*srcs, dst = paths
89+
dst_path = _resolve(dst)
90+
for s in srcs:
91+
sp = _resolve(s)
92+
if sp.is_dir():
93+
if not recursive:
94+
return 1, "", f"cp: -r not specified; omitting directory '{s}'"
95+
target = (
96+
dst_path / sp.name
97+
if dst_path.exists() and dst_path.is_dir()
98+
else dst_path
99+
)
100+
shutil.copytree(sp, target, dirs_exist_ok=True)
101+
else:
102+
if dst_path.exists() and dst_path.is_dir():
103+
shutil.copy2(sp, dst_path / sp.name)
104+
else:
105+
dst_path.parent.mkdir(parents=True, exist_ok=True)
106+
shutil.copy2(sp, dst_path)
107+
return 0, f"Copied {len(srcs)} item(s)", ""
108+
109+
if cmd in ("mv", "move"):
110+
if len(paths) < 2:
111+
return None
112+
*srcs, dst = paths
113+
dst_path = _resolve(dst)
114+
for s in srcs:
115+
sp = _resolve(s)
116+
if dst_path.exists() and dst_path.is_dir():
117+
shutil.move(str(sp), str(dst_path / sp.name))
118+
else:
119+
dst_path.parent.mkdir(parents=True, exist_ok=True)
120+
shutil.move(str(sp), str(dst_path))
121+
return 0, f"Moved {len(srcs)} item(s)", ""
122+
123+
except Exception as e:
124+
return 1, "", f"{cmd}: {e}"
125+
126+
return None
127+
128+
129+
_PLATFORM_HINT = (
130+
f"Current host OS: {platform.system()} ({platform.platform()}). "
131+
"Common Unix file-tree commands (mkdir -p, touch, rm -rf, cp -r, mv) are "
132+
"auto-translated to native cross-platform operations, so you may use them "
133+
"directly. Avoid shell-specific syntax like heredocs or process substitution. "
134+
"Prefer one filesystem operation per line."
135+
)
136+
137+
21138
@app.list_tools()
22139
async def handle_list_tools() -> list[types.Tool]:
23-
"""
24-
列出可用工具 / List available tools
25-
"""
140+
"""List available tools."""
26141
return [
27142
types.Tool(
28143
name="execute_commands",
29-
description="""
30-
执行shell命令列表来创建文件树结构
31-
Execute shell command list to create file tree structure
32-
33-
Args:
34-
commands: 要执行的shell命令列表(每行一个命令)
35-
working_directory: 执行命令的工作目录
36-
37-
Returns:
38-
命令执行结果和详细报告
39-
""",
144+
description=(
145+
"Execute a list of shell commands to build a file tree structure.\n"
146+
f"{_PLATFORM_HINT}\n\n"
147+
"Args:\n"
148+
" commands: shell commands, one per line\n"
149+
" working_directory: working directory for command execution\n\n"
150+
"Returns: execution results and a detailed report."
151+
),
40152
inputSchema={
41153
"type": "object",
42154
"properties": {
@@ -56,17 +168,14 @@ async def handle_list_tools() -> list[types.Tool]:
56168
),
57169
types.Tool(
58170
name="execute_single_command",
59-
description="""
60-
执行单个shell命令
61-
Execute single shell command
62-
63-
Args:
64-
command: 要执行的单个命令
65-
working_directory: 执行命令的工作目录
66-
67-
Returns:
68-
命令执行结果
69-
""",
171+
description=(
172+
"Execute a single shell command.\n"
173+
f"{_PLATFORM_HINT}\n\n"
174+
"Args:\n"
175+
" command: a single shell command\n"
176+
" working_directory: working directory for execution\n\n"
177+
"Returns: execution result."
178+
),
70179
inputSchema={
71180
"type": "object",
72181
"properties": {
@@ -143,36 +252,54 @@ async def execute_command_batch(
143252
]
144253

145254
results = []
146-
stats = {"successful": 0, "failed": 0, "timeout": 0}
255+
stats = {"successful": 0, "failed": 0, "timeout": 0, "native": 0}
256+
cwd_path = Path(working_directory)
147257

148258
for i, command in enumerate(command_lines, 1):
259+
native = _try_native_execute(command, cwd_path)
260+
if native is not None:
261+
rc, out, err = native
262+
if rc == 0:
263+
results.append(f"✅ Command {i}: {command}")
264+
if out.strip():
265+
results.append(f" Output: {out.strip()}")
266+
stats["successful"] += 1
267+
stats["native"] += 1
268+
else:
269+
results.append(f"❌ Command {i}: {command}")
270+
if err.strip():
271+
results.append(f" Error: {err.strip()}")
272+
stats["failed"] += 1
273+
continue
274+
149275
try:
150-
# 执行命令 / Execute command
151276
result = subprocess.run(
152277
command,
153278
shell=True,
154279
cwd=working_directory,
155280
capture_output=True,
156281
text=True,
157-
timeout=30, # 30秒超时
282+
timeout=30,
283+
encoding="utf-8",
284+
errors="replace",
158285
)
159286

160287
if result.returncode == 0:
161288
results.append(f"✅ Command {i}: {command}")
162289
if result.stdout.strip():
163-
results.append(f" 输出 / Output: {result.stdout.strip()}")
290+
results.append(f" Output: {result.stdout.strip()}")
164291
stats["successful"] += 1
165292
else:
166293
results.append(f"❌ Command {i}: {command}")
167294
if result.stderr.strip():
168-
results.append(f" 错误 / Error: {result.stderr.strip()}")
295+
results.append(f" Error: {result.stderr.strip()}")
169296
stats["failed"] += 1
170297

171298
except subprocess.TimeoutExpired:
172-
results.append(f"⏱️ Command {i} 超时 / timeout: {command}")
299+
results.append(f"⏱️ Command {i} timeout: {command}")
173300
stats["timeout"] += 1
174301
except Exception as e:
175-
results.append(f"💥 Command {i} 异常 / exception: {command} - {str(e)}")
302+
results.append(f"💥 Command {i} exception: {command} - {str(e)}")
176303
stats["failed"] += 1
177304

178305
# 生成执行报告 / Generate execution report
@@ -204,20 +331,27 @@ async def execute_single_command(
204331
执行结果 / Execution result
205332
"""
206333
try:
207-
# 确保工作目录存在 / Ensure working directory exists
208-
Path(working_directory).mkdir(parents=True, exist_ok=True)
209-
210-
# 执行命令 / Execute command
211-
result = subprocess.run(
212-
command,
213-
shell=True,
214-
cwd=working_directory,
215-
capture_output=True,
216-
text=True,
217-
timeout=30,
218-
)
334+
cwd_path = Path(working_directory)
335+
cwd_path.mkdir(parents=True, exist_ok=True)
336+
337+
native = _try_native_execute(command, cwd_path)
338+
if native is not None:
339+
rc, out, err = native
340+
result = subprocess.CompletedProcess(
341+
args=command, returncode=rc, stdout=out, stderr=err
342+
)
343+
else:
344+
result = subprocess.run(
345+
command,
346+
shell=True,
347+
cwd=working_directory,
348+
capture_output=True,
349+
text=True,
350+
timeout=30,
351+
encoding="utf-8",
352+
errors="replace",
353+
)
219354

220-
# 格式化输出 / Format output
221355
output = format_single_command_result(command, working_directory, result)
222356

223357
return [types.TextContent(type="text", text=output)]
@@ -250,16 +384,17 @@ def generate_execution_summary(
250384
Returns:
251385
格式化的总结 / Formatted summary
252386
"""
387+
native_count = stats.get("native", 0)
253388
return f"""
254-
命令执行总结 / Command Execution Summary:
389+
Command Execution Summary:
255390
{'='*50}
256-
工作目录 / Working Directory: {working_directory}
257-
总命令数 / Total Commands: {len(command_lines)}
258-
成功 / Successful: {stats['successful']}
259-
失败 / Failed: {stats['failed']}
260-
超时 / Timeout: {stats['timeout']}
391+
Working Directory: {working_directory}
392+
Total Commands: {len(command_lines)}
393+
Successful: {stats['successful']} (native: {native_count})
394+
Failed: {stats['failed']}
395+
Timeout: {stats['timeout']}
261396
262-
详细结果 / Detailed Results:
397+
Detailed Results:
263398
{'-'*50}"""
264399

265400

@@ -278,22 +413,22 @@ def format_single_command_result(
278413
格式化的结果 / Formatted result
279414
"""
280415
output = f"""
281-
单命令执行 / Single Command Execution:
416+
Single Command Execution:
282417
{'='*40}
283-
工作目录 / Working Directory: {working_directory}
284-
命令 / Command: {command}
285-
返回码 / Return Code: {result.returncode}
418+
Working Directory: {working_directory}
419+
Command: {command}
420+
Return Code: {result.returncode}
286421
287422
"""
288423

289424
if result.returncode == 0:
290-
output += "✅ 状态 / Status: SUCCESS / 成功\n"
425+
output += "Status: SUCCESS\n"
291426
if result.stdout.strip():
292-
output += f"输出 / Output:\n{result.stdout.strip()}\n"
427+
output += f"Output:\n{result.stdout.strip()}\n"
293428
else:
294-
output += "❌ 状态 / Status: FAILED / 失败\n"
429+
output += "Status: FAILED\n"
295430
if result.stderr.strip():
296-
output += f"错误 / Error:\n{result.stderr.strip()}\n"
431+
output += f"Error:\n{result.stderr.strip()}\n"
297432

298433
return output
299434

0 commit comments

Comments
 (0)