Skip to content

Commit 08a838c

Browse files
committed
Improve the debugger skill and make it available from the top-level skills dir
Signed-off-by: Chenjie Luo <chenjiel@nvidia.com>
1 parent b6c6ec3 commit 08a838c

File tree

3 files changed

+53
-9
lines changed

3 files changed

+53
-9
lines changed

.claude/skills/debug/SKILL.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
---
2+
name: debug
3+
description: Run commands inside a remote Docker container via the file-based command relay (tools/debugger). Use when the user says "run in Docker", "run on GPU", "debug remotely", "run test in container", "check nvidia-smi", "run pytest in Docker", or needs to execute any command inside a Docker container that shares the repo filesystem. Requires the user to have started server.sh inside the container first.
4+
---
5+
6+
# Remote Docker Debugger
7+
8+
Execute commands inside a Docker container from the host using the file-based command relay.
9+
10+
**Read `tools/debugger/CLAUDE.md` for full usage details** — it has the protocol, examples, and troubleshooting.
11+
12+
## Quick Reference
13+
14+
```bash
15+
# Check connection
16+
bash tools/debugger/client.sh status
17+
18+
# Connect to server (user must start server.sh in Docker first)
19+
bash tools/debugger/client.sh handshake
20+
21+
# Run a command
22+
bash tools/debugger/client.sh run "<command>"
23+
24+
# Long-running command (default timeout is 600s)
25+
bash tools/debugger/client.sh --timeout 1800 run "<command>"
26+
27+
# Reconnect after server restart
28+
bash tools/debugger/client.sh flush
29+
bash tools/debugger/client.sh handshake
30+
```

tools/debugger/client.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ case "$SUBCOMMAND" in
9191
# Generate a unique command ID (timestamp + PID to avoid collisions)
9292
cmd_id="$(date +%s%N)_$$"
9393

94+
echo "[client] Running: $*"
95+
9496
# Write the command file atomically (tmp + mv)
9597
echo "$*" > "$CMD_DIR/$cmd_id.sh.tmp"
9698
mv "$CMD_DIR/$cmd_id.sh.tmp" "$CMD_DIR/$cmd_id.sh"

tools/debugger/server.sh

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,28 @@ fi
8787
rm -rf "$RELAY_DIR"
8888
mkdir -p "$CMD_DIR" "$RESULT_DIR"
8989

90-
# Install modelopt in editable mode (skip if already editable-installed from WORKDIR)
91-
if python -c "
92-
import modelopt, os
93-
assert os.path.realpath(modelopt.__path__[0]).startswith(os.path.realpath('$WORKDIR'))
94-
" 2>/dev/null; then
90+
# Ensure modelopt is editable-installed from WORKDIR
91+
#######################################
# Check that the importable `modelopt` package resolves to a location
# inside $WORKDIR (i.e. the local checkout is the one Python will load).
# Globals:   WORKDIR (read)
# Arguments: none
# Outputs:   on mismatch, a one-line diagnostic; Python's stderr is merged
#            into stdout (2>&1), so import errors also appear on stdout
# Returns:   0 when modelopt is loaded from WORKDIR or a directory below it,
#            non-zero otherwise (including when modelopt cannot be imported)
#######################################
check_modelopt_local() {
  # WORKDIR is handed over as argv[1] rather than interpolated into the
  # Python source, so quotes or backslashes in the path cannot break or
  # alter the program text.
  python -c '
import modelopt, os, sys
actual = os.path.realpath(modelopt.__path__[0])
expected = os.path.realpath(sys.argv[1])
# Compare on a path-separator boundary: a bare startswith() would also
# accept sibling directories such as "<WORKDIR>2".
prefix = expected.rstrip(os.sep) + os.sep
if not (actual == expected or actual.startswith(prefix)):
    print(f"modelopt loaded from {actual}, expected under {expected}", file=sys.stderr)
    sys.exit(1)
' "$WORKDIR" 2>&1
}
101+
102+
if check_modelopt_local >/dev/null 2>&1; then
95103
echo "[server] modelopt already editable-installed from $WORKDIR, skipping pip install."
96104
else
97105
echo "[server] Installing modelopt (pip install -e .[dev]) ..."
98-
(cd "$WORKDIR" && pip install -e ".[dev]") || {
99-
echo "[server] WARNING: pip install failed (exit=$?), continuing anyway."
100-
}
106+
(cd "$WORKDIR" && pip install -e ".[dev]")
107+
if ! check_modelopt_local; then
108+
echo "[server] ERROR: modelopt is not running from the local folder ($WORKDIR)."
109+
echo "[server] Try: pip install -e '.[dev]' inside the container, then restart the server."
110+
exit 1
111+
fi
101112
echo "[server] Install done."
102113
fi
103114

@@ -130,7 +141,8 @@ while true; do
130141

131142
for cmd_file in "$CMD_DIR"/*.sh; do
132143
cmd_id="$(basename "$cmd_file" .sh)"
133-
echo "[server] Executing command $cmd_id..."
144+
cmd_content=$(cat "$cmd_file")
145+
echo "[server] Executing command $cmd_id: $cmd_content"
134146

135147
# Execute the command, tee stdout+stderr to console and result file
136148
(cd "$WORKDIR" && bash "$cmd_file" 2>&1) | tee "$RESULT_DIR/$cmd_id.log" || true

0 commit comments

Comments
 (0)