Skip to content

Commit cd3e985

Browse files
authored
feat(replay): debug and replay mode (#315)
## Summary - 3 new DB tables (execution_steps, llm_interactions, file_operations) for recording execution traces - ExecutionRecorder class integrated into ReactAgent (optional, backward-compatible) - 4 new CLI commands: cf work replay, cf work diff, cf work export-trace, cf work rerun - ReplaySession for interactive step-through navigation - JSON and Markdown export formats - State reconstruction at any step via file operation replay ## Validation - Review feedback: All addressed (2 rounds — Claude review + CodeRabbit) - Demo: All 5 acceptance criteria verified - Tests: 78 new tests + 2481 total passing - CI: All checks green - Linting: Clean Closes #315
1 parent 5646f8d commit cd3e985

8 files changed

Lines changed: 3086 additions & 0 deletions

File tree

codeframe/cli/app.py

Lines changed: 340 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
codeframe status
1515
"""
1616

17+
import json
1718
from pathlib import Path
1819
from typing import Optional
1920

@@ -3129,6 +3130,345 @@ def work_follow(
31293130
raise typer.Exit(1)
31303131

31313132

3133+
# =============================================================================
3134+
# Replay / Debug commands (cf work replay, cf work diff, cf work export-trace)
3135+
# =============================================================================
3136+
3137+
3138+
@work_app.command("replay")
3139+
def work_replay(
3140+
run_id: str = typer.Argument(..., help="Run ID to replay"),
3141+
workspace_path: Optional[Path] = typer.Option(
3142+
None,
3143+
"--workspace",
3144+
"-w",
3145+
help="Workspace path (defaults to current directory)",
3146+
),
3147+
step: Optional[int] = typer.Option(
3148+
None,
3149+
"--step",
3150+
"-s",
3151+
help="Jump to a specific step number",
3152+
),
3153+
show_llm: bool = typer.Option(
3154+
False,
3155+
"--show-llm",
3156+
help="Show LLM prompts and responses",
3157+
),
3158+
show_files: bool = typer.Option(
3159+
True,
3160+
"--show-files/--no-files",
3161+
help="Show file changes at each step",
3162+
),
3163+
) -> None:
3164+
"""Replay a past execution step by step.
3165+
3166+
Shows what happened during an agent run: which tools were called,
3167+
what files were changed, and what the LLM produced at each step.
3168+
3169+
Example:
3170+
cf work replay <run-id>
3171+
cf work replay <run-id> --step 3
3172+
cf work replay <run-id> --show-llm
3173+
"""
3174+
from rich.panel import Panel
3175+
3176+
from codeframe.core.replay import (
3177+
load_execution_trace,
3178+
)
3179+
from codeframe.core.workspace import get_workspace
3180+
3181+
path = workspace_path or Path.cwd()
3182+
3183+
try:
3184+
workspace = get_workspace(path)
3185+
trace = load_execution_trace(workspace, run_id)
3186+
3187+
if not trace:
3188+
console.print(f"[red]Error:[/red] No trace found for run '{run_id}'")
3189+
raise typer.Exit(1)
3190+
3191+
# Header
3192+
console.print(
3193+
Panel(
3194+
f"[bold]Run:[/bold] {trace.run_id}\n"
3195+
f"[bold]Task:[/bold] {trace.task_id}\n"
3196+
f"[bold]Status:[/bold] {trace.status}\n"
3197+
f"[bold]Steps:[/bold] {len(trace.steps)}",
3198+
title="Execution Replay",
3199+
)
3200+
)
3201+
3202+
# Build lookups
3203+
ops_by_step = {}
3204+
for op in trace.file_operations:
3205+
ops_by_step.setdefault(op.step_id, []).append(op)
3206+
3207+
llm_by_step = {}
3208+
for llm in trace.llm_interactions:
3209+
llm_by_step.setdefault(llm.step_id, []).append(llm)
3210+
3211+
# Filter to specific step if requested
3212+
steps_to_show = trace.steps
3213+
if step is not None:
3214+
steps_to_show = [s for s in trace.steps if s.step_number == step]
3215+
if not steps_to_show:
3216+
console.print(f"[yellow]No step {step} found (max: {len(trace.steps)})[/yellow]")
3217+
raise typer.Exit(1)
3218+
3219+
for s in steps_to_show:
3220+
status_color = {"completed": "green", "failed": "red"}.get(s.status, "yellow")
3221+
console.print(
3222+
f"\n[bold]Step {s.step_number}:[/bold] {s.description} "
3223+
f"[{status_color}][{s.status}][/{status_color}]"
3224+
)
3225+
3226+
if show_files:
3227+
step_ops = ops_by_step.get(s.id, [])
3228+
for op in step_ops:
3229+
op_color = {"create": "green", "edit": "yellow", "delete": "red"}.get(
3230+
op.operation_type, "white"
3231+
)
3232+
console.print(f" [{op_color}]{op.operation_type}[/{op_color}] {op.file_path}")
3233+
3234+
if show_llm:
3235+
step_llms = llm_by_step.get(s.id, [])
3236+
for llm in step_llms:
3237+
console.print(f" [dim]LLM ({llm.model}, {llm.tokens_used} tokens):[/dim]")
3238+
console.print(f" [cyan]Prompt:[/cyan] {llm.prompt[:200]}")
3239+
console.print(f" [cyan]Response:[/cyan] {llm.response[:200]}")
3240+
3241+
# Summary
3242+
summary = trace.summary()
3243+
console.print(f"\n[dim]Total: {summary['total_steps']} steps, "
3244+
f"{summary['llm_calls']} LLM calls, "
3245+
f"{summary['total_tokens']} tokens, "
3246+
f"{summary['files_modified']} files modified[/dim]")
3247+
3248+
except FileNotFoundError:
3249+
console.print(f"[red]Error:[/red] No workspace found at {path}")
3250+
raise typer.Exit(1)
3251+
3252+
3253+
@work_app.command("diff")
3254+
def work_diff(
3255+
run_id: str = typer.Argument(..., help="Run ID to show diffs for"),
3256+
workspace_path: Optional[Path] = typer.Option(
3257+
None,
3258+
"--workspace",
3259+
"-w",
3260+
help="Workspace path (defaults to current directory)",
3261+
),
3262+
from_step: Optional[int] = typer.Option(
3263+
None,
3264+
"--from-step",
3265+
help="Starting step number (default: 0 = before execution)",
3266+
),
3267+
to_step: Optional[int] = typer.Option(
3268+
None,
3269+
"--to-step",
3270+
help="Ending step number (default: last step)",
3271+
),
3272+
) -> None:
3273+
"""Show file changes across an execution run.
3274+
3275+
Displays unified diffs of all files modified during the run,
3276+
or between specific steps.
3277+
3278+
Example:
3279+
cf work diff <run-id>
3280+
cf work diff <run-id> --from-step 1 --to-step 3
3281+
"""
3282+
import difflib
3283+
3284+
from codeframe.core.replay import compare_steps, load_execution_trace
3285+
from codeframe.core.workspace import get_workspace
3286+
3287+
path = workspace_path or Path.cwd()
3288+
3289+
try:
3290+
workspace = get_workspace(path)
3291+
trace = load_execution_trace(workspace, run_id)
3292+
3293+
if not trace:
3294+
console.print(f"[red]Error:[/red] No trace found for run '{run_id}'")
3295+
raise typer.Exit(1)
3296+
3297+
step_a = from_step if from_step is not None else 0
3298+
step_b = to_step if to_step is not None else max(s.step_number for s in trace.steps)
3299+
3300+
changes = compare_steps(workspace, run_id, step_a, step_b)
3301+
3302+
if not changes:
3303+
console.print("[yellow]No file changes between these steps.[/yellow]")
3304+
return
3305+
3306+
console.print(
3307+
f"[bold]File changes:[/bold] step {step_a} → step {step_b} "
3308+
f"({len(changes)} file(s))\n"
3309+
)
3310+
3311+
for file_path, change in changes.items():
3312+
before = change["before"] or ""
3313+
after = change["after"] or ""
3314+
3315+
if change["before"] is None:
3316+
console.print(f"[green]+++ {file_path}[/green] (created)")
3317+
elif change["after"] is None:
3318+
console.print(f"[red]--- {file_path}[/red] (deleted)")
3319+
else:
3320+
console.print(f"[yellow]~~~ {file_path}[/yellow] (modified)")
3321+
3322+
diff_lines = list(
3323+
difflib.unified_diff(
3324+
before.splitlines(keepends=True),
3325+
after.splitlines(keepends=True),
3326+
fromfile=f"a/{file_path}",
3327+
tofile=f"b/{file_path}",
3328+
)
3329+
)
3330+
for line in diff_lines:
3331+
line = line.rstrip()
3332+
if line.startswith("+") and not line.startswith("+++"):
3333+
console.print(f"[green]{line}[/green]")
3334+
elif line.startswith("-") and not line.startswith("---"):
3335+
console.print(f"[red]{line}[/red]")
3336+
else:
3337+
console.print(f"[dim]{line}[/dim]")
3338+
console.print()
3339+
3340+
except FileNotFoundError:
3341+
console.print(f"[red]Error:[/red] No workspace found at {path}")
3342+
raise typer.Exit(1)
3343+
3344+
3345+
@work_app.command("export-trace")
3346+
def work_export_trace(
3347+
run_id: str = typer.Argument(..., help="Run ID to export"),
3348+
workspace_path: Optional[Path] = typer.Option(
3349+
None,
3350+
"--workspace",
3351+
"-w",
3352+
help="Workspace path (defaults to current directory)",
3353+
),
3354+
output_format: str = typer.Option(
3355+
"json",
3356+
"--format",
3357+
"-f",
3358+
help="Export format: json or markdown",
3359+
click_type=click.Choice(["json", "markdown"], case_sensitive=False),
3360+
),
3361+
output: Optional[Path] = typer.Option(
3362+
None,
3363+
"--output",
3364+
"-o",
3365+
help="Write to file instead of stdout",
3366+
),
3367+
) -> None:
3368+
"""Export an execution trace for analysis.
3369+
3370+
Produces a complete trace in JSON or Markdown format,
3371+
including all steps, LLM interactions, and file changes.
3372+
3373+
Example:
3374+
cf work export-trace <run-id>
3375+
cf work export-trace <run-id> --format markdown
3376+
cf work export-trace <run-id> --output trace.json
3377+
"""
3378+
from codeframe.core.replay import (
3379+
export_trace_json,
3380+
export_trace_markdown,
3381+
load_execution_trace,
3382+
)
3383+
from codeframe.core.workspace import get_workspace
3384+
3385+
path = workspace_path or Path.cwd()
3386+
3387+
try:
3388+
workspace = get_workspace(path)
3389+
trace = load_execution_trace(workspace, run_id)
3390+
3391+
if not trace:
3392+
console.print(f"[red]Error:[/red] No trace found for run '{run_id}'")
3393+
raise typer.Exit(1)
3394+
3395+
if output_format == "json":
3396+
content = json.dumps(export_trace_json(trace), indent=2)
3397+
else:
3398+
content = export_trace_markdown(trace)
3399+
3400+
if output:
3401+
output.write_text(content)
3402+
console.print(f"[green]Trace exported to {output}[/green]")
3403+
else:
3404+
console.print(content, highlight=False)
3405+
3406+
except FileNotFoundError:
3407+
console.print(f"[red]Error:[/red] No workspace found at {path}")
3408+
raise typer.Exit(1)
3409+
3410+
3411+
@work_app.command("rerun")
3412+
def work_rerun(
3413+
run_id: str = typer.Argument(..., help="Run ID to re-run from"),
3414+
workspace_path: Optional[Path] = typer.Option(
3415+
None,
3416+
"--workspace",
3417+
"-w",
3418+
help="Workspace path (defaults to current directory)",
3419+
),
3420+
from_step: int = typer.Option(
3421+
1,
3422+
"--from-step",
3423+
help="Step number to resume from",
3424+
),
3425+
) -> None:
3426+
"""Prepare to re-execute a run from a specific step.
3427+
3428+
Reconstructs the file state at step N and shows what
3429+
would need to be re-executed. Use this to understand
3430+
what happened and plan a manual re-run.
3431+
3432+
Example:
3433+
cf work rerun <run-id> --from-step 2
3434+
"""
3435+
from codeframe.core.replay import prepare_rerun
3436+
from codeframe.core.workspace import get_workspace
3437+
3438+
path = workspace_path or Path.cwd()
3439+
3440+
try:
3441+
workspace = get_workspace(path)
3442+
rerun_info = prepare_rerun(workspace, run_id, from_step)
3443+
3444+
console.print(f"[bold]Re-run preparation for run {run_id}[/bold]\n")
3445+
console.print(f"[bold]Resume from:[/bold] Step {from_step}")
3446+
console.print(f"[bold]Task:[/bold] {rerun_info['task_id']}")
3447+
3448+
file_state = rerun_info["file_state"]
3449+
if file_state:
3450+
console.print(f"\n[bold]File state at step {from_step}:[/bold]")
3451+
for fp in sorted(file_state.keys()):
3452+
console.print(f" {fp}")
3453+
else:
3454+
console.print(f"\n[yellow]No files modified at step {from_step}[/yellow]")
3455+
3456+
remaining = rerun_info["remaining_steps"]
3457+
if remaining:
3458+
console.print(f"\n[bold]Remaining steps ({len(remaining)}):[/bold]")
3459+
for rs in remaining:
3460+
console.print(f" Step {rs['step_number']}: {rs['description']}")
3461+
else:
3462+
console.print("\n[yellow]No remaining steps after this point[/yellow]")
3463+
3464+
except FileNotFoundError:
3465+
console.print(f"[red]Error:[/red] No workspace found at {path}")
3466+
raise typer.Exit(1)
3467+
except ValueError as e:
3468+
console.print(f"[red]Error:[/red] {e}")
3469+
raise typer.Exit(1)
3470+
3471+
31323472
# =============================================================================
31333473
# Batch execution commands (subcommand group: cf work batch <cmd>)
31343474
# =============================================================================

0 commit comments

Comments
 (0)