ageflow/examples/bug-fix-pipeline/workflow.test.ts at master · Neftedollar/ageflow · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import type { WorkflowDef } from "@ageflow/core";
import { createTestHarness } from "@ageflow/testing";
import { describe, expect, it } from "vitest";
import workflow from "./workflow.js";

/**
 * Test-only variant of the workflow whose checkpoint hook approves immediately,
 * so the pipeline can run headless without anyone answering a prompt.
 * Outside of tests the checkpoint waits for TTY input or for a hook that
 * resolves to `true`.
 * The `WorkflowDef` cast widens the hooks type so the resulting object is
 * accepted by createTestHarness.
 */
const workflowForTest: WorkflowDef = {
  ...(workflow as WorkflowDef),
  hooks: {
    // Both arguments are ignored: every checkpoint is approved unconditionally.
    onCheckpoint(_taskName: string, _message: string): Promise<boolean> {
      return Promise.resolve(true);
    },
  },
};

describe("bug-fix-pipeline", () => {
  // Happy path: eval accepts the very first fix, so each stage is mocked with
  // a single canned response.
  it("runs full pipeline: analyze → fix+eval loop → summarize", async () => {
    const pipeline = createTestHarness(workflowForTest);

    pipeline.mockAgent("analyze", {
      issues: [
        {
          id: "i1",
          file: "src/app.ts",
          description: "Null pointer dereference",
          severity: "high",
        },
      ],
      summary: "Found 1 critical issue",
    });

    pipeline.mockAgent("fix", {
      patch: "diff --git a/src/app.ts\n-  obj.method()\n+  obj?.method()",
      explanation: "Added optional chaining to prevent null dereference",
      confidence: 0.9,
    });

    pipeline.mockAgent("eval", {
      satisfied: true,
      feedback: "Fix looks correct",
      score: 8,
    });

    pipeline.mockAgent("summarize", {
      report: "Fixed 1 critical issue: null pointer dereference in src/app.ts",
      fixedCount: 1,
      remainingCount: 0,
    });

    const { outputs } = await pipeline.run();

    // The summarize output is what the workflow hands back at the end.
    expect(outputs.summarize).toMatchObject({
      report: expect.stringContaining("Fixed 1 critical issue"),
      fixedCount: 1,
      remainingCount: 0,
    });

    // A single analyze call is expected.
    expect(pipeline.getTask("analyze").callCount).toBe(1);

    // A single fix call is expected because eval was satisfied immediately.
    expect(pipeline.getTask("fix").callCount).toBe(1);
  });

  // Loop path: eval rejects the first fix and accepts the second one.
  it("retries fix loop when eval is not satisfied", async () => {
    const pipeline = createTestHarness(workflowForTest);

    pipeline.mockAgent("analyze", {
      issues: [
        {
          id: "i1",
          file: "src/app.ts",
          description: "Memory leak",
          severity: "high",
        },
      ],
      summary: "Found 1 issue",
    });

    // Queued eval verdicts: a rejection first, then an approval.
    pipeline.mockAgent("eval", [
      { satisfied: false, feedback: "The fix is incomplete", score: 3 },
      { satisfied: true, feedback: "Now it looks correct", score: 8 },
    ]);

    // Queued fix attempts, one per expected loop iteration.
    pipeline.mockAgent("fix", [
      { patch: "- bad fix", explanation: "First attempt", confidence: 0.4 },
      {
        patch: "+ correct fix",
        explanation: "Second attempt",
        confidence: 0.9,
      },
    ]);

    pipeline.mockAgent("summarize", {
      report: "Fixed 1 issue after 2 iterations",
      fixedCount: 1,
      remainingCount: 0,
    });

    const { outputs } = await pipeline.run();

    expect(outputs.summarize).toBeDefined();

    // Two iterations of the loop mean two fix calls and two eval calls.
    expect(pipeline.getTask("fix").callCount).toBe(2);
    expect(pipeline.getTask("eval").callCount).toBe(2);
  });

  // Metrics check: only verifies that the run result carries metric values.
  it("workflow metrics are populated", async () => {
    const pipeline = createTestHarness(workflowForTest);

    pipeline.mockAgent("analyze", {
      issues: [],
      summary: "No issues found",
    });
    pipeline.mockAgent("fix", {
      patch: "",
      explanation: "nothing to fix",
      confidence: 1.0,
    });
    pipeline.mockAgent("eval", { satisfied: true, feedback: "ok", score: 10 });
    pipeline.mockAgent("summarize", {
      report: "Clean codebase",
      fixedCount: 0,
      remainingCount: 0,
    });

    const { metrics } = await pipeline.run();

    expect(metrics.taskCount).toBeGreaterThan(0);
    expect(metrics.totalLatencyMs).toBeGreaterThanOrEqual(0);
  });
});