-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: test-simulation.sh
More file actions
325 lines (292 loc) · 12.8 KB
/
test-simulation.sh
File metadata and controls
325 lines (292 loc) · 12.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
#!/usr/bin/env bash
# =============================================================================
# tools/test-simulation.sh
# =============================================================================
# Exercises every testable permutation of `--simulation` without burning real
# LLM tokens or hitting the network:
#
# 1. Validation rejection: --simulation with each non-zrun CLI must error.
# 2. AIDD orchestration layer: --simulation with --cli zrun runs a fake
# iteration for each mode (coding, todo, validate, in-progress, audit,
# interview, role, custom prompt) and emits the canned SIMULATION log.
# 3. ZRun tool layer: executeTool(simulation=true) returns canned results
# for write_file/edit_file/bash and falls through for read-only tools.
#
# Run from anywhere:
# bash tools/test-simulation.sh
# =============================================================================
# -e is deliberately omitted: failing cases are tallied by fail() and the run
# continues; -u and pipefail still catch scripting mistakes.
set -uo pipefail
# Absolute directory of this script, independent of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Repo root is one level above tools/ (see path in the header comment).
AIDD_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Entry point under test.
AIDD_SH="$AIDD_ROOT/aidd.sh"
# Running tallies maintained by ok()/fail(); FAILURES keeps each failure
# message so the summary at the bottom can list them.
PASS=0
FAIL=0
FAILURES=()
# ANSI colors: default to empty (plain output), enable only for a TTY stdout.
GREEN=""
RED=""
YELLOW=""
DIM=""
RESET=""
if [[ -t 1 ]]; then
  GREEN=$'\033[0;32m'
  RED=$'\033[0;31m'
  YELLOW=$'\033[0;33m'
  DIM=$'\033[2m'
  RESET=$'\033[0m'
fi
# Record a passing case: print a colored PASS line and bump the tally.
ok() {
  echo "${GREEN}PASS${RESET} $*"
  PASS=$((PASS+1))
}

# Record a failing case: print a colored FAIL line, bump the tally, and keep
# the message for the end-of-run summary.
fail() {
  echo "${RED}FAIL${RESET} $*"
  FAIL=$((FAIL+1))
  FAILURES+=("$*")
}

# Print a blank line followed by a colored section header.
hdr() {
  echo
  echo "${YELLOW}=== $* ===${RESET}"
}
# --- Setup: ephemeral project directory --------------------------------------
WORK_DIR="$(mktemp -d -t aidd-sim-XXXXXX)"
# NOTE: this EXIT trap is replaced later (section 4 re-registers one that also
# removes the TS harness; it repeats the WORK_DIR cleanup there).
trap 'rm -rf "$WORK_DIR"' EXIT
# Each test gets a fresh project subdirectory so AIDD template-copy doesn't
# carry over state between cases.
# Create a fresh project directory under WORK_DIR seeded with a spec file.
# Arguments:
#   $1 - test name; used as both the subdirectory name and the spec content.
# Outputs:
#   absolute project path on stdout.
# Returns:
#   non-zero if the directory cannot be created (previously this failure was
#   silently ignored and a bogus path was still echoed).
new_project() {
  local name="$1"
  local dir="$WORK_DIR/$name"
  mkdir -p "$dir" || return 1
  echo "test spec for $name" > "$dir/spec.md"
  echo "$dir"
}
# =============================================================================
# 1. Validation rejection: --simulation only allowed with --cli zrun
# =============================================================================
hdr "Validation: --simulation rejected for non-zrun CLIs"

# Every non-zrun CLI must be refused at argument-validation time.
for cli in opencode kilocode claude-code codex; do
  proj="$(new_project "reject-$cli")"
  output=$(bash "$AIDD_SH" --cli "$cli" --project-dir "$proj" --spec "$proj/spec.md" \
    --simulation --max-iterations 1 --no-clean 2>&1 || true)
  if grep -qE "simulation is only supported with --cli zrun" <<<"$output"; then
    ok "rejects --cli $cli"
  else
    fail "did NOT reject --cli $cli (output: $(tail -3 <<<"$output" | tr '\n' ' '))"
  fi
done

# Sanity: zrun + --simulation must NOT error at validation
proj="$(new_project "accept-zrun")"
output=$(bash "$AIDD_SH" --cli zrun --project-dir "$proj" --spec "$proj/spec.md" \
  --simulation --max-iterations 1 --no-clean 2>&1 || true)
if ! grep -qE "simulation is only supported" <<<"$output"; then
  ok "accepts --cli zrun"
else
  fail "zrun was incorrectly rejected by validation"
fi
# =============================================================================
# 2. AIDD orchestration layer: every mode emits canned SIMULATION output
# =============================================================================
hdr "AIDD layer: simulated iteration runs across modes"
# Each test asserts:
# - exit 0
# - output contains "[SIMULATION MODE]"
# - output contains "[zrun] Session complete"
# - no LLM output (no real provider banner with non-zhipu model, no API errors)

# Run one simulated AIDD iteration and assert the canned markers are present.
# Arguments:
#   $1 - test label (also used as the project subdirectory name)
#   $@ - extra flags forwarded to aidd.sh (e.g. --validate, --audit SECURITY)
run_aidd_sim() {
  local label="$1"
  shift
  # Declare separately from the assignment so new_project's exit status is
  # not masked by `local` (ShellCheck SC2155); a failed setup is now a FAIL
  # instead of silently running against a bogus path.
  local proj
  if ! proj="$(new_project "$label")"; then
    fail "$label: could not create project directory"
    return
  fi
  local out
  out=$(bash "$AIDD_SH" --cli zrun --project-dir "$proj" --spec "$proj/spec.md" \
    --simulation --max-iterations 1 --no-clean "$@" 2>&1)
  local rc=$?
  if [[ $rc -ne 0 ]]; then
    fail "$label: exit code $rc"
    echo "${DIM}$out${RESET}" | tail -8
    return
  fi
  if ! echo "$out" | grep -q "\[SIMULATION MODE\]"; then
    fail "$label: missing [SIMULATION MODE] marker"
    return
  fi
  if ! echo "$out" | grep -q "\[zrun\] Session complete"; then
    fail "$label: missing [zrun] Session complete line"
    return
  fi
  ok "$label"
}
run_aidd_sim "mode-default-coding"
run_aidd_sim "mode-validate" --validate
run_aidd_sim "mode-audit" --audit SECURITY
run_aidd_sim "mode-custom-prompt" --prompt "do a quick QC pass"
run_aidd_sim "mode-multi-iter" --max-iterations 3

# --todo / --in-progress bail out before any iteration when the project has
# no matching work. That gating is AIDD orchestration behavior independent of
# --simulation; their per-prompt summary mapping is covered by the
# prompt-flavor unit test in section 3 instead.

# The multi-iteration run above must have left exactly three iteration logs.
proj="$WORK_DIR/mode-multi-iter"
log_count=$(find "$proj/.aidd/iterations" -name '[0-9]*.log' 2>/dev/null | wc -l | tr -d ' ')
if [[ "$log_count" != "3" ]]; then
  fail "mode-multi-iter: expected 3 iteration logs, found $log_count"
else
  ok "mode-multi-iter: 3 iteration logs written"
fi

# A simulated coding run must not write real source files — only the AIDD
# scaffolding plus the spec we seeded.
proj="$WORK_DIR/mode-default-coding"
stray=$(find "$proj" -type f \
  -not -path '*/.aidd/*' \
  -not -path '*/.claude/*' \
  -not -name 'spec.md' \
  -not -name 'CLAUDE.md' 2>/dev/null)
if [[ -n "$stray" ]]; then
  fail "stray files written during simulation: $stray"
else
  ok "no source files written during simulation"
fi
# =============================================================================
# 3. emit_simulated_zrun_output: per-prompt summary flavors
# =============================================================================
hdr "AIDD layer: emit_simulated_zrun_output per-prompt flavors"

# Call emit_simulated_zrun_output directly for one prompt flavor and assert
# its canned summary. cli-zrun.sh is sourced inside a subshell so nothing
# leaks into this script; log_debug is stubbed because utils.sh (its usual
# provider) is not loaded here.
# Arguments:
#   $1 - prompt basename (flavor)
#   $2 - substring the canned output must contain
test_flavor() {
  local flavor="$1"
  local want="$2"
  local out
  out=$(
    # shellcheck disable=SC1091
    SIMULATION_MODE=true
    log_debug() { :; }
    export -f log_debug 2>/dev/null || true
    source "$AIDD_ROOT/lib/cli-zrun.sh"
    emit_simulated_zrun_output "/tmp/fake-proj" "/tmp/prompts/${flavor}.md"
  )
  if grep -qF "$want" <<<"$out"; then
    ok "flavor $flavor → $want"
  else
    fail "flavor $flavor: expected '$want', got: $(grep Summary <<<"$out" || echo '(no Summary line)')"
  fi
}
# Drive test_flavor from a flavor → expected-summary table ('|' never occurs
# in either column).
while IFS='|' read -r flavor expected; do
  test_flavor "$flavor" "$expected"
done <<'FLAVORS'
coding|Implemented next backlog feature (simulated).
in-progress|Continued in-progress feature (simulated).
todo|Resolved next TODO item (simulated).
validate|Validated incomplete features (simulated).
initializer|Initialized project skeleton from spec (simulated).
onboarding|Captured onboarding context (simulated).
interview|Answered next interview question (simulated).
coordinator|Produced coordinator suggestion JSON (simulated).
custom-foo|Executed prompt 'custom-foo' (simulated).
FLAVORS
# =============================================================================
# 4. ZRun tool layer: executeTool(simulation=true) no-ops mutations
# =============================================================================
hdr "ZRun layer: executeTool simulation behavior"
# Build a tiny TS harness that calls executeTool directly (no LLM, no network).
# Place it inside zrun/ so its relative imports resolve correctly.
HARNESS="$AIDD_ROOT/zrun/.test-simulation-harness.ts"
cleanup_harness() { rm -f "$HARNESS"; }
# Re-registering an EXIT trap replaces the earlier one, so the WORK_DIR
# removal must be repeated here alongside the harness cleanup.
trap 'rm -rf "$WORK_DIR"; cleanup_harness' EXIT
# The heredoc delimiter is quoted ('TS'), so bash performs no expansion and
# the TypeScript below reaches disk verbatim.
cat > "$HARNESS" <<'TS'
import { executeTool } from './src/tools/index';
import { parseSimulationFlag } from './src/config';
import { existsSync, mkdtempSync, readFileSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
const cwd = mkdtempSync(join(tmpdir(), 'zrun-sim-'));
const failures: string[] = [];
const expect = (cond: boolean, msg: string) => {
  if (!cond) failures.push(msg);
};
// --- parseSimulationFlag --------------------------------------------------
expect(parseSimulationFlag(['--simulation']) === true, 'parseSimulationFlag(["--simulation"]) should be true');
expect(parseSimulationFlag(['--model', 'glm-5']) === false, 'parseSimulationFlag without flag should be false');
expect(parseSimulationFlag([]) === false, 'parseSimulationFlag([]) should be false');
expect(parseSimulationFlag(['--model', 'x', '--simulation']) === true, 'parseSimulationFlag with flag late should be true');
// --- write_file: simulation must NOT touch disk ---------------------------
const writePath = 'should-not-exist.txt';
const writeRes = await executeTool(
'write_file',
JSON.stringify({ path: writePath, content: 'hello simulated' }),
cwd,
true
);
expect(writeRes.includes('[SIMULATED]'), 'write_file simulated result missing [SIMULATED]');
expect(writeRes.includes(writePath), 'write_file simulated result missing path');
expect(!existsSync(join(cwd, writePath)), 'write_file simulated must NOT create file on disk');
// --- write_file: simulation=false WOULD write (sanity, then cleanup) ------
const realPath = 'really-written.txt';
const realRes = await executeTool(
'write_file',
JSON.stringify({ path: realPath, content: 'real content' }),
cwd,
false
);
expect(!realRes.includes('[SIMULATED]'), 'real write_file must NOT mark [SIMULATED]');
expect(existsSync(join(cwd, realPath)), 'real write_file must create file on disk');
// --- edit_file: simulated no-op ------------------------------------------
// Set up a real file first (using simulation=false), then try a simulated edit
const editRes = await executeTool(
'edit_file',
JSON.stringify({ path: realPath, old_string: 'real', new_string: 'PATCHED' }),
cwd,
true
);
expect(editRes.includes('[SIMULATED]'), 'edit_file simulated result missing [SIMULATED]');
const onDisk = readFileSync(join(cwd, realPath), 'utf-8');
expect(onDisk === 'real content', `edit_file simulated must NOT mutate disk (got: ${onDisk})`);
// --- bash: simulated must NOT execute ------------------------------------
const sentinel = join(cwd, 'sentinel-from-bash.txt');
const bashRes = await executeTool(
'bash',
JSON.stringify({ command: `touch '${sentinel}'` }),
cwd,
true
);
expect(bashRes.includes('[SIMULATED]'), 'bash simulated result missing [SIMULATED]');
expect(bashRes.includes('exit code: 0'), 'bash simulated result missing exit code: 0');
expect(!existsSync(sentinel), 'bash simulated must NOT execute the command');
// --- read_file: read-only tool falls through even in simulation ----------
const readRes = await executeTool(
'read_file',
JSON.stringify({ path: realPath }),
cwd,
true
);
expect(!readRes.includes('[SIMULATED]'), 'read_file in simulation must fall through (no [SIMULATED])');
expect(readRes.includes('real content'), 'read_file in simulation must return real file contents');
// --- glob, list_directory: read-only fall-through -------------------------
const globRes = await executeTool(
'glob',
JSON.stringify({ pattern: '*.txt' }),
cwd,
true
);
expect(!globRes.includes('[SIMULATED]'), 'glob in simulation must fall through');
expect(globRes.includes(realPath), 'glob in simulation must return real matches');
const lsRes = await executeTool('list_directory', JSON.stringify({ path: '.' }), cwd, true);
expect(!lsRes.includes('[SIMULATED]'), 'list_directory in simulation must fall through');
// --- Cleanup -------------------------------------------------------------
rmSync(cwd, { recursive: true, force: true });
if (failures.length > 0) {
for (const f of failures) console.error('FAIL: ' + f);
process.exit(1);
}
console.log(`OK: ${[
'parseSimulationFlag',
'write_file simulated',
'write_file real',
'edit_file simulated',
'bash simulated',
'read_file fall-through',
'glob fall-through',
'list_directory fall-through',
].join(', ')}`);
TS
# Run the harness with bun (uses zrun's tsconfig and node_modules); capture
# the exit status explicitly rather than testing the subshell inline.
harness_rc=0
(cd "$AIDD_ROOT/zrun" && bun run ".test-simulation-harness.ts" 2>&1) || harness_rc=$?
if [[ $harness_rc -eq 0 ]]; then
  ok "executeTool simulation behavior (write/edit/bash no-op; read/glob/ls fall through)"
else
  fail "executeTool simulation harness failed (see output above)"
fi
# =============================================================================
# Summary
# =============================================================================
echo
echo "${YELLOW}=== Summary ===${RESET}"
echo "${GREEN}Passed: $PASS${RESET}"
if (( FAIL > 0 )); then
  echo "${RED}Failed: $FAIL${RESET}"
  for failure in "${FAILURES[@]}"; do
    echo " - $failure"
  done
  exit 1
fi
echo "${RED}Failed: 0${RESET}"
exit 0