Skip to content

Commit 74eaed8

Browse files
committed
feat: add snapshot diffing with baseline and review follow-ups
1 parent 81a71a4 commit 74eaed8

9 files changed

Lines changed: 602 additions & 57 deletions

File tree

AGENTS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ Minimal operating guide for AI coding agents in this repo.
3434
- Put command logic in handler modules:
3535
- session/apps/appstate/open/close/replay: `src/daemon/handlers/session.ts`
3636
- click/fill/get/is: `src/daemon/handlers/interaction.ts`
37-
- snapshot/wait/alert/settings: `src/daemon/handlers/snapshot.ts`
37+
- snapshot/diff/wait/alert/settings: `src/daemon/handlers/snapshot.ts`
3838
- find: `src/daemon/handlers/find.ts`
3939
- record/trace: `src/daemon/handlers/record-trace.ts`
4040
- Generic passthrough (press/scroll/type) is daemon fallback only after handlers return null.

skills/agent-device/SKILL.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ npx -y agent-device
3030
1. Open app or deep link: `open [app|url] [url]` (`open` handles target selection + boot/activation in the normal flow)
3131
2. Snapshot: `snapshot` to get refs from accessibility tree
3232
3. Interact using refs (`press @ref`, `fill @ref "text"`; `click` is an alias of `press`, `dblclick` is an alias of `click --double-tap`)
33-
4. Re-snapshot after navigation/UI changes
33+
4. Use `diff snapshot` to compare the current UI against the previous in-session snapshot baseline
3434
5. Close session when done
3535

3636
## Commands
@@ -64,9 +64,11 @@ agent-device snapshot -c # Compact output
6464
agent-device snapshot -d 3 # Limit depth
6565
agent-device snapshot -s "Camera" # Scope to label/identifier
6666
agent-device snapshot --raw # Raw node output
67+
agent-device diff snapshot # Compare current snapshot against previous in-session baseline
6768
```
6869

6970
XCTest is the iOS snapshot engine: fast, complete, and no Accessibility permission required.
71+
`diff snapshot` is useful in exploration loops where you only need to inspect what changed in the UI since the previous snapshot.
7072

7173
### Find (semantic)
7274

@@ -239,6 +241,7 @@ agent-device apps --platform android --user-installed
239241
- `swipe` timing is platform-safe: Android uses requested duration; iOS uses normalized safe timing to avoid long-press side effects.
240242
- Pinch (`pinch <scale> [x y]`) is iOS simulator-only; scale > 1 zooms in, < 1 zooms out.
241243
- Snapshot refs are the core mechanism for interactive agent flows.
244+
- Prefer `diff snapshot` after UI mutations when you only need the delta from the prior snapshot.
242245
- Use selectors for deterministic replay artifacts and assertions (e.g. in e2e test workflows).
243246
- Prefer `snapshot -i` to reduce output size.
244247
- On iOS, snapshots use XCTest and do not require Accessibility permission.

src/core/capabilities.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ const COMMAND_CAPABILITY_MATRIX: Record<string, CommandCapability> = {
2323
click: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
2424
close: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
2525
fill: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
26+
diff: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
2627
find: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
2728
focus: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
2829
get: { ios: { simulator: true, device: true }, android: { emulator: true, device: true, unknown: true } },
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import test from 'node:test';
2+
import assert from 'node:assert/strict';
3+
import { buildSnapshotDiff, snapshotNodeToComparableLine } from '../snapshot-diff.ts';
4+
import { attachRefs, type RawSnapshotNode } from '../../utils/snapshot.ts';
5+
6+
function nodes(raw: RawSnapshotNode[]) {
7+
return attachRefs(raw);
8+
}
9+
10+
test('snapshotNodeToComparableLine ignores volatile fields', () => {
11+
const [node] = nodes([{
12+
index: 0,
13+
type: 'XCUIElementTypeTextField',
14+
label: 'Email',
15+
value: 'test@example.com',
16+
identifier: 'email-input',
17+
depth: 1,
18+
rect: { x: 10, y: 20, width: 100, height: 20 },
19+
}]);
20+
assert.equal(
21+
snapshotNodeToComparableLine(node),
22+
' textfield label="Email" value="test@example.com" id="email-input"',
23+
);
24+
});
25+
26+
test('buildSnapshotDiff returns unchanged lines when snapshots match', () => {
27+
const previous = nodes([
28+
{ index: 0, type: 'button', label: 'Submit', depth: 0 },
29+
{ index: 1, type: 'text', label: 'Create account', depth: 1 },
30+
]);
31+
const current = nodes([
32+
{ index: 0, type: 'button', label: 'Submit', depth: 0 },
33+
{ index: 1, type: 'text', label: 'Create account', depth: 1 },
34+
]);
35+
const diff = buildSnapshotDiff(previous, current);
36+
assert.deepEqual(diff.summary, { additions: 0, removals: 0, unchanged: 2 });
37+
assert.equal(diff.lines.length, 2);
38+
assert.equal(diff.lines[0]?.kind, 'unchanged');
39+
assert.equal(diff.lines[1]?.kind, 'unchanged');
40+
});
41+
42+
test('buildSnapshotDiff reports removals and additions for value changes', () => {
43+
const previous = nodes([
44+
{ index: 0, type: 'textfield', label: 'Email', value: '', depth: 0 },
45+
{ index: 1, type: 'button', label: 'Submit', depth: 0, enabled: true },
46+
]);
47+
const current = nodes([
48+
{ index: 0, type: 'textfield', label: 'Email', value: 'test@example.com', depth: 0 },
49+
{ index: 1, type: 'button', label: 'Submit', depth: 0, enabled: false },
50+
]);
51+
const diff = buildSnapshotDiff(previous, current);
52+
assert.deepEqual(diff.summary, { additions: 2, removals: 2, unchanged: 0 });
53+
assert.equal(diff.lines.length, 4);
54+
const removed = diff.lines.filter((line) => line.kind === 'removed');
55+
const added = diff.lines.filter((line) => line.kind === 'added');
56+
assert.equal(removed.length, 2);
57+
assert.equal(added.length, 2);
58+
});
59+
60+
test('buildSnapshotDiff keeps stable order with unchanged context', () => {
61+
const previous = nodes([
62+
{ index: 0, type: 'heading', label: 'Sign Up', depth: 0 },
63+
{ index: 1, type: 'text', label: 'Create account', depth: 0 },
64+
{ index: 2, type: 'button', label: 'Submit', depth: 0, enabled: true },
65+
]);
66+
const current = nodes([
67+
{ index: 0, type: 'heading', label: 'Sign Up', depth: 0 },
68+
{ index: 1, type: 'text', label: 'Create account', depth: 0 },
69+
{ index: 2, type: 'status', label: 'Sending...', depth: 0 },
70+
{ index: 3, type: 'button', label: 'Submit', depth: 0, enabled: false },
71+
]);
72+
const diff = buildSnapshotDiff(previous, current);
73+
assert.deepEqual(diff.summary, { additions: 2, removals: 1, unchanged: 2 });
74+
const kinds = diff.lines.map((line) => line.kind);
75+
assert.equal(kinds[0], 'unchanged');
76+
assert.equal(kinds[1], 'unchanged');
77+
assert.equal(diff.lines.filter((line) => line.kind === 'added').length, 2);
78+
assert.equal(diff.lines.filter((line) => line.kind === 'removed').length, 1);
79+
});
80+
81+
test('buildSnapshotDiff uses linear fallback for very large snapshots', () => {
82+
const previousRaw: RawSnapshotNode[] = [];
83+
const currentRaw: RawSnapshotNode[] = [];
84+
for (let index = 0; index < 2_100; index += 1) {
85+
previousRaw.push({ index, type: 'text', label: `row-${index}`, depth: 0 });
86+
currentRaw.push({ index, type: 'text', label: `row-${index}`, depth: 0 });
87+
}
88+
// Change one line so we still exercise add/remove behavior while crossing fallback threshold.
89+
currentRaw[1_050] = { index: 1_050, type: 'text', label: 'row-1050-updated', depth: 0 };
90+
const diff = buildSnapshotDiff(nodes(previousRaw), nodes(currentRaw));
91+
assert.equal(diff.summary.additions, 1);
92+
assert.equal(diff.summary.removals, 1);
93+
assert.equal(diff.summary.unchanged, 2_099);
94+
});

src/daemon/handlers/__tests__/snapshot-handler.test.ts

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ import assert from 'node:assert/strict';
33
import fs from 'node:fs';
44
import os from 'node:os';
55
import path from 'node:path';
6-
import { handleSnapshotCommands } from '../snapshot.ts';
6+
import { buildDiffSnapshotResponse, handleSnapshotCommands } from '../snapshot.ts';
7+
import type { SnapshotState } from '../../../utils/snapshot.ts';
78
import { SessionStore } from '../../session-store.ts';
89
import type { SessionState } from '../../types.ts';
910

@@ -114,3 +115,65 @@ test('settings usage hint documents canonical faceid states', async () => {
114115
assert.doesNotMatch(response.error.message, /validate\|unvalidate/);
115116
}
116117
});
118+
119+
test('diff rejects unsupported kind', async () => {
120+
const sessionStore = makeSessionStore();
121+
const response = await handleSnapshotCommands({
122+
req: {
123+
token: 't',
124+
session: 'default',
125+
command: 'diff',
126+
positionals: ['screenshot'],
127+
flags: {},
128+
},
129+
sessionName: 'default',
130+
logPath: '/tmp/daemon.log',
131+
sessionStore,
132+
});
133+
134+
assert.ok(response);
135+
assert.equal(response?.ok, false);
136+
if (response && !response.ok) {
137+
assert.equal(response.error.code, 'INVALID_ARGS');
138+
assert.match(response.error.message, /supports only: snapshot/i);
139+
}
140+
});
141+
142+
test('buildDiffSnapshotResponse initializes baseline when previous snapshot is missing', () => {
143+
const current: SnapshotState = {
144+
nodes: [{ index: 0, ref: 'e1', label: 'Sign Up', type: 'heading', depth: 0 }],
145+
createdAt: Date.now(),
146+
backend: 'xctest',
147+
};
148+
const data = buildDiffSnapshotResponse(undefined, current);
149+
assert.equal(data.baselineInitialized, true);
150+
assert.deepEqual(data.summary, { additions: 0, removals: 0, unchanged: 1 });
151+
assert.deepEqual(data.lines, []);
152+
});
153+
154+
test('buildDiffSnapshotResponse returns additions/removals on changed snapshot', () => {
155+
const previous: SnapshotState = {
156+
nodes: [
157+
{ index: 0, ref: 'e1', label: 'Sign Up', type: 'heading', depth: 0 },
158+
{ index: 1, ref: 'e2', label: 'Submit', type: 'button', depth: 0, enabled: true },
159+
],
160+
createdAt: Date.now() - 100,
161+
backend: 'xctest',
162+
};
163+
const current: SnapshotState = {
164+
nodes: [
165+
{ index: 0, ref: 'e1', label: 'Sign Up', type: 'heading', depth: 0 },
166+
{ index: 1, ref: 'e2', label: 'Submit', type: 'button', depth: 0, enabled: false },
167+
{ index: 2, ref: 'e3', label: 'Sending...', type: 'status', depth: 0 },
168+
],
169+
createdAt: Date.now(),
170+
backend: 'xctest',
171+
};
172+
const data = buildDiffSnapshotResponse(previous, current);
173+
assert.equal(data.baselineInitialized, false);
174+
assert.equal(data.summary.additions, 2);
175+
assert.equal(data.summary.removals, 1);
176+
assert.equal(data.summary.unchanged, 1);
177+
assert.ok(data.lines.some((line) => line.kind === 'added'));
178+
assert.ok(data.lines.some((line) => line.kind === 'removed'));
179+
});

0 commit comments

Comments
 (0)