Skip to content

Commit a2cc6bd

Browse files
authored
feat(whaleflow): add trace store schema migration
Adds a state-store v2 schema migration for WhaleFlow workflow, branch, leaf, control-node, and teacher-candidate trace tables. Keeps workflow execution/replay deferred and preserves @AdityaVG13 WhaleFlow draft credit in the changelog.
1 parent f8b26b4 commit a2cc6bd

4 files changed

Lines changed: 227 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4040
release-safe. The foundation now includes serializable branch, leaf, and
4141
control-node result records toward the #2668 TraceStore contract. Thanks
4242
@AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
43+
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
44+
workflow, branch, leaf, control-node, and teacher-candidate runs. The
45+
migration creates persistence shape only; workflow execution and replay
46+
remain deferred until the runtime semantics are safe (#2668).
4347
- Added an official VS Code extension Phase 0 scaffold with terminal launch,
4448
local runtime attach checks, status bar state, and a read-only Agent View
4549
preview backed by recent runtime thread summaries. This answers the VS Code

crates/state/src/lib.rs

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ impl StateStore {
267267

268268
fn init_schema(&self) -> Result<()> {
269269
let conn = self.conn()?;
270-
let user_version: u32 = conn.query_row("PRAGMA user_version;", [], |row| row.get(0))?;
270+
let mut user_version: u32 = conn.query_row("PRAGMA user_version;", [], |row| row.get(0))?;
271271
if user_version == 0 {
272272
conn.execute_batch(
273273
r#"
@@ -376,6 +376,104 @@ impl StateStore {
376376
"#,
377377
)
378378
.context("failed to initialize thread schema")?;
379+
user_version = 1;
380+
}
381+
if user_version < 2 {
382+
conn.execute_batch(
383+
r#"
384+
BEGIN;
385+
CREATE TABLE IF NOT EXISTS workflow_runs (
386+
id TEXT PRIMARY KEY,
387+
workflow_id TEXT NOT NULL,
388+
goal TEXT NOT NULL,
389+
status TEXT NOT NULL,
390+
input_hash TEXT,
391+
started_at INTEGER NOT NULL,
392+
completed_at INTEGER,
393+
metadata_json TEXT NOT NULL DEFAULT '{}'
394+
);
395+
CREATE INDEX IF NOT EXISTS idx_workflow_runs_status_started_at
396+
ON workflow_runs(status, started_at DESC);
397+
CREATE INDEX IF NOT EXISTS idx_workflow_runs_workflow_started_at
398+
ON workflow_runs(workflow_id, started_at DESC);
399+
400+
CREATE TABLE IF NOT EXISTS branch_runs (
401+
id TEXT PRIMARY KEY,
402+
workflow_run_id TEXT NOT NULL,
403+
branch_id TEXT NOT NULL,
404+
node_id TEXT NOT NULL,
405+
status TEXT NOT NULL,
406+
started_at INTEGER NOT NULL,
407+
completed_at INTEGER,
408+
result_json TEXT NOT NULL DEFAULT '{}',
409+
FOREIGN KEY(workflow_run_id) REFERENCES workflow_runs(id) ON DELETE CASCADE
410+
);
411+
CREATE INDEX IF NOT EXISTS idx_branch_runs_workflow_run_id
412+
ON branch_runs(workflow_run_id);
413+
CREATE INDEX IF NOT EXISTS idx_branch_runs_branch_id
414+
ON branch_runs(branch_id);
415+
416+
CREATE TABLE IF NOT EXISTS leaf_runs (
417+
id TEXT PRIMARY KEY,
418+
workflow_run_id TEXT NOT NULL,
419+
branch_run_id TEXT,
420+
leaf_id TEXT NOT NULL,
421+
task_id TEXT NOT NULL,
422+
input_hash TEXT,
423+
status TEXT NOT NULL,
424+
output_json TEXT NOT NULL DEFAULT '{}',
425+
artifacts_json TEXT NOT NULL DEFAULT '[]',
426+
started_at INTEGER NOT NULL,
427+
completed_at INTEGER,
428+
FOREIGN KEY(workflow_run_id) REFERENCES workflow_runs(id) ON DELETE CASCADE,
429+
FOREIGN KEY(branch_run_id) REFERENCES branch_runs(id) ON DELETE SET NULL
430+
);
431+
CREATE INDEX IF NOT EXISTS idx_leaf_runs_workflow_run_id
432+
ON leaf_runs(workflow_run_id);
433+
CREATE INDEX IF NOT EXISTS idx_leaf_runs_replay_lookup
434+
ON leaf_runs(workflow_run_id, leaf_id, input_hash);
435+
436+
CREATE TABLE IF NOT EXISTS control_node_runs (
437+
id TEXT PRIMARY KEY,
438+
workflow_run_id TEXT NOT NULL,
439+
node_id TEXT NOT NULL,
440+
kind TEXT NOT NULL,
441+
status TEXT NOT NULL,
442+
selected_children_json TEXT NOT NULL DEFAULT '[]',
443+
result_json TEXT NOT NULL DEFAULT '{}',
444+
started_at INTEGER NOT NULL,
445+
completed_at INTEGER,
446+
FOREIGN KEY(workflow_run_id) REFERENCES workflow_runs(id) ON DELETE CASCADE
447+
);
448+
CREATE INDEX IF NOT EXISTS idx_control_node_runs_workflow_run_id
449+
ON control_node_runs(workflow_run_id);
450+
CREATE INDEX IF NOT EXISTS idx_control_node_runs_node_id
451+
ON control_node_runs(node_id);
452+
453+
CREATE TABLE IF NOT EXISTS teacher_candidates (
454+
id TEXT PRIMARY KEY,
455+
workflow_run_id TEXT NOT NULL,
456+
control_node_run_id TEXT NOT NULL,
457+
candidate_id TEXT NOT NULL,
458+
branch_run_id TEXT,
459+
score REAL,
460+
passed INTEGER,
461+
rationale_json TEXT NOT NULL DEFAULT '{}',
462+
created_at INTEGER NOT NULL,
463+
FOREIGN KEY(workflow_run_id) REFERENCES workflow_runs(id) ON DELETE CASCADE,
464+
FOREIGN KEY(control_node_run_id) REFERENCES control_node_runs(id) ON DELETE CASCADE,
465+
FOREIGN KEY(branch_run_id) REFERENCES branch_runs(id) ON DELETE SET NULL
466+
);
467+
CREATE INDEX IF NOT EXISTS idx_teacher_candidates_workflow_run_id
468+
ON teacher_candidates(workflow_run_id);
469+
CREATE INDEX IF NOT EXISTS idx_teacher_candidates_control_node_run_id
470+
ON teacher_candidates(control_node_run_id);
471+
472+
PRAGMA user_version = 2;
473+
COMMIT;
474+
"#,
475+
)
476+
.context("failed to initialize workflow trace schema")?;
379477
}
380478
Ok(())
381479
}

crates/state/tests/parity_state.rs

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,30 @@ fn temp_state_path(label: &str) -> PathBuf {
1212
))
1313
}
1414

15+
fn assert_workflow_trace_schema(conn: &Connection) {
16+
let user_version: u32 = conn
17+
.query_row("PRAGMA user_version;", [], |row| row.get(0))
18+
.expect("read user_version");
19+
assert_eq!(user_version, 2);
20+
21+
for table in [
22+
"workflow_runs",
23+
"branch_runs",
24+
"leaf_runs",
25+
"control_node_runs",
26+
"teacher_candidates",
27+
] {
28+
let exists: bool = conn
29+
.query_row(
30+
"SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?1)",
31+
[table],
32+
|row| row.get(0),
33+
)
34+
.unwrap_or_else(|err| panic!("read sqlite_master for {table}: {err}"));
35+
assert!(exists, "missing workflow trace table {table}");
36+
}
37+
}
38+
1539
#[test]
1640
fn upsert_and_resume_thread_metadata() {
1741
let path = temp_state_path("upsert_resume");
@@ -157,6 +181,102 @@ fn init_schema_migration() {
157181
StateStore::open(Some(path.clone())).expect("open state store");
158182
}
159183

184+
#[test]
185+
fn fresh_schema_includes_workflow_trace_tables() {
186+
let path = temp_state_path("fresh_schema_includes_workflow_trace_tables");
187+
188+
StateStore::open(Some(path.clone())).expect("open state store");
189+
190+
let conn = Connection::open(&path).expect("open state db");
191+
assert_workflow_trace_schema(&conn);
192+
}
193+
194+
#[test]
195+
fn v1_schema_migrates_workflow_trace_tables() {
196+
let path = temp_state_path("v1_schema_migrates_workflow_trace_tables");
197+
let conn = Connection::open(&path).expect("open state db");
198+
conn.execute_batch(
199+
r#"
200+
CREATE TABLE threads (
201+
id TEXT PRIMARY KEY,
202+
rollout_path TEXT,
203+
preview TEXT NOT NULL,
204+
ephemeral INTEGER NOT NULL,
205+
model_provider TEXT NOT NULL,
206+
created_at INTEGER NOT NULL,
207+
updated_at INTEGER NOT NULL,
208+
status TEXT NOT NULL,
209+
path TEXT,
210+
cwd TEXT NOT NULL,
211+
cli_version TEXT NOT NULL,
212+
source TEXT NOT NULL,
213+
title TEXT,
214+
sandbox_policy TEXT,
215+
approval_mode TEXT,
216+
archived INTEGER NOT NULL DEFAULT 0,
217+
archived_at INTEGER,
218+
git_sha TEXT,
219+
git_branch TEXT,
220+
git_origin_url TEXT,
221+
memory_mode TEXT,
222+
current_leaf_id INTEGER
223+
);
224+
CREATE TABLE messages (
225+
id INTEGER PRIMARY KEY AUTOINCREMENT,
226+
thread_id TEXT NOT NULL,
227+
role TEXT NOT NULL,
228+
content TEXT NOT NULL,
229+
item_json TEXT,
230+
created_at INTEGER NOT NULL,
231+
parent_entry_id INTEGER
232+
);
233+
CREATE TABLE checkpoints (
234+
thread_id TEXT NOT NULL,
235+
checkpoint_id TEXT NOT NULL,
236+
state_json TEXT NOT NULL,
237+
created_at INTEGER NOT NULL,
238+
PRIMARY KEY(thread_id, checkpoint_id)
239+
);
240+
CREATE TABLE jobs (
241+
id TEXT PRIMARY KEY,
242+
name TEXT NOT NULL,
243+
status TEXT NOT NULL,
244+
progress INTEGER,
245+
detail TEXT,
246+
created_at INTEGER NOT NULL,
247+
updated_at INTEGER NOT NULL
248+
);
249+
CREATE TABLE thread_dynamic_tools (
250+
thread_id TEXT NOT NULL,
251+
position INTEGER NOT NULL,
252+
name TEXT NOT NULL,
253+
description TEXT,
254+
input_schema TEXT NOT NULL,
255+
PRIMARY KEY (thread_id, position)
256+
);
257+
INSERT INTO threads (
258+
id, preview, ephemeral, model_provider, created_at, updated_at, status, cwd, cli_version, source, archived
259+
)
260+
VALUES (
261+
'thread-test-1', 'hello', false, 'deepseek', 0, 0, 'running', '/tmp/project', '0.0.0-test', 'interactive', false
262+
);
263+
PRAGMA user_version = 1;
264+
"#,
265+
)
266+
.expect("create v1 schema");
267+
drop(conn);
268+
269+
let store = StateStore::open(Some(path.clone())).expect("open state store");
270+
let thread = store
271+
.get_thread("thread-test-1")
272+
.expect("read thread")
273+
.expect("thread survives migration");
274+
assert_eq!(thread.preview, "hello");
275+
276+
let conn = Connection::open(&path).expect("open state db");
277+
assert_workflow_trace_schema(&conn);
278+
}
279+
160280
#[test]
161281
fn init_schema_migration_same_second_messages() {
162282
let path = temp_state_path("init_schema_migration_same_second_messages");

crates/tui/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4040
release-safe. The foundation now includes serializable branch, leaf, and
4141
control-node result records toward the #2668 TraceStore contract. Thanks
4242
@AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
43+
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
44+
workflow, branch, leaf, control-node, and teacher-candidate runs. The
45+
migration creates persistence shape only; workflow execution and replay
46+
remain deferred until the runtime semantics are safe (#2668).
4347
- Added an official VS Code extension Phase 0 scaffold with terminal launch,
4448
local runtime attach checks, status bar state, and a read-only Agent View
4549
preview backed by recent runtime thread summaries. This answers the VS Code

0 commit comments

Comments
 (0)