Skip to content

Commit 6bb564e

Browse files
authored
fix(whaleflow): reject unknown workflow references (#2837)
1 parent 96b825b commit 6bb564e

5 files changed

Lines changed: 191 additions & 8 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6868
execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
6969
results now also carry separate ARMH/shared-memo and provider prompt-cache
7070
telemetry counters, with mock aggregation tests, so #2671 can progress
71-
without wiring live RLM calls or billing-affecting provider behavior yet.
71+
without wiring live RLM calls or billing-affecting provider behavior yet. The
72+
Starlark and typed-IR gates now also reject unknown leaf dependencies,
73+
reducer inputs, and teacher-review candidates before mock execution or replay,
74+
keeping generated workflows fail-closed while runtime/worktree semantics stay
75+
deferred.
7276
Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
7377
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
7478
workflow, branch, leaf, control-node, and teacher-candidate runs. The

crates/tui/CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6868
execution in CI-oriented crate tests (#2679). Leaf, branch, and workflow
6969
results now also carry separate ARMH/shared-memo and provider prompt-cache
7070
telemetry counters, with mock aggregation tests, so #2671 can progress
71-
without wiring live RLM calls or billing-affecting provider behavior yet.
71+
without wiring live RLM calls or billing-affecting provider behavior yet. The
72+
Starlark and typed-IR gates now also reject unknown leaf dependencies,
73+
reducer inputs, and teacher-review candidates before mock execution or replay,
74+
keeping generated workflows fail-closed while runtime/worktree semantics stay
75+
deferred.
7276
Thanks @AdityaVG13 for the WhaleFlow draft and cost-tracking direction.
7377
- Added a state-store v2 schema migration for WhaleFlow trace tables covering
7478
workflow, branch, leaf, control-node, and teacher-candidate runs. The

crates/whaleflow/src/lib.rs

Lines changed: 154 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,7 @@ impl MockWorkflowExecutor {
878878
if let Some(max_children) = spec.max_children {
879879
nodes.truncate(max_children);
880880
}
881-
validate_workflow_nodes(&nodes)?;
881+
validate_workflow_node_shapes(&nodes)?;
882882
self.execute_nodes(&nodes, execution)?;
883883
execution.control_node_results.push(ControlNodeResult {
884884
node_id: spec.id.clone(),
@@ -973,6 +973,12 @@ pub enum WorkflowExecutionError {
973973
EmptyLeafPrompt { leaf: String },
974974
#[error("duplicate workflow node `{node}`")]
975975
DuplicateNodeId { node: String },
976+
#[error("workflow node `{node}` has unknown {field} reference `{reference}`")]
977+
UnknownNodeReference {
978+
node: String,
979+
field: &'static str,
980+
reference: String,
981+
},
976982
}
977983

978984
fn default_frontier_limit() -> usize {
@@ -994,6 +1000,14 @@ fn node_id(node: &WorkflowNode) -> String {
9941000

9951001
pub(crate) fn validate_workflow_nodes(
9961002
nodes: &[WorkflowNode],
1003+
) -> Result<(), WorkflowExecutionError> {
1004+
let mut seen = BTreeSet::new();
1005+
validate_workflow_nodes_inner(nodes, &mut seen)?;
1006+
validate_workflow_references(nodes, &seen)
1007+
}
1008+
1009+
pub(crate) fn validate_workflow_node_shapes(
1010+
nodes: &[WorkflowNode],
9971011
) -> Result<(), WorkflowExecutionError> {
9981012
let mut seen = BTreeSet::new();
9991013
validate_workflow_nodes_inner(nodes, &mut seen)
@@ -1033,6 +1047,68 @@ fn validate_workflow_nodes_inner(
10331047
Ok(())
10341048
}
10351049

1050+
fn validate_workflow_references(
1051+
nodes: &[WorkflowNode],
1052+
known_ids: &BTreeSet<String>,
1053+
) -> Result<(), WorkflowExecutionError> {
1054+
for node in nodes {
1055+
match node {
1056+
WorkflowNode::BranchSet(spec) => {
1057+
validate_workflow_references(&spec.children, known_ids)?;
1058+
}
1059+
WorkflowNode::Leaf(spec) => {
1060+
validate_known_references(
1061+
spec.id.as_str(),
1062+
"depends_on_results",
1063+
&spec.depends_on_results,
1064+
known_ids,
1065+
)?;
1066+
}
1067+
WorkflowNode::Sequence(spec) => {
1068+
validate_workflow_references(&spec.children, known_ids)?;
1069+
}
1070+
WorkflowNode::Reduce(spec) => {
1071+
validate_known_references(spec.id.as_str(), "inputs", &spec.inputs, known_ids)?;
1072+
}
1073+
WorkflowNode::TeacherReview(spec) => {
1074+
validate_known_references(
1075+
spec.id.as_str(),
1076+
"candidates",
1077+
&spec.candidates,
1078+
known_ids,
1079+
)?;
1080+
}
1081+
WorkflowNode::LoopUntil(spec) => {
1082+
validate_workflow_references(&spec.children, known_ids)?;
1083+
}
1084+
WorkflowNode::Cond(spec) => {
1085+
validate_workflow_references(&spec.then_nodes, known_ids)?;
1086+
validate_workflow_references(&spec.else_nodes, known_ids)?;
1087+
}
1088+
WorkflowNode::Expand(_) => {}
1089+
}
1090+
}
1091+
Ok(())
1092+
}
1093+
1094+
fn validate_known_references(
1095+
node: &str,
1096+
field: &'static str,
1097+
references: &[String],
1098+
known_ids: &BTreeSet<String>,
1099+
) -> Result<(), WorkflowExecutionError> {
1100+
for reference in references {
1101+
if !known_ids.contains(reference) {
1102+
return Err(WorkflowExecutionError::UnknownNodeReference {
1103+
node: node.to_string(),
1104+
field,
1105+
reference: reference.clone(),
1106+
});
1107+
}
1108+
}
1109+
Ok(())
1110+
}
1111+
10361112
fn control_kind_name(node: &WorkflowNode) -> &'static str {
10371113
match node {
10381114
WorkflowNode::BranchSet(_) => "branch_set",
@@ -2074,6 +2150,83 @@ mod tests {
20742150
);
20752151
}
20762152

2153+
/// A leaf whose `depends_on_results` names an undeclared node must make the
/// mock executor's `run` return `UnknownNodeReference`.
#[test]
fn workflow_spec_rejects_unknown_leaf_dependency() {
    // A single leaf that depends on a node which is never declared.
    let mut summarize = leaf_node("summarize");
    if let WorkflowNode::Leaf(leaf) = &mut summarize {
        leaf.depends_on_results = vec!["missing-scan".to_string()];
    } else {
        panic!("expected leaf");
    }
    let workflow = workflow_spec(vec![summarize]);

    let mut executor = MockWorkflowExecutor::new();
    let outcome = executor.run(&workflow);
    let err = outcome.expect_err("unknown leaf dependency should fail before execution");

    let expected = WorkflowExecutionError::UnknownNodeReference {
        node: "summarize".to_string(),
        field: "depends_on_results",
        reference: "missing-scan".to_string(),
    };
    assert_eq!(err, expected);
}
2176+
2177+
/// A reduce node listing one declared and one undeclared input must be
/// rejected, and the error must name the undeclared input.
#[test]
fn workflow_spec_rejects_unknown_reduce_input() {
    // "scan" exists as a leaf; "missing-review" does not exist anywhere.
    let reduce = WorkflowNode::Reduce(ReduceSpec {
        id: "summarize".to_string(),
        inputs: vec!["scan".to_string(), "missing-review".to_string()],
        prompt: "Summarize safe fixes".to_string(),
        model_policy: ModelPolicy::default(),
    });
    let workflow = workflow_spec(vec![leaf_node("scan"), reduce]);

    let mut executor = MockWorkflowExecutor::new();
    let outcome = executor.run(&workflow);
    let err = outcome.expect_err("unknown reduce input should fail before execution");

    let expected = WorkflowExecutionError::UnknownNodeReference {
        node: "summarize".to_string(),
        field: "inputs",
        reference: "missing-review".to_string(),
    };
    assert_eq!(err, expected);
}
2203+
2204+
/// A teacher-review node listing one declared and one undeclared candidate
/// must be rejected, and the error must name the undeclared candidate.
#[test]
fn workflow_spec_rejects_unknown_teacher_candidate() {
    // "candidate-a" exists as a leaf; "candidate-b" is never declared.
    let review = WorkflowNode::TeacherReview(TeacherReviewSpec {
        id: "teacher-review".to_string(),
        candidates: vec!["candidate-a".to_string(), "candidate-b".to_string()],
        promotion_policy: PromotionPolicy::default(),
    });
    let workflow = workflow_spec(vec![leaf_node("candidate-a"), review]);

    let mut executor = MockWorkflowExecutor::new();
    let outcome = executor.run(&workflow);
    let err = outcome.expect_err("unknown teacher candidate should fail before execution");

    let expected = WorkflowExecutionError::UnknownNodeReference {
        node: "teacher-review".to_string(),
        field: "candidates",
        reference: "candidate-b".to_string(),
    };
    assert_eq!(err, expected);
}
2229+
20772230
#[test]
20782231
fn tournament_selects_passing_minimal_branch() {
20792232
let tournament = BranchTournament { min_score: 60 };

crates/whaleflow/src/replay.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use crate::{
88
BranchResult, BranchSpec, CondSpec, ControlNodeKind, ControlNodeResult, ExpandSpec, LeafResult,
99
LeafSpec, LoopUntilSpec, SequenceSpec, WorkflowExecution, WorkflowExecutionError,
1010
WorkflowMemoUsage, WorkflowNode, WorkflowRunStatus, WorkflowSpec, WorkflowUsage,
11-
validate_workflow_nodes,
11+
validate_workflow_node_shapes, validate_workflow_nodes,
1212
};
1313

1414
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
@@ -316,7 +316,7 @@ impl WorkflowReplayExecutor {
316316
.as_ref()
317317
.map(|record| record.generated_nodes.clone())
318318
.unwrap_or_default();
319-
validate_workflow_nodes(&generated_nodes)?;
319+
validate_workflow_node_shapes(&generated_nodes)?;
320320
self.execute_nodes(spec, &generated_nodes, execution)?;
321321
let selected = record
322322
.as_ref()

crates/whaleflow/src/starlark_authoring.rs

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use thiserror::Error;
1313
use crate::{
1414
AgentType, BranchSpec, BudgetSpec, CondSpec, ExpandSpec, IsolationMode, LeafSpec, ModelPolicy,
1515
PermissionSpec, PromotionPolicy, ReduceSpec, SequenceSpec, TaskMode, TeacherReviewSpec,
16-
WorkflowNode, WorkflowSpec,
16+
WorkflowNode, WorkflowSpec, validate_workflow_nodes,
1717
};
1818

1919
pub type StarlarkWorkflowResult<T> = std::result::Result<T, StarlarkWorkflowError>;
@@ -50,10 +50,13 @@ pub fn compile_starlark_workflow(
5050
eval.eval_module(ast, &globals)
5151
.map_err(StarlarkWorkflowError::Starlark)?;
5252
}
53-
builder
53+
let workflow = builder
5454
.into_inner()
5555
.workflow
56-
.ok_or(StarlarkWorkflowError::MissingWorkflow)
56+
.ok_or(StarlarkWorkflowError::MissingWorkflow)?;
57+
validate_workflow_nodes(&workflow.nodes)
58+
.map_err(|error| StarlarkWorkflowError::InvalidNode(error.to_string()))?;
59+
Ok(workflow)
5760
}
5861

5962
pub fn compile_starlark_workflow_with_repair(
@@ -531,6 +534,25 @@ workflow(goal = "bad", nodes = [])
531534
));
532535
}
533536

537+
/// Compiling a Starlark workflow whose teacher review names an undeclared
/// candidate must fail with `InvalidNode`, and the rendered error must mention
/// the offending id.
#[test]
fn starlark_compile_gate_rejects_unknown_references() {
    // The only node is a teacher review pointing at a candidate that is never declared.
    let source = r#"
workflow(
id = "bad-reference",
goal = "reject missing candidates",
nodes = [
teacher_review(id = "review", candidates = ["missing-candidate"]),
],
)
"#;

    let outcome = compile_starlark_workflow("bad-reference.star", source);
    let error = outcome.expect_err("unknown candidate should fail at the compile gate");

    assert!(matches!(error, StarlarkWorkflowError::InvalidNode(_)));
    let rendered = error.to_string();
    assert!(rendered.contains("missing-candidate"));
}
555+
534556
#[test]
535557
fn issue_fix_tournament_example_compiles() {
536558
let source = include_str!("../../../workflows/issue_fix_tournament.star");

0 commit comments

Comments
 (0)