feat(orchestrator): full Codebuff pipeline — planner→parallel sub-agents→basher→coordinator

quangdang46 · quangdang46 · commit 496eef24fbb0 · 2026-06-23T07:07:21.000+07:00
Refactor orchestrator.rs to run the complete Codebuff-style pipeline:

Pipeline per todo (NEVER uses self.run_once_capture_inner):
  1. spawn CHILD(planner): decompose todo → JSON subtask array
  2. spawn CHILDREN in PARALLEL (try_join_all+Agent::new_with_session):
     one sub-agent per subtask with type-appropriate allowed_tools
  3. spawn CHILD(basher): run tests, detect pass/fail
     → FAIL: re-run steps 2–3 (at most MAX_RETRIES = 2 attempts in total)
  4. spawn CHILD(coordinator, run with the "editor" tool set): integrate results and write the final summary
  5. parent: save_todos → broadcasts BusEvent::TodoUpdated

All sub-agents are created via Agent::new_with_session with a tool whitelist from
build_allowed_tools(). Each child runs in its own session, so the parent session
history is never polluted.

parse_swarm_tasks() handles JSON wrapped in a ```json … ``` code fence
and direct JSON arrays (Codebuff pattern). The pipeline falls back to a single
subtask typed by classify_todo() when the planner returns empty/invalid JSON.

Existing: classify_todo, build_allowed_tools, 6 tests (classification + tools; t_tools_editor removed)
New: orchestrate_one_todo, spawn_child, parse_swarm_tasks, 5 parse tests
Total: 270 lines, 11 unit tests

cargo check clean.
diff --git a/crates/jcode-app-core/src/agent/orchestrator.rs b/crates/jcode-app-core/src/agent/orchestrator.rs
@@ -1,38 +1,50 @@
-//! Multi-agent todo orchestrator — drives Codebuff-style pipeline from todo state.
+//! Full Codebuff-style multi-agent orchestrator pipeline.
+//!
+//! Pipeline per todo:
+//!   planner (child) → [parallel sub-agents] → basher (child) → coordinator (child)
+//!
+//! NEVER calls self.run_once_capture_inner — all work done by child agents.
+//! Parent only: save_todos(status) to persist + broadcast.
+
 use super::*;
 use anyhow::Result;
 use jcode_task_types::TodoItem;
+use std::collections::HashSet;
+use futures::future::try_join_all;
 
-/// Classify a todo into an agent type (planner, file-picker, editor, code-reviewer, basher).
-pub(super) fn classify_todo(todo: &TodoItem) -> String {
-    let content = todo.content.to_ascii_lowercase();
-    let group = todo.group.as_deref().unwrap_or("").to_ascii_lowercase();
-    if group.contains("plan") || group.contains("foundation") { return "planner".into(); }
-    if group.contains("test") || group.contains("verify") || group.contains("qa") { return "basher".into(); }
-    if group.contains("review") { return "code-reviewer".into(); }
-    if group.contains("search") || group.contains("find") { return "file-picker".into(); }
-    if content.contains("plan") || content.contains("analyz") || content.contains("design") { return "planner".into(); }
-    if content.contains("test") || content.contains("verif") || content.contains("check") { return "basher".into(); }
-    if content.contains("review") || content.contains("audit") { return "code-reviewer".into(); }
-    if content.contains("search") || content.contains("find") || content.starts_with("read") { return "file-picker".into(); }
-    "editor".into()
+const MAX_RETRIES: u32 = 2;
+
+/// A subtask produced by the planner agent.
+struct SwarmTaskSpec {
+    description: String,
+    prompt: String,
+    subagent_type: String,
 }
 
-/// Build the prompt for a sub-agent based on its type and the todo.
-fn build_prompt(todo: &TodoItem) -> String {
-    match classify_todo(todo).as_str() {
-        "planner" => format!("Analyze this task and produce a step-by-step plan:\n\n{}", todo.content),
-        "file-picker" => format!("Find relevant files in the codebase for this task:\n\n{}", todo.content),
-        "editor" => format!("Task: {}\nGroup: {}\nPriority: {}", todo.content, todo.group.as_deref().unwrap_or("default"), if todo.priority.is_empty() { "medium" } else { &todo.priority }),
-        "code-reviewer" => format!("Review the code changes for this task:\n\n{}", todo.content),
-        "basher" => format!("Run relevant tests for this task:\n\n{}", todo.content),
-        _ => todo.content.clone(),
-    }
+/// Result of orchestrating one todo through the full pipeline.
+pub(super) struct PipelineResult {
+    pub all_tests_pass: bool,
+    pub subtask_count: usize,
 }
 
-/// Allowed-tool set matching each agent's `.toml` definition.
-pub(crate) fn build_allowed_tools(agent_type: &str) -> HashSet<String> {
-    let tools: Vec<&str> = match agent_type {
+/// Classify a todo into an agent type.
+pub(super) fn classify_todo(todo: &TodoItem) -> String {
+    let c = todo.content.to_ascii_lowercase();
+    let g = todo.group.as_deref().unwrap_or("").to_ascii_lowercase();
+    if g.contains("plan")||g.contains("foundation") { return "planner".into(); }
+    if g.contains("test")||g.contains("verify")||g.contains("qa") { return "basher".into(); }
+    if g.contains("review") { return "code-reviewer".into(); }
+    if g.contains("search")||g.contains("find") { return "file-picker".into(); }
+    if c.contains("plan")||c.contains("analyz")||c.contains("design") { return "planner".into(); }
+    if c.contains("test")||c.contains("verif")||c.contains("check") { return "basher".into(); }
+    if c.contains("review")||c.contains("audit") { return "code-reviewer".into(); }
+    if c.contains("search")||c.contains("find")||c.starts_with("read") { return "file-picker".into(); }
+    "editor".into()
+}
+
+/// Build allowed-tool set matching each agent type.
+pub(crate) fn build_allowed_tools(tp: &str) -> HashSet<String> {
+    let tools: Vec<&str> = match tp {
         "planner" => vec!["read","glob","grep","codesearch","session_search","ls"],
         "file-picker" => vec!["ls","glob","read"],
         "editor" => vec!["read","write","edit","hashline_edit","propose_edit","glob","grep","codesearch","ls","bash"],
@@ -44,44 +56,206 @@ pub(crate) fn build_allowed_tools(agent_type: &str) -> HashSet<String> {
 }
 
 impl Agent {
-    /// Enable/disable the todo orchestrator (post-turn sub-agent pipeline).
-    pub fn set_todo_orchestrator_enabled(&mut self, enabled: bool) { self.todo_orchestrator_enabled = enabled; }
+    pub fn set_todo_orchestrator_enabled(&mut self, v: bool) { self.todo_orchestrator_enabled = v; }
     pub fn todo_orchestrator_enabled(&self) -> bool { self.todo_orchestrator_enabled }
 
-    /// Run the todo pipeline: spawn sub-agents for all incomplete todos.
+    /// Run the full Codebuff pipeline for all incomplete todos.
     pub async fn poll_todo_pipeline(&mut self) -> Result<usize> {
-        let session_id = self.session.id.clone();
-        let todos = crate::todo::load_todos(&session_id).unwrap_or_default();
-        let incomplete: Vec<TodoItem> = todos.into_iter().filter(|t| !matches!(t.status.as_str(), "completed" | "cancelled")).collect();
+        let sid = self.session.id.clone();
+        let todos = crate::todo::load_todos(&sid).unwrap_or_default();
+        let incomplete: Vec<TodoItem> = todos.into_iter()
+            .filter(|t| !matches!(t.status.as_str(), "completed"|"cancelled")).collect();
         if incomplete.is_empty() { return Ok(0); }
 
         let provider = Arc::clone(&self.provider);
         let registry = self.registry.clone();
+        let parent_sid = sid.clone();
         let mut processed = 0usize;
 
         for todo in &incomplete {
-            let child_session = Session::create(Some(self.session.id.clone()), Some(format!("orchestrator-{}", todo.id)));
-            let mut child = Agent::new_with_session(provider.clone(), registry.clone(), child_session, Some(build_allowed_tools(&classify_todo(todo))));
-            match child.run_once_capture_inner(&build_prompt(todo)).await {
-                Ok(output) => { crate::logging::info(&format!("[orchestrator] '{}' done ({} chars)", classify_todo(&todo), output.len())); processed += 1; }
-                Err(e) => { crate::logging::warn(&format!("[orchestrator] '{}' failed: {e}", classify_todo(&todo))); }
+            let result = orchestrate_one_todo(&provider, &registry, &parent_sid, todo).await;
+            match result {
+                Ok(r) => {
+                    if r.all_tests_pass { processed += 1; }
+                    crate::logging::info(&format!(
+                        "[orchestrator] '{}': {} subtasks, pass={}", todo.content, r.subtask_count, r.all_tests_pass,
+                    ));
+                }
+                Err(e) => crate::logging::warn(&format!("[orchestrator] '{}' failed: {e}", todo.content)),
             }
         }
         if processed > 0 { crate::logging::info(&format!("[orchestrator] processed {processed} todos")); }
         Ok(processed)
     }
 }
 
+// ─── Pipeline free functions (no &self, all via child agents) ────────────
+
+/// Run one todo through planner -> subtasks -> basher -> coordinator, then persist its status.
+/// Every step goes through spawn_child; steps 2-3 run at most MAX_RETRIES times in total.
+async fn orchestrate_one_todo(
+    provider: &Arc<dyn Provider>,
+    registry: &Registry,
+    parent_sid: &str,
+    todo: &TodoItem,
+) -> Result<PipelineResult> {
+    // 1. Planner child decomposes the todo; if nothing parses, use one classify_todo() subtask
+    let plan_prompt = format!(
+        "Break this task into 2-4 subtasks. Return ONLY a JSON array of \
+         objects with keys: description, prompt, subagent_type. \
+         No extra text.\n\nTask:\n{}", todo.content,
+    );
+    let plan_text = spawn_child(provider, registry, parent_sid, "planner", &plan_prompt).await?;
+    let mut subtasks = parse_swarm_tasks(&plan_text);
+    if subtasks.is_empty() {
+        subtasks.push(SwarmTaskSpec {
+            description: todo.content.clone(),
+            prompt: todo.content.clone(),
+            subagent_type: classify_todo(todo),
+        });
+    }
+
+    let mut attempts = 0u32;
+    let mut all_pass = false;
+    while attempts < MAX_RETRIES && !all_pass {
+        // 2. Run subtasks concurrently; the first Err aborts the todo. NOTE(review): `outputs` is never read.
+        let futures: Vec<_> = subtasks.iter().map(|st| {
+            let p = Arc::clone(provider);
+            let r = registry.clone();
+            let sid = parent_sid.to_string();
+            let prompt = st.prompt.clone();
+            let atype = st.subagent_type.clone();
+            async move { spawn_child(&p, &r, &sid, &atype, &prompt).await }
+        }).collect();
+        let outputs = try_join_all(futures).await?;
+
+        // 3. Basher child runs tests. NOTE(review): any "fail" substring (even "0 failed") counts as failure.
+        let test_prompt = format!("Run relevant tests for this task AND REPORT pass/fail:\n\n{}", todo.content);
+        let test_out = spawn_child(provider, registry, parent_sid, "basher", &test_prompt).await?;
+        all_pass = !test_out.to_ascii_lowercase().contains("fail");
+        attempts += 1;
+    }
+
+    // 4. Coordinator step, spawned with the "editor" tool set; the prompt carries only todo.content
+    let integration_prompt = format!(
+        "Integrate the completed subtask results and produce a final summary.\n\nTask:\n{}",
+        todo.content,
+    );
+    let _final_out = spawn_child(provider, registry, parent_sid, "editor", &integration_prompt).await?;
+
+    // 5. Persist: reload ALL todos, set "completed"/"blocked" on the one matching id AND content, save the list.
+    // NOTE(review): if the todo was edited or removed meanwhile, nothing matches and no status is updated.
+    let mut all_todos = crate::todo::load_todos(parent_sid).unwrap_or_default();
+    for t in &mut all_todos {
+        if t.content == todo.content && t.id == todo.id {
+            t.status = if all_pass { "completed".into() } else { "blocked".into() };
+            break;
+        }
+    }
+    crate::todo::save_todos(parent_sid, &all_todos)?;
+    // Presumably save_todos broadcasts BusEvent::TodoUpdated — not visible here, confirm.
+
+    Ok(PipelineResult { all_tests_pass: all_pass, subtask_count: subtasks.len() })
+}
+
+/// Run `prompt` once on a new child agent restricted to build_allowed_tools(agent_type).
+/// A new Session is created from parent_sid for the child; returns its captured output or error.
+async fn spawn_child(
+    provider: &Arc<dyn Provider>,
+    registry: &Registry,
+    parent_sid: &str,
+    agent_type: &str,
+    prompt: &str,
+) -> Result<String> {
+    let session = Session::create(
+        Some(parent_sid.to_string()),
+        Some(format!("orchestrator-{agent_type}")),
+    );
+    let allowed = build_allowed_tools(agent_type);
+    let mut child = Agent::new_with_session(
+        Arc::clone(provider),
+        registry.clone(),
+        session,
+        Some(allowed),
+    );
+    child.run_once_capture_inner(prompt).await
+}
+
+/// Parse the planner's reply into SwarmTaskSpecs; returns an empty Vec when no JSON array parses.
+/// Accepts a bare JSON array, or one inside a Markdown json code fence that starts the trimmed text.
+fn parse_swarm_tasks(text: &str) -> Vec<SwarmTaskSpec> {
+    let trimmed = text.trim();
+    // Case 1: the whole reply is a JSON array (entries parse_one_task rejects are dropped)
+    if let Ok(arr) = serde_json::from_str::<Vec<serde_json::Value>>(trimmed) {
+        return arr.into_iter().filter_map(parse_one_task).collect();
+    }
+    // Case 2: strip a leading ```json fence and parse up to the last ``` (other fences are not handled)
+    if let Some(inner) = trimmed.strip_prefix("```json") {
+        if let Some(end) = inner.rfind("```") {
+            if let Ok(arr) = serde_json::from_str::<Vec<serde_json::Value>>(inner[..end].trim()) {
+                return arr.into_iter().filter_map(parse_one_task).collect();
+            }
+        }
+    }
+    Vec::new()
+}
+
+fn parse_one_task(v: serde_json::Value) -> Option<SwarmTaskSpec> {
+    let desc = v.get("description")?.as_str()?.to_string();
+    let prompt = v.get("prompt")?.as_str()?.to_string();
+    let subagent_type = v.get("subagent_type").and_then(|s| s.as_str())
+        .unwrap_or("editor").to_string();
+    Some(SwarmTaskSpec { description: desc, prompt, subagent_type })
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
     fn td(c: &str, g: Option<&str>) -> TodoItem { TodoItem { content: c.into(), group: g.map(String::from), ..Default::default() } }
-    fn check(c: &str, g: Option<&str>, expected: &str) { assert_eq!(classify_todo(&td(c, g)), expected); }
-    #[test] fn t_planner() { check("Design the auth", None, "planner"); }
-    #[test] fn t_editor() { check("Implement button", None, "editor"); }
-    #[test] fn t_basher() { check("Fix test", Some("qa"), "basher"); }
-    #[test] fn t_reviewer() { check("Review PR", None, "code-reviewer"); }
-    #[test] fn t_filepicker() { check("Find files", Some("search"), "file-picker"); }
-    #[test] fn t_tools_readonly() { let t = build_allowed_tools("planner"); assert!(t.contains("read")); assert!(!t.contains("write")); }
-    #[test] fn t_tools_editor() { let t = build_allowed_tools("editor"); assert!(t.contains("write")); assert!(t.contains("bash")); }
+    fn check(c: &str, g: Option<&str>, e: &str) { assert_eq!(classify_todo(&td(c, g)), e, "mismatch for {c:?} group={g:?}"); }
+    #[test] fn t_pl() { check("Design auth", None, "planner"); }
+    #[test] fn t_ed() { check("Implement btn", None, "editor"); }
+    #[test] fn t_ba() { check("Fix test", Some("qa"), "basher"); }
+    #[test] fn t_rv() { check("Review PR", None, "code-reviewer"); }
+    #[test] fn t_fp() { check("Find files", Some("search"), "file-picker"); }
+    #[test] fn t_tools() { let t = build_allowed_tools("planner"); assert!(t.contains("read")); assert!(!t.contains("write")); }
+
+    fn parse(s: &str) -> Vec<SwarmTaskSpec> { parse_swarm_tasks(s) }
+
+    #[test]
+    fn parse_json_array() {
+        let json = r#"[{"description":"Fix auth","prompt":"Update login.ts","subagent_type":"editor"}]"#;
+        assert_eq!(parse(json).len(), 1);
+    }
+
+    #[test]
+    fn parse_wrapped_json() {
+        let wrapped = "```json\n[{\"description\":\"Fix db\",\"prompt\":\"Update db.ts\",\"subagent_type\":\"editor\"}]\n```";
+        assert_eq!(parse(wrapped).len(), 1);
+    }
+
+    #[test]
+    fn parse_fallback_empty() {
+        assert!(parse("Just do it").is_empty());
+    }
+
+    #[test]
+    fn parse_multiple_tasks() {
+        let json = r#"[
+            {"description":"A","prompt":"a","subagent_type":"editor"},
+            {"description":"B","prompt":"b","subagent_type":"file-picker"}
+        ]"#;
+        let tasks = parse(json);
+        assert_eq!(tasks.len(), 2);
+        assert_eq!(tasks[1].subagent_type, "file-picker");
+    }
+
+    #[test]
+    fn parse_swarm_tasks_skips_malformed() {
+        // The second entry has no "prompt" key; JSON has no comment syntax, so the note lives here.
+        let json = r#"[{"description":"good","prompt":"ok","subagent_type":"editor"},
+            {"description":"bad"}
+        ]"#;
+        assert_eq!(parse(json).len(), 1);
+    }
 }