Skip to content

Commit 7b457aa

Browse files
sanil-23 and claude
authored
feat(agent): pure orchestrator pattern with per-skill delegation tools (tinyhumansai#496)
* feat(agent): pure orchestrator pattern with per-skill delegation tools (tinyhumansai#478)

Refactors the main agent from a direct tool-calling model to a pure orchestrator that delegates all work through dynamically generated tools.

Architecture changes:
- Orchestrator only sees generated tools (notion, gmail, research, run_code, review_code, plan, spawn_subagent) — skill tools are architecturally unreachable from the main agent
- Each installed skill auto-generates a delegation tool at build time (SkillDelegationTool) that routes to skills_agent with the correct skill_filter
- Static archetype tools (research, run_code, etc.) delegate to their respective sub-agents
- visible_tool_specs filters the function-calling schema sent to the provider, enforcing the orchestrator boundary at the API level

Prompt changes:
- Rewrote AGENTS.md as a lean orchestrator prompt — no more routing tables or agent_id instructions
- Orchestrator skips TOOLS.md, MEMORY.md, HEARTBEAT.md (~6k tokens saved per turn) — subagents get tool specs from the registry
- Workspace .md files auto-sync via builtin-hash mechanism so prompt updates ship automatically to existing installs

Bug fixes:
- ModelSpec::Hint now resolves to {hint}-v1 (e.g. agentic-v1) instead of hint:agentic which the backend rejected
- validate_skill_filter now uses skill_id from the engine tuple instead of splitting on __ in the raw tool name (which always failed)
- Memory context forwarded to subagents via ParentExecutionContext

Observability:
- Added [agent] tagged logs for tool responses, agent state transitions, and delegation decisions throughout turn.rs

See docs/agent-prompt-architecture.excalidraw for the visual diagram.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* style: rustfmt orchestrator_tools.rs

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* style: cargo fmt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: address CodeRabbit review — dispatch guard, fork specs, decouple sync

- Enforce visible-tool allowlist at dispatch time (not just schema)
- Fork mode uses visible_tool_specs (not full registry)
- De-duplicate spawn_subagent when extending orchestrator tools
- Raw tool output moved to debug level, info level logs metadata only
- Decouple workspace file sync from prompt rendering so skipped files still get synced to disk

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b8ded44 commit 7b457aa

15 files changed

Lines changed: 1226 additions & 146 deletions

docs/agent-prompt-architecture.excalidraw

Lines changed: 504 additions & 0 deletions
Large diffs are not rendered by default.

src/openhuman/agent/agent/builder.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ impl AgentBuilder {
2828
Self {
2929
provider: None,
3030
tools: None,
31+
visible_tool_names: None,
3132
memory: None,
3233
prompt_builder: None,
3334
tool_dispatcher: None,
@@ -71,6 +72,14 @@ impl AgentBuilder {
7172
self
7273
}
7374

75+
/// Restricts which tools the main agent can see and call directly.
76+
/// Tools not in this set are still available to sub-agents via the
77+
/// runner. Leave unset (default) or pass an empty set to make all tools visible.
78+
pub fn visible_tool_names(mut self, names: std::collections::HashSet<String>) -> Self {
79+
self.visible_tool_names = Some(names);
80+
self
81+
}
82+
7483
/// Sets the memory system for the agent.
7584
pub fn memory(mut self, memory: Arc<dyn Memory>) -> Self {
7685
self.memory = Some(memory);
@@ -198,12 +207,38 @@ impl AgentBuilder {
198207
.ok_or_else(|| anyhow::anyhow!("tools are required"))?;
199208
let tool_specs: Vec<ToolSpec> = tools.iter().map(|tool| tool.spec()).collect();
200209

210+
let visible_names = self.visible_tool_names.unwrap_or_default();
211+
212+
// Build the filtered spec list that the main agent sends to the
213+
// provider. When the filter is empty every tool is visible
214+
// (backward compat). When populated, only allowlisted tools
215+
// appear in the function-calling schema so the LLM literally
216+
// cannot call skill tools directly — it must go through a delegation tool or spawn_subagent.
217+
let visible_tool_specs: Vec<ToolSpec> = if visible_names.is_empty() {
218+
tool_specs.clone()
219+
} else {
220+
tool_specs
221+
.iter()
222+
.filter(|spec| visible_names.contains(&spec.name))
223+
.cloned()
224+
.collect()
225+
};
226+
227+
log::info!(
228+
"[agent] tool spec filter: total={} visible={} (filter_active={})",
229+
tool_specs.len(),
230+
visible_tool_specs.len(),
231+
!visible_names.is_empty()
232+
);
233+
201234
Ok(Agent {
202235
provider: self
203236
.provider
204237
.ok_or_else(|| anyhow::anyhow!("provider is required"))?,
205238
tools: Arc::new(tools),
206239
tool_specs: Arc::new(tool_specs),
240+
visible_tool_specs: Arc::new(visible_tool_specs),
241+
visible_tool_names: visible_names,
207242
memory: self
208243
.memory
209244
.ok_or_else(|| anyhow::anyhow!("memory is required"))?,
@@ -227,6 +262,7 @@ impl AgentBuilder {
227262
identity_config: self.identity_config.unwrap_or_default(),
228263
skills: self.skills.unwrap_or_default(),
229264
auto_save: self.auto_save.unwrap_or(false),
265+
last_memory_context: None,
230266
history: Vec::new(),
231267
classification_config: self.classification_config.unwrap_or_default(),
232268
available_hints: self.available_hints.unwrap_or_default(),
@@ -405,9 +441,29 @@ impl Agent {
405441
}
406442
}
407443

444+
// Generate the orchestrator's tool set: one tool per skill +
445+
// one tool per archetype (research, run_code, etc.) + spawn_subagent
446+
// as a fallback. These are the only tools the LLM sees in its
447+
// function-calling schema. Sub-agents still access the full `tools`
448+
// registry via ParentExecutionContext.
449+
let orchestrator_tools = tools::orchestrator_tools::collect_orchestrator_tools();
450+
let visible: std::collections::HashSet<String> = orchestrator_tools
451+
.iter()
452+
.map(|t| t.name().to_string())
453+
.collect();
454+
// De-duplicate: spawn_subagent is already in the base registry.
455+
let existing_names: std::collections::HashSet<String> =
456+
tools.iter().map(|t| t.name().to_string()).collect();
457+
tools.extend(
458+
orchestrator_tools
459+
.into_iter()
460+
.filter(|t| !existing_names.contains(t.name())),
461+
);
462+
408463
Agent::builder()
409464
.provider(provider)
410465
.tools(tools)
466+
.visible_tool_names(visible)
411467
.memory(memory)
412468
.tool_dispatcher(tool_dispatcher)
413469
.memory_loader(Box::new(

src/openhuman/agent/agent/turn.rs

Lines changed: 96 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ impl Agent {
4141
/// and returns the final assistant response.
4242
pub async fn turn(&mut self, user_message: &str) -> Result<String> {
4343
let turn_started = std::time::Instant::now();
44+
log::info!("[agent] turn started — awaiting user message processing");
4445
log::info!(
4546
"[agent_loop] turn start message_chars={} history_len={} max_tool_iterations={}",
4647
user_message.chars().count(),
@@ -55,6 +56,7 @@ impl Agent {
5556
// would just burn memory-store reads on data we throw away.
5657
let learned = self.fetch_learned_context().await;
5758
let system_prompt = self.build_system_prompt(learned)?;
59+
log::info!("[agent] system prompt built — initialising conversation history");
5860
log::info!(
5961
"[agent_loop] system prompt built chars={}",
6062
system_prompt.chars().count()
@@ -87,15 +89,23 @@ impl Agent {
8789
.await;
8890
}
8991

92+
log::info!("[agent] loading memory context for user message");
9093
let context = self
9194
.memory_loader
9295
.load_context(self.memory.as_ref(), user_message)
9396
.await
9497
.unwrap_or_default();
9598

9699
let enriched = if context.is_empty() {
100+
log::info!("[agent] no memory context found — using raw user message");
101+
self.last_memory_context = None;
97102
user_message.to_string()
98103
} else {
104+
log::info!(
105+
"[agent] memory context loaded — enriching user message context_chars={}",
106+
context.chars().count()
107+
);
108+
self.last_memory_context = Some(context.clone());
99109
format!("{context}{user_message}")
100110
};
101111

@@ -188,6 +198,12 @@ impl Agent {
188198
}
189199

190200
let messages = self.tool_dispatcher.to_provider_messages(&self.history);
201+
log::info!(
202+
"[agent] iteration {}/{} — sending request to provider model={}",
203+
iteration + 1,
204+
self.config.max_tool_iterations,
205+
effective_model
206+
);
191207
log::info!(
192208
"[agent_loop] provider request i={} messages={} send_tool_specs={}",
193209
iteration + 1,
@@ -201,7 +217,7 @@ impl Agent {
201217
ChatRequest {
202218
messages: &messages,
203219
tools: if self.tool_dispatcher.should_send_tool_specs() {
204-
Some(self.tool_specs.as_slice())
220+
Some(self.visible_tool_specs.as_slice())
205221
} else {
206222
None
207223
},
@@ -234,6 +250,11 @@ impl Agent {
234250

235251
let (text, calls) = self.tool_dispatcher.parse_response(&response);
236252
let calls = Self::with_fallback_tool_call_ids(calls, iteration);
253+
log::info!(
254+
"[agent] provider responded — parsed tool_calls={} text_chars={}",
255+
calls.len(),
256+
text.chars().count()
257+
);
237258
log::info!(
238259
"[agent_loop] parsed response i={} parsed_text_chars={} parsed_tool_calls={}",
239260
iteration + 1,
@@ -246,6 +267,10 @@ impl Agent {
246267
} else {
247268
text
248269
};
270+
log::info!(
271+
"[agent] no tool calls — returning final response after {} iteration(s)",
272+
iteration + 1
273+
);
249274
log::info!(
250275
"[agent_loop] final response i={} final_chars={}",
251276
iteration + 1,
@@ -309,6 +334,11 @@ impl Agent {
309334
)));
310335
}
311336
let tool_names: Vec<&str> = calls.iter().map(|call| call.name.as_str()).collect();
337+
log::info!(
338+
"[agent] dispatching {} tool(s): {:?}",
339+
calls.len(),
340+
tool_names
341+
);
312342
log::info!(
313343
"[agent_loop] executing tools i={} names={:?}",
314344
iteration + 1,
@@ -338,6 +368,23 @@ impl Agent {
338368
iteration + 1,
339369
results.len()
340370
);
371+
for r in &results {
372+
log::info!(
373+
"[agent] tool response name={} success={} output_chars={}",
374+
r.name,
375+
r.success,
376+
r.output.chars().count(),
377+
);
378+
log::debug!(
379+
"[agent] tool response body name={}: {}",
380+
r.name,
381+
truncate_with_ellipsis(&r.output, 300)
382+
);
383+
}
384+
log::info!(
385+
"[agent] all tools complete for iteration {} — looping back to provider",
386+
iteration + 1
387+
);
341388
let formatted = self.tool_dispatcher.format_results(&results);
342389
self.history.push(formatted);
343390
self.trim_history();
@@ -348,6 +395,10 @@ impl Agent {
348395
);
349396
}
350397

398+
log::warn!(
399+
"[agent] exceeded max tool iterations ({}) — aborting turn",
400+
self.config.max_tool_iterations
401+
);
351402
log::warn!(
352403
"[agent_loop] exceeded maximum tool iterations max={}",
353404
self.config.max_tool_iterations
@@ -399,6 +450,7 @@ impl Agent {
399450
tool_name: call.name.clone(),
400451
session_id: self.event_session_id().to_string(),
401452
});
453+
log::info!("[agent] executing tool: {}", call.name);
402454
log::info!("[agent_loop] tool start name={}", call.name);
403455

404456
// Special-case `spawn_subagent { mode: "fork", … }`: stash a
@@ -420,27 +472,37 @@ impl Agent {
420472
None
421473
};
422474

423-
let (raw_result, success) =
424-
if let Some(tool) = self.tools.iter().find(|t| t.name() == call.name) {
425-
let exec = tool.execute(call.arguments.clone());
426-
let outcome = if let Some(fork_ctx) = fork_context_for_call {
427-
harness::with_fork_context(fork_ctx, exec).await
428-
} else {
429-
exec.await
430-
};
431-
match outcome {
432-
Ok(r) => {
433-
if !r.is_error {
434-
(r.output(), true)
435-
} else {
436-
(format!("Error: {}", r.output()), false)
437-
}
438-
}
439-
Err(e) => (format!("Error executing {}: {e}", call.name), false),
440-
}
475+
let (raw_result, success) = if !self.visible_tool_names.is_empty()
476+
&& !self.visible_tool_names.contains(&call.name)
477+
{
478+
log::warn!(
479+
"[agent] blocked tool call '{}' — not in visible tool set",
480+
call.name
481+
);
482+
(
483+
format!("Tool '{}' is not available to this agent", call.name),
484+
false,
485+
)
486+
} else if let Some(tool) = self.tools.iter().find(|t| t.name() == call.name) {
487+
let exec = tool.execute(call.arguments.clone());
488+
let outcome = if let Some(fork_ctx) = fork_context_for_call {
489+
harness::with_fork_context(fork_ctx, exec).await
441490
} else {
442-
(format!("Unknown tool: {}", call.name), false)
491+
exec.await
443492
};
493+
match outcome {
494+
Ok(r) => {
495+
if !r.is_error {
496+
(r.output(), true)
497+
} else {
498+
(format!("Error: {}", r.output()), false)
499+
}
500+
}
501+
Err(e) => (format!("Error executing {}: {e}", call.name), false),
502+
}
503+
} else {
504+
(format!("Unknown tool: {}", call.name), false)
505+
};
444506

445507
// Context pipeline stage 1: apply the per-result byte budget
446508
// *inline* before the result enters history. This is the only
@@ -466,6 +528,17 @@ impl Agent {
466528
success,
467529
elapsed_ms,
468530
});
531+
log::info!(
532+
"[agent] tool completed: {} success={} elapsed_ms={}",
533+
call.name,
534+
success,
535+
elapsed_ms
536+
);
537+
log::debug!(
538+
"[agent] tool output for {}: {}",
539+
call.name,
540+
truncate_with_ellipsis(&result, 500)
541+
);
469542
log::info!(
470543
"[agent_loop] tool finish name={} elapsed_ms={} output_chars={} success={}",
471544
call.name,
@@ -527,6 +600,7 @@ impl Agent {
527600
agent_config: self.config.clone(),
528601
identity_config: self.identity_config.clone(),
529602
skills: Arc::new(self.skills.clone()),
603+
memory_context: self.last_memory_context.clone(),
530604
session_id: self.event_session_id().to_string(),
531605
channel: self.event_channel().to_string(),
532606
}
@@ -559,7 +633,7 @@ impl Agent {
559633

560634
harness::ForkContext {
561635
system_prompt: Arc::new(system_prompt),
562-
tool_specs: Arc::clone(&self.tool_specs),
636+
tool_specs: Arc::clone(&self.visible_tool_specs),
563637
message_prefix: Arc::new(messages),
564638
cache_boundary: None,
565639
fork_task_prompt,
@@ -667,6 +741,7 @@ impl Agent {
667741
identity_config: Some(&self.identity_config),
668742
dispatcher_instructions: &instructions,
669743
learned,
744+
visible_tool_names: &self.visible_tool_names,
670745
};
671746
self.prompt_builder.build(&ctx)
672747
}

src/openhuman/agent/agent/types.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,21 @@ use std::sync::Arc;
2323
/// system to maintain context across turns.
2424
pub struct Agent {
2525
pub(super) provider: Arc<dyn Provider>,
26+
/// Full tool registry. Sub-agents pull from this via
27+
/// [`ParentExecutionContext::all_tools`].
2628
pub(super) tools: Arc<Vec<Box<dyn Tool>>>,
29+
/// Full tool specs — sub-agents receive these via
30+
/// [`ParentExecutionContext::all_tool_specs`].
2731
pub(super) tool_specs: Arc<Vec<ToolSpec>>,
32+
/// Tool specs filtered by `visible_tool_names`. These are the specs
33+
/// actually sent to the provider in the main agent's chat requests.
34+
/// When `visible_tool_names` is empty this equals `tool_specs`.
35+
pub(super) visible_tool_specs: Arc<Vec<ToolSpec>>,
36+
/// When non-empty, only these tool names are visible in the main
37+
/// agent's prompt and callable by the main agent. Sub-agents ignore
38+
/// this filter — they apply per-definition whitelists in the runner.
39+
/// Empty = no filter (all tools visible, backward compat).
40+
pub(super) visible_tool_names: std::collections::HashSet<String>,
2841
pub(super) memory: Arc<dyn Memory>,
2942
pub(super) prompt_builder: SystemPromptBuilder,
3043
pub(super) tool_dispatcher: Box<dyn ToolDispatcher>,
@@ -36,6 +49,9 @@ pub struct Agent {
3649
pub(super) identity_config: crate::openhuman::config::IdentityConfig,
3750
pub(super) skills: Vec<crate::openhuman::skills::Skill>,
3851
pub(super) auto_save: bool,
52+
/// Last memory context loaded for the current turn. Stored so it can
53+
/// be forwarded to subagents via `ParentExecutionContext`.
54+
pub(super) last_memory_context: Option<String>,
3955
pub(super) history: Vec<ConversationMessage>,
4056
pub(super) classification_config: crate::openhuman::config::QueryClassificationConfig,
4157
pub(super) available_hints: Vec<String>,
@@ -57,6 +73,8 @@ pub struct Agent {
5773
pub struct AgentBuilder {
5874
pub(super) provider: Option<Arc<dyn Provider>>,
5975
pub(super) tools: Option<Vec<Box<dyn Tool>>>,
76+
/// When set, restricts which tools the main agent sees/calls.
77+
pub(super) visible_tool_names: Option<std::collections::HashSet<String>>,
6078
pub(super) memory: Option<Arc<dyn Memory>>,
6179
pub(super) prompt_builder: Option<SystemPromptBuilder>,
6280
pub(super) tool_dispatcher: Option<Box<dyn ToolDispatcher>>,

src/openhuman/agent/harness/archetypes.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ impl std::fmt::Display for AgentArchetype {
4545
}
4646

4747
impl AgentArchetype {
48-
/// Model hint passed to `RouterProvider` (prefixed with `"hint:"` at call site).
48+
/// Model hint resolved to `{hint}-v1` at call site (e.g. `"agentic"` → `"agentic-v1"`).
4949
pub fn default_model_hint(&self) -> &'static str {
5050
match self {
5151
Self::Orchestrator => "reasoning",
@@ -54,8 +54,7 @@ impl AgentArchetype {
5454
Self::SkillsAgent => "agentic",
5555
Self::ToolMaker => "coding",
5656
Self::Researcher => "agentic",
57-
Self::Critic => "reasoning",
58-
// Archivist uses the cheapest available model (local preferred).
57+
Self::Critic => "agentic",
5958
Self::Archivist => "local",
6059
}
6160
}

0 commit comments

Comments
 (0)