Skip to content

Commit 337447c

Browse files
vishalveerareddy123vishal veerareddyclaude
authored
fix(agents): make the sub-agent learning subsystem actually work (#80)
* fix(agents): make the sub-agent learning subsystem actually work The Reflector → Skillbook → prompt-injection loop was fully built but dead: the Reflector read `context.transcript` and `context.taskPrompt`, neither of which the context manager ever set. Reflection threw on every non-trivial run and the error was silently swallowed as "Learning failed", so no skills were ever persisted or injected. - context-manager: populate `context.taskPrompt` and an in-memory `context.transcript`, kept in sync in `recordToolCall`. - reflector: fix error-recovery detection (sliced by a ms timestamp instead of the entry's index, always yielding []); make `_inferTaskType` null-safe so reflection can never throw. - executor/coordinator/index: thread the definition loader through so newly learned skills re-inject into agent prompts live (not only after restart); loader injection rebuilds from the original prompt, so it stays idempotent. - loader: add periodic pruning of proven-unhelpful skills on a hardcoded 6h unref'd timer (only when agents are enabled). - add test/agent-learning.test.js covering the full loop and pruning. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(orchestrator): report real model/usage on tool-call responses (#78) toAnthropicResponse() built the response `model` from the client request (falling back to a hardcoded default) and never looked at the provider's actual model, so tool-call responses reported a stale/aliased model. Usage also silently zeroed when the provider used input_tokens/output_tokens naming, and the prompt-cache-hit path returned without _routingMeta so downstream model-name resolution couldn't correct it. - model: prefer openai.model, fall back to requestedModel (mirrors the direct non-tool path `databricksResponse.json.model || requestedModel`). - usage: accept both prompt_tokens/completion_tokens and input_tokens/output_tokens shapes. - attach _routingMeta on the cache-hit return path, matching the live loop. - export toAnthropicResponse and add test/tool-call-response-metadata.test.js. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: vishal veerareddy <vishalveera.reddy@servicenow.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 522b736 commit 337447c

10 files changed

Lines changed: 445 additions & 12 deletions

File tree

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"dev": "nodemon index.js",
1717
"lint": "eslint src index.js",
1818
"test": "npm run test:unit && npm run test:performance",
19-
"test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/resilience.test.js test/telemetry-routing.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/distill.test.js test/large-payload.test.js test/code-mode.test.js test/prompt-cache-injection.test.js test/risk-analyzer.test.js test/interaction-block.test.js test/preflight.test.js test/token-reduction.test.js test/session-affinity.test.js test/model-registry-cost.test.js test/task-decomposition.test.js test/output-format-guard.test.js test/tier-fallback.test.js test/wrap.test.js test/init.test.js",
19+
"test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/resilience.test.js test/telemetry-routing.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/distill.test.js test/large-payload.test.js test/code-mode.test.js test/prompt-cache-injection.test.js test/risk-analyzer.test.js test/interaction-block.test.js test/preflight.test.js test/token-reduction.test.js test/session-affinity.test.js test/model-registry-cost.test.js test/task-decomposition.test.js test/output-format-guard.test.js test/tier-fallback.test.js test/wrap.test.js test/init.test.js test/agent-learning.test.js test/tool-call-response-metadata.test.js",
2020
"test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
2121
"test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js",
2222
"test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js",

src/agents/context-manager.js

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ class ContextManager {
6262
allowedTools: agentDef.allowedTools,
6363
startTime: Date.now(),
6464

65+
// Original task prompt — consumed by the Reflector to infer task type.
66+
taskPrompt,
67+
68+
// In-memory transcript mirror of the JSONL file — consumed by the
69+
// Reflector for tool-usage / error-recovery analysis. The JSONL file
70+
// remains the durable record; this array is the hot-path copy.
71+
transcript: [],
72+
6573
// Token tracking
6674
inputTokens: 0,
6775
outputTokens: 0,
@@ -102,7 +110,7 @@ class ContextManager {
102110
* Record tool execution in transcript
103111
*/
104112
recordToolCall(context, toolName, input, output, error = null) {
105-
this.writeTranscriptEntry(context.transcriptPath, {
113+
const entry = {
106114
type: "tool_call",
107115
agentId: context.agentId,
108116
step: context.steps,
@@ -111,7 +119,15 @@ class ContextManager {
111119
output: error ? null : output,
112120
error: error ? error.message : null,
113121
timestamp: Date.now()
114-
});
122+
};
123+
124+
// Keep the in-memory transcript in sync so the Reflector (which reads
125+
// context.transcript) sees tool calls without re-parsing the JSONL file.
126+
if (Array.isArray(context.transcript)) {
127+
context.transcript.push(entry);
128+
}
129+
130+
this.writeTranscriptEntry(context.transcriptPath, entry);
115131
}
116132

117133
/**

src/agents/definitions/loader.js

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,17 @@ const yaml = require("js-yaml");
44
const logger = require("../../logger");
55
const Skillbook = require("../skillbook");
66

7+
// How often to prune low-quality learned skills. 6 hours is frequent enough to
8+
// keep skillbooks tidy on a long-running process but rare enough to be
9+
// negligible overhead. Hardcoded on purpose — not worth a config knob.
10+
const SKILL_PRUNE_INTERVAL_MS = 6 * 60 * 60 * 1000;
11+
712
class AgentDefinitionLoader {
813
constructor() {
914
this.agents = new Map();
1015
this.skillbooks = new Map(); // agentType → Skillbook
1116
this.initialized = false;
17+
this.pruneTimer = null;
1218

1319
// Initialize synchronously for compatibility
1420
this.loadBuiltInAgentsSync();
@@ -82,13 +88,97 @@ class AgentDefinitionLoader {
8288
return this.skillbooks.get(agentType);
8389
}
8490

91+
/**
92+
* Replace an agent's skillbook with a freshly-learned instance and re-inject
93+
* its skills into the (in-memory) system prompt. Called by the executor after
94+
* a run persists new skills, so learning takes effect within the running
95+
* process instead of only after a restart. Injection rebuilds from
96+
* originalSystemPrompt, so repeated calls do not duplicate the skills block.
97+
*/
98+
setSkillbook(agentType, skillbook) {
99+
if (!agentType || !skillbook) return;
100+
// Match the case-insensitive key an agent is actually stored under.
101+
const key = this.agents.has(agentType)
102+
? agentType
103+
: Array.from(this.agents.keys()).find(
104+
k => k.toLowerCase() === String(agentType).toLowerCase()
105+
);
106+
if (!key) return;
107+
108+
this.skillbooks.set(key, skillbook);
109+
this._injectSkillsIntoPrompt(key);
110+
}
111+
85112
/**
86113
* Reload skillbooks and update prompts (call after learning)
87114
*/
88115
async reloadSkillbooks() {
89116
await this._loadSkillbooksAsync();
90117
}
91118

119+
/**
120+
* Prune low-quality skills from every loaded skillbook. Skillbook.prune()
121+
* only drops skills that have been tried enough times to prove they don't
122+
* help (default: useCount >= 3 && confidence < 0.2), so fresh skills are
123+
* never removed. Persists and re-injects only the skillbooks that changed.
124+
* @returns {Promise<number>} total skills pruned across all agents
125+
*/
126+
async pruneSkillbooks() {
127+
let totalPruned = 0;
128+
129+
for (const [agentType, skillbook] of this.skillbooks.entries()) {
130+
try {
131+
const pruned = skillbook.prune();
132+
if (pruned > 0) {
133+
await skillbook.save();
134+
this._injectSkillsIntoPrompt(agentType);
135+
totalPruned += pruned;
136+
}
137+
} catch (error) {
138+
logger.warn(
139+
{ agentType, error: error.message },
140+
"Failed to prune skillbook"
141+
);
142+
}
143+
}
144+
145+
if (totalPruned > 0) {
146+
logger.info({ totalPruned }, "Pruned low-quality agent skills");
147+
}
148+
return totalPruned;
149+
}
150+
151+
/**
152+
* Start periodic skillbook pruning. Idempotent; a non-positive interval
153+
* disables pruning. The timer is unref'd so it never keeps the process alive.
154+
*/
155+
startSkillPruning(intervalMs = SKILL_PRUNE_INTERVAL_MS) {
156+
if (this.pruneTimer) return; // already running
157+
if (!Number.isInteger(intervalMs) || intervalMs <= 0) {
158+
logger.debug({ intervalMs }, "Skillbook pruning disabled");
159+
return;
160+
}
161+
162+
this.pruneTimer = setInterval(() => {
163+
this.pruneSkillbooks().catch((error) => {
164+
logger.warn({ error: error.message }, "Skillbook pruning failed");
165+
});
166+
}, intervalMs);
167+
this.pruneTimer.unref();
168+
169+
logger.info({ intervalMs }, "Skillbook pruning started");
170+
}
171+
172+
/**
173+
* Stop periodic skillbook pruning (for shutdown / tests).
174+
*/
175+
stopSkillPruning() {
176+
if (this.pruneTimer) {
177+
clearInterval(this.pruneTimer);
178+
this.pruneTimer = null;
179+
}
180+
}
181+
92182
/**
93183
* Load built-in agents (Explore, Plan, General)
94184
*/

src/agents/executor.js

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,18 @@ const Reflector = require("./reflector");
99
const contextManager = new ContextManager();
1010

1111
class SubagentExecutor {
12+
/**
13+
* @param {Object} [options]
14+
* @param {Object} [options.definitionLoader] - Agent definition loader. When
15+
* provided, newly-learned skills are re-injected into agent prompts live
16+
* (within the running process). Optional so the executor still works
17+
* standalone (e.g. in tests); without it, learning still persists to disk
18+
* and is picked up on the next process start.
19+
*/
20+
constructor({ definitionLoader = null } = {}) {
21+
this.definitionLoader = definitionLoader;
22+
}
23+
1224
/**
1325
* Execute a single subagent
1426
* @param {Object} agentDef - Agent definition
@@ -403,8 +415,12 @@ class SubagentExecutor {
403415
return; // Nothing to learn
404416
}
405417

406-
// Load skillbook for this agent type
407-
const skillbook = await Skillbook.load(context.agentName);
418+
// Prefer the loader's live skillbook instance (so use-counts/confidence
419+
// accumulate in one place); fall back to loading from disk when running
420+
// standalone.
421+
const skillbook =
422+
this.definitionLoader?.getSkillbook(context.agentName) ||
423+
(await Skillbook.load(context.agentName));
408424

409425
// Add each learned pattern
410426
for (const pattern of patterns) {
@@ -414,6 +430,12 @@ class SubagentExecutor {
414430
// Save skillbook (persists learning)
415431
await skillbook.save();
416432

433+
// Re-inject into the agent's prompt so the next run benefits without a
434+
// restart. No-op when no loader was provided.
435+
if (this.definitionLoader) {
436+
this.definitionLoader.setSkillbook(context.agentName, skillbook);
437+
}
438+
417439
logger.info({
418440
agentType: context.agentName,
419441
patternsLearned: patterns.length,

src/agents/index.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,15 @@ const ParallelCoordinator = require("./parallel-coordinator");
55
const agentStore = require("./store");
66

77
const definitionLoader = new AgentDefinitionLoader();
8-
const coordinator = new ParallelCoordinator(config.agents?.maxConcurrent || 10);
8+
const coordinator = new ParallelCoordinator(config.agents?.maxConcurrent || 10, {
9+
definitionLoader,
10+
});
11+
12+
// Periodically drop learned skills that have proven unhelpful. Only when the
13+
// agents subsystem is active; the timer is unref'd so it never blocks exit.
14+
if (config.agents?.enabled) {
15+
definitionLoader.startSkillPruning();
16+
}
917

1018
/**
1119
* Spawn and execute subagent(s)

src/agents/parallel-coordinator.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ const logger = require("../logger");
22
const SubagentExecutor = require("./executor");
33

44
class ParallelCoordinator {
5-
constructor(maxConcurrent = 10) {
5+
constructor(maxConcurrent = 10, { definitionLoader = null } = {}) {
66
this.maxConcurrent = maxConcurrent;
7-
this.executor = new SubagentExecutor();
7+
this.executor = new SubagentExecutor({ definitionLoader });
88
}
99

1010
/**

src/agents/reflector.js

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,13 @@ class Reflector {
184184
// Pattern 1: Recovered from errors
185185
if (successful) {
186186
const failedTools = errorEntries.map(e => e.toolName);
187+
// Tools invoked *after* the last error entry are the recovery path.
188+
// Use the entry's position in the transcript, not its timestamp
189+
// (slicing by a ~1.7e12 timestamp always yielded []).
190+
const lastErrorEntry = errorEntries[errorEntries.length - 1];
191+
const lastErrorIndex = transcript.indexOf(lastErrorEntry);
187192
const recoveryTools = transcript
188-
.slice(errorEntries[errorEntries.length - 1].timestamp)
193+
.slice(lastErrorIndex + 1)
189194
.filter(e => e.type === "tool_call" && !e.error)
190195
.map(e => e.toolName);
191196

@@ -248,6 +253,11 @@ class Reflector {
248253
* Infer task type from prompt
249254
*/
250255
static _inferTaskType(prompt) {
256+
// Guard against a missing/non-string prompt so reflection never throws
257+
// (a throw here previously aborted all learning silently).
258+
if (typeof prompt !== "string" || prompt.length === 0) {
259+
return null;
260+
}
251261
const lower = prompt.toLowerCase();
252262

253263
const taskTypes = [

src/orchestrator/index.js

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,7 +1111,13 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
11111111
id: openai.id ?? `msg_${Date.now()}`,
11121112
type: "message",
11131113
role: "assistant",
1114-
model: requestedModel,
1114+
// Prefer the model the provider actually served with; fall back to the
1115+
// requested model only when the provider omits it. Mirrors the direct
1116+
// (non-tool) path at `databricksResponse.json.model || requestedModel`, so
1117+
// tool-call responses no longer report a stale/aliased client-request model.
1118+
model: (typeof openai?.model === "string" && openai.model.trim())
1119+
? openai.model
1120+
: requestedModel,
11151121
content: contentItems,
11161122
stop_reason:
11171123
choice?.finish_reason === "stop"
@@ -1123,8 +1129,11 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
11231129
: choice?.finish_reason ?? "end_turn",
11241130
stop_sequence: null,
11251131
usage: {
1126-
input_tokens: usage.prompt_tokens ?? 0,
1127-
output_tokens: usage.completion_tokens ?? 0,
1132+
// Accept both OpenAI (prompt_tokens/completion_tokens) and
1133+
// already-Anthropic (input_tokens/output_tokens) usage shapes so token
1134+
// counts survive regardless of which provider/converter produced them.
1135+
input_tokens: usage.prompt_tokens ?? usage.input_tokens ?? 0,
1136+
output_tokens: usage.completion_tokens ?? usage.output_tokens ?? 0,
11281137
cache_creation_input_tokens: 0,
11291138
cache_read_input_tokens: 0,
11301139
},
@@ -4053,6 +4062,16 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
40534062
anthropicPayload.usage.cache_read_input_tokens = promptTokens;
40544063
anthropicPayload.usage.cache_creation_input_tokens = 0;
40554064

4065+
// Carry routing metadata on the cache-hit path too, so downstream model
4066+
// name resolution (OpenClaw) behaves the same as the live loop path.
4067+
if (cachedResponse.routingDecision) {
4068+
anthropicPayload._routingMeta = {
4069+
provider: cachedResponse.routingDecision.provider,
4070+
model: cachedResponse.routingDecision.model,
4071+
tier: cachedResponse.routingDecision.tier,
4072+
};
4073+
}
4074+
40564075
appendTurnToSession(session, {
40574076
role: "assistant",
40584077
type: "message",
@@ -4147,4 +4166,6 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
41474166

41484167
module.exports = {
41494168
processMessage,
4169+
// Exported for unit testing of response-metadata conversion.
4170+
toAnthropicResponse,
41504171
};

0 commit comments

Comments
 (0)