Skip to content

Commit 3afffa5

Browse files
Copilot and huberp authored
fix(C-2): inject original request into step prompts; require concrete values in planner descriptions (#118)
* Initial plan

* fix(C-2): inject original request into step prompts and strengthen planner instructions

Agent-Logs-Url: https://github.com/huberp/agentloop/sessions/c6821f70-b870-4117-a6d4-06d33377c44f

Co-authored-by: huberp <4027454+huberp@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: huberp <4027454+huberp@users.noreply.github.com>
1 parent 4d04a2f commit 3afffa5

3 files changed

Lines changed: 134 additions & 1 deletion

File tree

src/__tests__/langgraph.test.ts

Lines changed: 128 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,11 @@ import {
2222
isDeadlocked,
2323
} from "../langgraph/scheduler";
2424
import { buildGraphNodes, invokeGraph } from "../langgraph/graph";
25+
import { runPlannedStep } from "../langgraph/step-runner";
2526
import type {
2627
BlocksPlan,
2728
CompiledPlan,
29+
CompiledPlanNode,
2830
NodeRecord,
2931
GraphState,
3032
GraphEvent,
@@ -728,3 +730,129 @@ describe("finalize node", () => {
728730
expect(result.output).toContain("Something broke");
729731
});
730732
});
733+
734+
// ─────────────────────────────────────────────────────────────────────────────
735+
// (11) runPlannedStep — originalRequest grounding
736+
// ─────────────────────────────────────────────────────────────────────────────
737+
738+
describe("runPlannedStep — original request grounding", () => {
739+
/** Minimal CompiledPlanNode for unit tests. */
740+
function makeNode(overrides: Partial<CompiledPlanNode> = {}): CompiledPlanNode {
741+
return {
742+
id: "s1",
743+
description: "Clone the repository locally",
744+
dependsOn: [],
745+
toolsNeeded: [],
746+
estimatedComplexity: "low",
747+
resources: [],
748+
...overrides,
749+
};
750+
}
751+
752+
it("includes originalRequest in the step system prompt when provided", async () => {
753+
let capturedSystemPrompt = "";
754+
755+
const invoke = jest.fn().mockImplementation(() => {
756+
return Promise.resolve({ content: "done", tool_calls: [] });
757+
});
758+
const bindTools = jest.fn().mockImplementation((_tools: unknown, opts?: { tool_choice?: string }) => {
759+
// Capture the system prompt from whatever call is made
760+
return {
761+
invoke: jest.fn().mockImplementation((messages: unknown[]) => {
762+
if (Array.isArray(messages)) {
763+
const systemMsg = (messages as Array<{ _getType?: () => string; content?: string }>)
764+
.find((m) => m._getType?.() === "system");
765+
if (systemMsg?.content) capturedSystemPrompt = systemMsg.content as string;
766+
}
767+
return Promise.resolve({ content: "done", tool_calls: [] });
768+
}),
769+
};
770+
});
771+
772+
const llm = { invoke, bindTools } as unknown as BaseChatModel;
773+
const registry = new ToolRegistry();
774+
const node = makeNode();
775+
776+
await runPlannedStep(node, {
777+
registry,
778+
llm,
779+
originalRequest: "add Anthropic models to github repo huberp/agentloop",
780+
});
781+
782+
// The system prompt passed to runSubagent must contain the original request
783+
expect(capturedSystemPrompt).toContain("add Anthropic models to github repo huberp/agentloop");
784+
expect(capturedSystemPrompt).toContain("Original user request (for context):");
785+
});
786+
787+
it("omits the original-request line when originalRequest is not provided", async () => {
788+
let capturedSystemPrompt = "";
789+
790+
const bindTools = jest.fn().mockImplementation(() => ({
791+
invoke: jest.fn().mockImplementation((messages: unknown[]) => {
792+
if (Array.isArray(messages)) {
793+
const systemMsg = (messages as Array<{ _getType?: () => string; content?: string }>)
794+
.find((m) => m._getType?.() === "system");
795+
if (systemMsg?.content) capturedSystemPrompt = systemMsg.content as string;
796+
}
797+
return Promise.resolve({ content: "done", tool_calls: [] });
798+
}),
799+
}));
800+
801+
const llm = {
802+
invoke: jest.fn().mockResolvedValue({ content: "done", tool_calls: [] }),
803+
bindTools,
804+
} as unknown as BaseChatModel;
805+
const registry = new ToolRegistry();
806+
const node = makeNode();
807+
808+
await runPlannedStep(node, { registry, llm });
809+
810+
expect(capturedSystemPrompt).not.toContain("Original user request (for context):");
811+
});
812+
813+
it("propagates request from state.request via invokeGraph", async () => {
814+
const capturedSystemPrompts: string[] = [];
815+
816+
const planJson = JSON.stringify({
817+
version: "2.0",
818+
goal: "clone test",
819+
blocks: [
820+
{ type: "step", description: "Clone the forked repository locally to the workspace", toolsNeeded: [], estimatedComplexity: "low" },
821+
],
822+
});
823+
const workspaceCtxJson = JSON.stringify({
824+
workspaceInfo: { language: "node", framework: "none", packageManager: "npm", hasTests: true, testCommand: "", lintCommand: "", buildCommand: "", entryPoints: [], gitInitialized: true },
825+
});
826+
827+
// Capture all system prompts seen during execution
828+
let callCount = 0;
829+
const invoke = jest.fn().mockImplementation((messages: unknown[]) => {
830+
callCount++;
831+
if (Array.isArray(messages)) {
832+
const systemMsg = (messages as Array<{ _getType?: () => string; content?: string }>)
833+
.find((m) => m._getType?.() === "system");
834+
if (systemMsg?.content) capturedSystemPrompts.push(systemMsg.content as string);
835+
}
836+
if (callCount === 1) return Promise.resolve({ content: workspaceCtxJson, tool_calls: [] });
837+
if (callCount === 2) return Promise.resolve({ content: planJson, tool_calls: [] });
838+
return Promise.resolve({ content: "cloned successfully", tool_calls: [] });
839+
});
840+
const llm = {
841+
invoke,
842+
bindTools: jest.fn().mockImplementation(() => ({ invoke })),
843+
} as unknown as BaseChatModel;
844+
845+
const registry = new ToolRegistry();
846+
await invokeGraph(
847+
"add Anthropic models to github repo huberp/agentloop",
848+
{ registry, llm },
849+
);
850+
851+
// At least one system prompt (from the step subagent) must contain the original request
852+
const stepPrompts = capturedSystemPrompts.filter((p) =>
853+
p.includes("executing one step of a larger plan"),
854+
);
855+
expect(stepPrompts.length).toBeGreaterThan(0);
856+
expect(stepPrompts[0]).toContain("add Anthropic models to github repo huberp/agentloop");
857+
}, 30000);
858+
});

src/langgraph/graph.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,8 @@ const BLOCKS_PLANNER_SYSTEM =
116116
`- Set join to "any" when only the first successful branch matters.\n` +
117117
`- Mark resources: ["network"] for steps using web search/fetch.\n` +
118118
`- Mark resources: ["file:WRITE:<path>"] for steps writing to a specific file.\n` +
119-
`- Produce at least one block.`;
119+
`- Produce at least one block.\n` +
120+
`- Include all concrete values (URLs, repository names, file paths, version numbers, package names) needed to execute the step directly in the step description itself.`;
120121

121122
// ─────────────────────────────────────────────────────────────────────────────
122123
// Dependencies injected when building the graph
@@ -288,6 +289,7 @@ export function buildGraphNodes(deps: GraphDeps, progressCb?: (evt: GraphEvent)
288289
llm: deps.llm,
289290
profileRegistry: deps.profileRegistry,
290291
sharedContext: state.sharedContext,
292+
originalRequest: state.request,
291293
});
292294
return { nodeId, ...result };
293295
}),

src/langgraph/step-runner.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,8 @@ export interface StepRunnerDeps {
4545
profileRegistry?: AgentProfileRegistry;
4646
/** Shared context from the graph state (conversation history, prior step outputs). */
4747
sharedContext?: Record<string, unknown>;
48+
/** The original user request; injected into every step prompt to prevent hallucination. */
49+
originalRequest?: string;
4850
}
4951

5052
/**
@@ -102,6 +104,7 @@ export async function runPlannedStep(
102104

103105
const stepSystemPrompt =
104106
`You are an AI agent executing one step of a larger plan.\n` +
107+
(deps.originalRequest ? `Original user request (for context): ${deps.originalRequest}\n` : ``) +
105108
`Step: ${node.description}\n` +
106109
`${toolList}\n` +
107110
`Instructions:\n` +

0 commit comments

Comments
 (0)