@@ -22,7 +22,7 @@ import {
2222 isDeadlocked ,
2323} from "../langgraph/scheduler" ;
2424import { buildGraphNodes , invokeGraph } from "../langgraph/graph" ;
25- import { runPlannedStep } from "../langgraph/step-runner" ;
25+ import { runPlannedStep , STEP_FAILED_MARKERS } from "../langgraph/step-runner" ;
2626import type {
2727 BlocksPlan ,
2828 CompiledPlan ,
@@ -856,3 +856,81 @@ describe("runPlannedStep — original request grounding", () => {
856856 expect ( stepPrompts [ 0 ] ) . toContain ( "add Anthropic models to github repo huberp/agentloop" ) ;
857857 } , 30000 ) ;
858858} ) ;
859+
860+ // ─────────────────────────────────────────────────────────────────────────────
861+ // (12) runPlannedStep — semantic step failure detection
862+ // ─────────────────────────────────────────────────────────────────────────────
863+
/**
 * Verifies the status `runPlannedStep` reports based on the text returned by
 * the (stubbed) LLM: refusal-style phrases such as "I cannot", "I am unable"
 * and "cannot perform" are expected to yield `status === "failed"`, while a
 * plain success message is expected to yield `status === "success"`.
 * Also sanity-checks the exported `STEP_FAILED_MARKERS` list.
 */
describe("runPlannedStep — semantic failure detection", () => {
  /** Builds a plan node from fixed defaults; any field present in `overrides` wins. */
  const buildNode = (overrides: Partial<CompiledPlanNode> = {}): CompiledPlanNode => {
    const defaults: CompiledPlanNode = {
      id: "s1",
      description: "Fork the huberp/agentloop repository",
      dependsOn: [],
      toolsNeeded: [],
      estimatedComplexity: "low",
      resources: [],
    };
    return { ...defaults, ...overrides };
  };

  /**
   * Creates a chat-model stub whose `invoke` always resolves to `output` with
   * no tool calls. `bindTools` hands back an object exposing the same `invoke`
   * mock, so the canned reply is returned on either call path.
   */
  const stubLlm = (output: string): BaseChatModel => {
    const invoke = jest.fn().mockResolvedValue({ content: output, tool_calls: [] });
    const bindTools = jest.fn().mockReturnValue({ invoke });
    return { invoke, bindTools } as unknown as BaseChatModel;
  };

  /** Runs the default node against a fresh, empty registry and an LLM stub replying with `output`. */
  const runStepWithOutput = (output: string) => {
    const llm = stubLlm(output);
    const registry = new ToolRegistry();
    return runPlannedStep(buildNode(), { registry, llm });
  };

  it("returns status=failed when output contains 'I cannot'", async () => {
    const refusal =
      "I cannot directly fork a repository or perform GitHub actions like forking. " +
      "However, you can manually fork the repository by following these steps.";

    const { status, error, output } = await runStepWithOutput(refusal);

    expect(status).toBe("failed");
    // The refusal text must be visible both in the error and in the raw output.
    expect(error).toContain("I cannot");
    expect(output).toContain("I cannot");
  });

  it("returns status=failed when output contains 'I am unable'", async () => {
    const { status, error } = await runStepWithOutput(
      "I am unable to perform this action directly.",
    );

    expect(status).toBe("failed");
    expect(error).toContain("I am unable");
  });

  it("returns status=failed when output contains 'cannot perform'", async () => {
    const { status } = await runStepWithOutput("This agent cannot perform external API calls.");

    expect(status).toBe("failed");
  });

  it("returns status=failed case-insensitively (e.g. 'I CANNOT')", async () => {
    const { status } = await runStepWithOutput("I CANNOT access external services.");

    expect(status).toBe("failed");
  });

  it("returns status=success when output does not contain any failure marker", async () => {
    const { status, output } = await runStepWithOutput("Repository cloned successfully.");

    expect(status).toBe("success");
    expect(output).toBe("Repository cloned successfully.");
  });

  it("exports STEP_FAILED_MARKERS as a non-empty array", () => {
    expect(Array.isArray(STEP_FAILED_MARKERS)).toBe(true);
    expect(STEP_FAILED_MARKERS.length).toBeGreaterThan(0);
  });
});
0 commit comments