fix(workflow): reject incoherent ResultGenerator output across all steps

Prachig-Microsoft · Copilot · Prachig-Microsoft · commit 4de1e284845e · 2026-06-13T00:45:04.000+05:30
Production deployment of the agent-framework 1.3.0 upgrade surfaced a
crash chain: Analysis "succeeded" with a self-contradictory result
(result=True, is_hard_terminated=False, output=None), and Design then
crashed at `task_param.output.process_id`. The root cause is the
ResultGenerator returning an empty shell when participants never
produced useful content.

Fixes:

* groupchat_orchestrator.run_stream now validates ResultGenerator output
  before constructing OrchestrationResult. If the result is not hard
  terminated but carries no `output` / `termination_output` payload, the
  orchestrator now reports success=False with a descriptive error. This
  is generic across all four step models (Analysis uses `output`;
  Design/Convert/Documentation use `termination_output`).
* All four step executors gained a defense-in-depth guard that raises a
  clear `<Step>Executor failed: orchestration reported success but
  produced no <X>Output. Reason: ...` exception when the same incoherent
  shape is observed. This stops the broken value at the boundary instead
  of propagating it downstream.
* groupchat_orchestrator: the silent `except Exception: pass` around
  Coordinator JSON parsing is replaced with
  `logger.debug(... exc_info=...)` so loop-detection failures become
  visible during debugging instead of being swallowed.

Tests:

* Updated each executor's existing soft-completion test to provide a
  valid output (previous setup encoded the broken shape we now reject).
* Added a new guard test per executor asserting the new exception fires
  for the incoherent (success=True + output=None + not hard-terminated)
  shape.
* Full unit suite: 829 passed (was 825; +4 new guard tests).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/src/processor/src/libs/agent_framework/groupchat_orchestrator.py b/src/processor/src/libs/agent_framework/groupchat_orchestrator.py
@@ -643,17 +643,48 @@ async def run_stream(
                     f"[RESULT] Skipping result generation - result_format: {result_format}, agent exists: {result_generator_name in self.agents}"
                 )
 
+            # Validate that ResultGenerator produced a coherent output. The LLM can
+            # sometimes return is_hard_terminated=False with output=None ("success
+            # but no actual output"), which causes downstream steps to crash with
+            # NoneType errors. Treat such self-contradictory results as failures so
+            # the workflow surfaces a clear error rather than propagating an empty
+            # shell to the next step.
+            generated_error: str | None = None
+            if final_analysis is not None and not bool(
+                getattr(final_analysis, "is_hard_terminated", False)
+            ):
+                # Step result models use either ``output`` (Analysis) or
+                # ``termination_output`` (Design, Convert, Documentation). Treat
+                # both equivalently: if neither holds a non-None payload, the
+                # ResultGenerator returned an incoherent shell.
+                has_output_attr = hasattr(final_analysis, "output") or hasattr(
+                    final_analysis, "termination_output"
+                )
+                payload = getattr(final_analysis, "output", None) or getattr(
+                    final_analysis, "termination_output", None
+                )
+                if has_output_attr and payload is None:
+                    reason = (
+                        getattr(final_analysis, "reason", "") or "<no reason given>"
+                    )
+                    generated_error = (
+                        "ResultGenerator produced incoherent output: "
+                        "is_hard_terminated=False but output=None. "
+                        f"Reason from result: {reason}"
+                    )
+                    logger.error("[RESULT] %s", generated_error)
+
             # Calculate execution time
             execution_time = (datetime.now() - start_time).total_seconds()
 
             # Build result
             result = OrchestrationResult[TOutput](
-                success=True,
+                success=generated_error is None,
                 conversation=conversation,
                 agent_responses=self.agent_responses,
                 tool_usage=self.agent_tool_usage,
                 result=final_analysis,
-                error=None,
+                error=generated_error,
                 execution_time_seconds=execution_time,
             )
 
@@ -1154,9 +1185,23 @@ async def _complete_agent_response(
                 ):
                     # Record invocation time for non-termination coordinator selections
                     self._agent_invoked_at[selected] = completed_at
-            except Exception:
-                # If the Coordinator didn't emit valid JSON, ignore.
-                pass
+            except Exception as exc:
+                # If the Coordinator didn't emit valid JSON we silently drop
+                # loop-detection and termination handling for this turn. Log at
+                # debug so the silence is visible if loop detection ever appears
+                # to misfire (previously this was a bare ``pass`` which made the
+                # failure invisible).
+                preview = (
+                    complete_message[:200]
+                    if isinstance(complete_message, str)
+                    else str(type(complete_message))
+                )
+                logger.debug(
+                    "Coordinator JSON parse failed; skipping loop detection for "
+                    "this turn. Raw message preview: %r",
+                    preview,
+                    exc_info=exc,
+                )
 
         # Invoke callback with complete response
         if callback:
diff --git a/src/processor/src/steps/analysis/workflow/analysis_executor.py b/src/processor/src/steps/analysis/workflow/analysis_executor.py
@@ -65,6 +65,13 @@ async def handle_execute(
             error_msg = result.error or "Analysis orchestration failed with no output"
             raise Exception(f"AnalysisExecutor failed: {error_msg}")
 
+        if not result.result.is_hard_terminated and result.result.output is None:
+            reason = result.result.reason or "<no reason given>"
+            raise Exception(
+                "AnalysisExecutor failed: orchestration reported success but produced "
+                f"no AnalysisOutput. Reason: {reason}"
+            )
+
         if result.result:
             if not result.result.is_hard_terminated:
                 await ctx.send_message(result.result)
diff --git a/src/processor/src/steps/convert/workflow/yaml_convert_executor.py b/src/processor/src/steps/convert/workflow/yaml_convert_executor.py
@@ -45,6 +45,13 @@ async def handle_execute(
             )
             raise Exception(f"YamlConvertExecutor failed: {error_msg}")
 
+        if not result.result.is_hard_terminated and result.result.termination_output is None:
+            reason = result.result.reason or "<no reason given>"
+            raise Exception(
+                "YamlConvertExecutor failed: orchestration reported success but "
+                f"produced no YAML conversion output. Reason: {reason}"
+            )
+
         if result.result:
             if not result.result.is_hard_terminated:
                 await ctx.send_message(result.result)
diff --git a/src/processor/src/steps/design/workflow/design_executor.py b/src/processor/src/steps/design/workflow/design_executor.py
@@ -42,6 +42,13 @@ async def handle_execute(
             error_msg = result.error or "Design orchestration failed with no output"
             raise Exception(f"DesignExecutor failed: {error_msg}")
 
+        if not result.result.is_hard_terminated and result.result.termination_output is None:
+            reason = result.result.reason or "<no reason given>"
+            raise Exception(
+                "DesignExecutor failed: orchestration reported success but produced "
+                f"no DesignOutput. Reason: {reason}"
+            )
+
         if result.result:
             if not result.result.is_hard_terminated:
                 await ctx.send_message(result.result)
diff --git a/src/processor/src/steps/documentation/workflow/documentation_executor.py b/src/processor/src/steps/documentation/workflow/documentation_executor.py
@@ -47,4 +47,11 @@ async def handle_execute(
             )
             raise Exception(f"DocumentationExecutor failed: {error_msg}")
 
+        if not result.result.is_hard_terminated and result.result.termination_output is None:
+            reason = result.result.reason or "<no reason given>"
+            raise Exception(
+                "DocumentationExecutor failed: orchestration reported success but "
+                f"produced no DocumentationOutput. Reason: {reason}"
+            )
+
         await ctx.yield_output(result.result)
diff --git a/src/processor/src/tests/unit/steps/analysis/test_analysis_executor.py b/src/processor/src/tests/unit/steps/analysis/test_analysis_executor.py
@@ -6,11 +6,47 @@
 import asyncio
 
 from libs.agent_framework.groupchat_orchestrator import OrchestrationResult
-from steps.analysis.models.step_output import Analysis_BooleanExtendedResult
+from steps.analysis.models.step_output import (
+    AnalysisOutput,
+    Analysis_BooleanExtendedResult,
+    ComplexityAnalysis,
+    FileType,
+    MigrationReadiness,
+)
 from steps.analysis.models.step_param import Analysis_TaskParam
 from steps.analysis.workflow.analysis_executor import AnalysisExecutor
 
 
+def _make_analysis_output(process_id: str) -> AnalysisOutput:
+    return AnalysisOutput(
+        process_id=process_id,
+        platform_detected="EKS",
+        confidence_score="95%",
+        files_discovered=[
+            FileType(
+                filename="app.yaml",
+                type="Deployment",
+                complexity="Low",
+                azure_mapping="AKS Deployment",
+            )
+        ],
+        complexity_analysis=ComplexityAnalysis(
+            network_complexity="Low",
+            security_complexity="Low",
+            storage_complexity="Low",
+            compute_complexity="Low",
+        ),
+        migration_readiness=MigrationReadiness(
+            overall_score="A",
+            concerns=[],
+            recommendations=[],
+        ),
+        summary="ok",
+        expert_insights=[],
+        analysis_file="analysis.md",
+    )
+
+
 class _FakeTelemetry:
     def __init__(self):
         self.transitions: list[tuple[str, str, str]] = []
@@ -59,6 +95,7 @@ async def execute(self, task_param=None):
                         result=True,
                         is_hard_terminated=False,
                         process_id=task_param.process_id,
+                        output=_make_analysis_output(task_param.process_id),
                     ),
                 )
 
@@ -143,3 +180,63 @@ async def execute(self, task_param=None):
         assert isinstance(ctx.yielded[0], Analysis_BooleanExtendedResult)
 
     asyncio.run(_run())
+
+
+def test_analysis_executor_raises_when_soft_completion_has_no_output(monkeypatch):
+    """Soft completion with output=None is incoherent: AnalysisExecutor must raise.
+
+    This guards against ResultGenerator returning a self-contradictory shell
+    (success=True, is_hard_terminated=False, output=None) which would otherwise
+    propagate to Design and crash there with `NoneType.process_id`.
+    """
+    async def _run():
+        import pytest
+
+        telemetry = _FakeTelemetry()
+        app_context = _FakeAppContext(telemetry)
+        ctx = _FakeCtx()
+
+        class _FakeOrchestrator:
+            def __init__(self, _app_context):
+                pass
+
+            async def execute(self, task_param=None):
+                return OrchestrationResult(
+                    success=True,
+                    conversation=[],
+                    agent_responses=[],
+                    tool_usage={},
+                    result=Analysis_BooleanExtendedResult(
+                        result=True,
+                        is_hard_terminated=False,
+                        process_id=task_param.process_id,
+                        reason="agents never produced output",
+                    ),
+                )
+
+        monkeypatch.setattr(
+            "steps.analysis.workflow.analysis_executor.text2art",
+            lambda _s: "ART",
+            raising=False,
+        )
+        monkeypatch.setattr(
+            "steps.analysis.workflow.analysis_executor.AnalysisOrchestrator",
+            _FakeOrchestrator,
+        )
+
+        executor = AnalysisExecutor(id="analysis", app_context=app_context)
+        message = Analysis_TaskParam(
+            process_id="p1",
+            container_name="c1",
+            source_file_folder="p1/source",
+            workspace_file_folder="p1/workspace",
+            output_file_folder="p1/output",
+        )
+
+        with pytest.raises(Exception, match="produced no AnalysisOutput"):
+            await executor.handle_execute(message, ctx)  # type: ignore[arg-type]
+
+        assert len(ctx.sent) == 0
+        assert len(ctx.yielded) == 0
+
+    asyncio.run(_run())
diff --git a/src/processor/src/tests/unit/steps/convert/test_yaml_convert_executor.py b/src/processor/src/tests/unit/steps/convert/test_yaml_convert_executor.py
@@ -6,11 +6,63 @@
 import asyncio
 
 from libs.agent_framework.groupchat_orchestrator import OrchestrationResult
-from steps.convert.models.step_output import Yaml_ExtendedBooleanResult
+from steps.convert.models.step_output import (
+    ConvertedFile,
+    ConversionMetrics,
+    ConversionQuality,
+    DimensionalAnalysis,
+    MultiDimensionalAnalysis,
+    YamlOutput,
+    Yaml_ExtendedBooleanResult,
+)
 from steps.convert.workflow.yaml_convert_executor import YamlConvertExecutor
 from steps.design.models.step_output import Design_ExtendedBooleanResult
 
 
+def _make_yaml_output() -> YamlOutput:
+    dim = DimensionalAnalysis(
+        complexity="Low",
+        converted_components=["pod"],
+        azure_optimizations="none",
+        concerns=[],
+        success_rate="100%",
+    )
+    return YamlOutput(
+        converted_files=[
+            ConvertedFile(
+                source_file="a.yaml",
+                converted_file="a-azure.yaml",
+                conversion_status="Success",
+                accuracy_rating="100%",
+                concerns=[],
+                azure_enhancements=[],
+            )
+        ],
+        multi_dimensional_analysis=MultiDimensionalAnalysis(
+            network_analysis=dim,
+            security_analysis=dim,
+            storage_analysis=dim,
+            compute_analysis=dim,
+        ),
+        overall_conversion_metrics=ConversionMetrics(
+            total_files=1,
+            successful_conversions=1,
+            failed_conversions=0,
+            overall_accuracy="100%",
+            azure_compatibility="100%",
+        ),
+        conversion_quality=ConversionQuality(
+            azure_best_practices="ok",
+            security_hardening="ok",
+            performance_optimization="ok",
+            production_readiness="ok",
+        ),
+        summary="ok",
+        expert_insights=[],
+        conversion_report_file="report.md",
+    )
+
+
 class _FakeTelemetry:
     def __init__(self):
         self.transitions: list[tuple[str, str, str]] = []
@@ -59,6 +111,7 @@ async def execute(self, task_param=None):
                         result=True,
                         is_hard_terminated=False,
                         process_id=task_param.process_id,
+                        termination_output=_make_yaml_output(),
                     ),
                 )
 
@@ -118,3 +171,47 @@ async def execute(self, task_param=None):
         assert isinstance(ctx.yielded[0], Yaml_ExtendedBooleanResult)
 
     asyncio.run(_run())
+
+
+def test_yaml_convert_executor_raises_when_soft_completion_has_no_output(monkeypatch):
+    """Soft completion with termination_output=None is incoherent: must raise."""
+    async def _run():
+        import pytest
+
+        telemetry = _FakeTelemetry()
+        app_context = _FakeAppContext(telemetry)
+        ctx = _FakeCtx()
+
+        class _FakeOrchestrator:
+            def __init__(self, _app_context):
+                pass
+
+            async def execute(self, task_param=None):
+                return OrchestrationResult(
+                    success=True,
+                    conversation=[],
+                    agent_responses=[],
+                    tool_usage={},
+                    result=Yaml_ExtendedBooleanResult(
+                        result=True,
+                        is_hard_terminated=False,
+                        process_id=task_param.process_id,
+                        reason="agents never produced output",
+                    ),
+                )
+
+        monkeypatch.setattr(
+            "steps.convert.workflow.yaml_convert_executor.YamlConvertOrchestrator",
+            _FakeOrchestrator,
+        )
+
+        executor = YamlConvertExecutor(id="yaml", app_context=app_context)
+        message = Design_ExtendedBooleanResult(process_id="p1")
+
+        with pytest.raises(Exception, match="produced no YAML conversion output"):
+            await executor.handle_execute(message, ctx)  # type: ignore[arg-type]
+
+        assert len(ctx.sent) == 0
+        assert len(ctx.yielded) == 0
+
+    asyncio.run(_run())
diff --git a/src/processor/src/tests/unit/steps/design/test_design_executor.py b/src/processor/src/tests/unit/steps/design/test_design_executor.py
diff --git a/src/processor/src/tests/unit/steps/documentation/test_documentation_executor.py b/src/processor/src/tests/unit/steps/documentation/test_documentation_executor.py