Fix crescendo JSON parsing crash producing zero results (#5058399) (#45526)

slister1001 · Copilot · web-flow · commit d13f84caf0f3 · 2026-03-10T00:15:26.000Z
* Fix crescendo JSON parsing crash producing zero results (#5058399)

When Foundry's crescendo orchestration returns non-JSON responses,
the ScenarioOrchestrator now catches the error gracefully instead
of propagating it. Partial results from successful attack strategies
(e.g., baseline) are preserved even when other strategies fail.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address review comments: remove redundant guard, add logging and docs

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Apply black formatting

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_scenario_orchestrator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_scenario_orchestrator.py
@@ -112,7 +112,26 @@ async def execute(
 
         # Run attack - PyRIT handles all execution
         self.logger.info(f"Executing attacks for {self.risk_category}...")
-        self._scenario_result = await self._scenario.run_async()
+        try:
+            self._scenario_result = await self._scenario.run_async()
+        except Exception as e:
+            self.logger.warning(
+                f"Error during attack execution for {self.risk_category}: {str(e)}. "
+                f"Partial results may still be available."
+            )
+            # Intentionally swallow the exception so execute() returns normally.
+            # The FoundryExecutionManager (see PR #45541) provides an additional
+            # outer recovery layer. If _scenario_result remains None,
+            # downstream get_attack_results() returns an empty list safely.
+            try:
+                # Relies on PyRIT FoundryScenario internal `_result` attribute
+                # to retrieve partial results accumulated before the failure.
+                # hasattr guards against future PyRIT versions removing this attribute.
+                # If the attribute type changes, get_attack_results() will fail safely downstream.
+                if hasattr(self._scenario, "_result"):
+                    self._scenario_result = self._scenario._result
+            except Exception as e:
+                self.logger.debug("Failed to retrieve partial scenario result: %s", e, exc_info=True)
 
         self.logger.info(f"Attack execution complete for {self.risk_category}")
 
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py
@@ -111,7 +111,11 @@ class TestGetAttackTypeName:
 
     def test_with_dict_identifier(self):
         """Test with current pyrit 0.11.0 dict form."""
-        identifier = {"__type__": "PromptSendingAttack", "__module__": "pyrit.executor", "id": "abc"}
+        identifier = {
+            "__type__": "PromptSendingAttack",
+            "__module__": "pyrit.executor",
+            "id": "abc",
+        }
         assert _get_attack_type_name(identifier) == "PromptSendingAttack"
 
     def test_with_dict_missing_type(self):
@@ -1032,6 +1036,83 @@ def test_calculate_asr_by_strategy(self, mock_logger):
         assert "MorseAttack" in asr_by_strategy
         assert asr_by_strategy["MorseAttack"] == pytest.approx(1.0)  # 1/1
 
+    @pytest.mark.asyncio
+    async def test_execute_swallows_run_async_exception_with_partial_results(self, mock_logger):
+        """Test that when run_async raises, execute() does not propagate the exception
+        and _scenario_result captures partial results from _result if available."""
+        from pyrit.scenario.foundry import FoundryStrategy
+
+        mock_target = MagicMock()
+        mock_scorer = MagicMock()
+        mock_dataset = MagicMock()
+        mock_dataset.get_all_seed_groups.return_value = [MagicMock()]
+
+        orchestrator = ScenarioOrchestrator(
+            risk_category="violence",
+            objective_target=mock_target,
+            rai_scorer=mock_scorer,
+            logger=mock_logger,
+        )
+
+        # Simulate partial results stored on the internal _result attribute
+        partial_result = MagicMock()
+        mock_foundry = AsyncMock()
+        mock_foundry.initialize_async = AsyncMock()
+        mock_foundry.run_async = AsyncMock(side_effect=RuntimeError("mid-execution failure"))
+        mock_foundry._result = partial_result
+
+        with patch(
+            "azure.ai.evaluation.red_team._foundry._scenario_orchestrator.FoundryScenario",
+            return_value=mock_foundry,
+        ), patch("pyrit.executor.attack.AttackScoringConfig"):
+            # Should NOT raise
+            result = await orchestrator.execute(
+                dataset_config=mock_dataset,
+                strategies=[FoundryStrategy.Base64],
+            )
+
+            assert result == orchestrator
+            # Partial result should be captured
+            assert orchestrator._scenario_result is partial_result
+            mock_logger.warning.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_execute_swallows_run_async_exception_no_partial_results(self, mock_logger):
+        """Test that when run_async raises and _result is absent, execute() still returns
+        normally with _scenario_result remaining None."""
+        from pyrit.scenario.foundry import FoundryStrategy
+
+        mock_target = MagicMock()
+        mock_scorer = MagicMock()
+        mock_dataset = MagicMock()
+        mock_dataset.get_all_seed_groups.return_value = [MagicMock()]
+
+        orchestrator = ScenarioOrchestrator(
+            risk_category="violence",
+            objective_target=mock_target,
+            rai_scorer=mock_scorer,
+            logger=mock_logger,
+        )
+
+        mock_foundry = AsyncMock()
+        mock_foundry.initialize_async = AsyncMock()
+        mock_foundry.run_async = AsyncMock(side_effect=RuntimeError("total failure"))
+        # No _result attribute on mock_foundry (simulate missing private attr)
+        del mock_foundry._result
+
+        with patch(
+            "azure.ai.evaluation.red_team._foundry._scenario_orchestrator.FoundryScenario",
+            return_value=mock_foundry,
+        ), patch("pyrit.executor.attack.AttackScoringConfig"):
+            result = await orchestrator.execute(
+                dataset_config=mock_dataset,
+                strategies=[FoundryStrategy.Base64],
+            )
+
+            assert result == orchestrator
+            assert orchestrator._scenario_result is None
+            mock_logger.warning.assert_called_once()
+
 
 # =============================================================================
 # Tests for FoundryResultProcessor
@@ -1510,7 +1591,9 @@ def test_group_results_by_strategy_keys_match_complexity_map(
         self, mock_credential, mock_azure_ai_project, mock_logger
     ):
         """Test that strategy keys match ATTACK_STRATEGY_COMPLEXITY_MAP."""
-        from azure.ai.evaluation.red_team._utils.constants import ATTACK_STRATEGY_COMPLEXITY_MAP
+        from azure.ai.evaluation.red_team._utils.constants import (
+            ATTACK_STRATEGY_COMPLEXITY_MAP,
+        )
 
         manager = FoundryExecutionManager(
             credential=mock_credential,