Skip to content

Commit d13f84c

Browse files
slister1001 and Copilot authored
Fix crescendo JSON parsing crash producing zero results (#5058399) (#45526)
* Fix crescendo JSON parsing crash producing zero results (#5058399)

  When Foundry's crescendo orchestration returns non-JSON responses, the ScenarioOrchestrator now catches the error gracefully instead of propagating it. Partial results from successful attack strategies (e.g., baseline) are preserved even when other strategies fail.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Address review comments: remove redundant guard, add logging and docs

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Apply black formatting

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent b494ea5 commit d13f84c

2 files changed

Lines changed: 105 additions & 3 deletions

File tree

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_foundry/_scenario_orchestrator.py

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,26 @@ async def execute(
112112

113113
# Run attack - PyRIT handles all execution
114114
self.logger.info(f"Executing attacks for {self.risk_category}...")
115-
self._scenario_result = await self._scenario.run_async()
115+
try:
116+
self._scenario_result = await self._scenario.run_async()
117+
except Exception as e:
118+
self.logger.warning(
119+
f"Error during attack execution for {self.risk_category}: {str(e)}. "
120+
f"Partial results may still be available."
121+
)
122+
# Intentionally swallow the exception so execute() returns normally.
123+
# The FoundryExecutionManager (see PR #45541) provides an additional
124+
# outer recovery layer. If _scenario_result remains None,
125+
# downstream get_attack_results() returns an empty list safely.
126+
try:
127+
# Relies on PyRIT FoundryScenario internal `_result` attribute
128+
# to retrieve partial results accumulated before the failure.
129+
# hasattr guards against future PyRIT versions removing this attribute.
130+
# If the attribute type changes, get_attack_results() will fail safely downstream.
131+
if hasattr(self._scenario, "_result"):
132+
self._scenario_result = self._scenario._result
133+
except Exception as e:
134+
self.logger.debug("Failed to retrieve partial scenario result: %s", e, exc_info=True)
116135

117136
self.logger.info(f"Attack execution complete for {self.risk_category}")
118137

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_foundry.py

Lines changed: 85 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,11 @@ class TestGetAttackTypeName:
111111

112112
def test_with_dict_identifier(self):
113113
"""Test with current pyrit 0.11.0 dict form."""
114-
identifier = {"__type__": "PromptSendingAttack", "__module__": "pyrit.executor", "id": "abc"}
114+
identifier = {
115+
"__type__": "PromptSendingAttack",
116+
"__module__": "pyrit.executor",
117+
"id": "abc",
118+
}
115119
assert _get_attack_type_name(identifier) == "PromptSendingAttack"
116120

117121
def test_with_dict_missing_type(self):
@@ -1032,6 +1036,83 @@ def test_calculate_asr_by_strategy(self, mock_logger):
10321036
assert "MorseAttack" in asr_by_strategy
10331037
assert asr_by_strategy["MorseAttack"] == pytest.approx(1.0) # 1/1
10341038

1039+
@pytest.mark.asyncio
async def test_execute_swallows_run_async_exception_with_partial_results(self, mock_logger):
    """Verify execute() absorbs a run_async failure and keeps partial results.

    The patched scenario's ``run_async`` raises ``RuntimeError`` while its
    private ``_result`` attribute holds a sentinel object. The test expects
    ``execute()`` to return the orchestrator itself, ``_scenario_result`` to
    be that same sentinel, and exactly one warning to have been logged.
    """
    from pyrit.scenario.foundry import FoundryStrategy

    # Dataset double exposing a single seed group.
    dataset_config = MagicMock()
    dataset_config.get_all_seed_groups.return_value = [MagicMock()]

    orchestrator = ScenarioOrchestrator(
        risk_category="violence",
        objective_target=MagicMock(),
        rai_scorer=MagicMock(),
        logger=mock_logger,
    )

    # Scenario double: run_async blows up, but a partial result is already
    # parked on the internal `_result` attribute.
    partial_result = MagicMock()
    failing_scenario = AsyncMock()
    failing_scenario.initialize_async = AsyncMock()
    failing_scenario.run_async = AsyncMock(side_effect=RuntimeError("mid-execution failure"))
    failing_scenario._result = partial_result

    scenario_patch = patch(
        "azure.ai.evaluation.red_team._foundry._scenario_orchestrator.FoundryScenario",
        return_value=failing_scenario,
    )
    scoring_config_patch = patch("pyrit.executor.attack.AttackScoringConfig")

    with scenario_patch:
        with scoring_config_patch:
            # The call itself must not raise.
            result = await orchestrator.execute(
                dataset_config=dataset_config,
                strategies=[FoundryStrategy.Base64],
            )

    assert result == orchestrator
    # The object taken from `_result` must be the one stored on the orchestrator.
    assert orchestrator._scenario_result is partial_result
    mock_logger.warning.assert_called_once()
1078+
1079+
@pytest.mark.asyncio
async def test_execute_swallows_run_async_exception_no_partial_results(self, mock_logger):
    """Verify execute() absorbs a run_async failure when no ``_result`` exists.

    The patched scenario's ``run_async`` raises ``RuntimeError`` and its
    ``_result`` attribute has been deleted from the mock. The test expects
    ``execute()`` to return the orchestrator itself, ``_scenario_result`` to
    stay ``None``, and exactly one warning to have been logged.
    """
    from pyrit.scenario.foundry import FoundryStrategy

    # Dataset double exposing a single seed group.
    dataset_config = MagicMock()
    dataset_config.get_all_seed_groups.return_value = [MagicMock()]

    orchestrator = ScenarioOrchestrator(
        risk_category="violence",
        objective_target=MagicMock(),
        rai_scorer=MagicMock(),
        logger=mock_logger,
    )

    failing_scenario = AsyncMock()
    failing_scenario.initialize_async = AsyncMock()
    failing_scenario.run_async = AsyncMock(side_effect=RuntimeError("total failure"))
    # Deleting the attribute makes the mock report `_result` as missing,
    # simulating a scenario object without the private attribute.
    del failing_scenario._result

    scenario_patch = patch(
        "azure.ai.evaluation.red_team._foundry._scenario_orchestrator.FoundryScenario",
        return_value=failing_scenario,
    )
    scoring_config_patch = patch("pyrit.executor.attack.AttackScoringConfig")

    with scenario_patch:
        with scoring_config_patch:
            result = await orchestrator.execute(
                dataset_config=dataset_config,
                strategies=[FoundryStrategy.Base64],
            )

    assert result == orchestrator
    assert orchestrator._scenario_result is None
    mock_logger.warning.assert_called_once()
1115+
10351116

10361117
# =============================================================================
10371118
# Tests for FoundryResultProcessor
@@ -1510,7 +1591,9 @@ def test_group_results_by_strategy_keys_match_complexity_map(
15101591
self, mock_credential, mock_azure_ai_project, mock_logger
15111592
):
15121593
"""Test that strategy keys match ATTACK_STRATEGY_COMPLEXITY_MAP."""
1513-
from azure.ai.evaluation.red_team._utils.constants import ATTACK_STRATEGY_COMPLEXITY_MAP
1594+
from azure.ai.evaluation.red_team._utils.constants import (
1595+
ATTACK_STRATEGY_COMPLEXITY_MAP,
1596+
)
15141597

15151598
manager = FoundryExecutionManager(
15161599
credential=mock_credential,

0 commit comments

Comments
 (0)