Skip to content

Commit 39031dd

Browse files
behnam-o (Behnam Ousat) and hannahwestra25
authored
MAINT: Add labels to attack results (#1624)
Co-authored-by: Behnam Ousat <behnamousat@microsoft.com> Co-authored-by: hannahwestra25 <hannahwestra@microsoft.com>
1 parent 2e952a6 commit 39031dd

22 files changed

Lines changed: 505 additions & 137 deletions

pyrit/backend/mappers/attack_mappers.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -198,8 +198,11 @@ def attack_result_to_summary(
198198
"""
199199
message_count = stats.message_count
200200
last_preview = stats.last_message_preview
201-
labels = dict(stats.labels) if stats.labels else {}
202201

202+
# Merge attack-result labels with conversation-level labels.
203+
# Conversation labels take precedence on key collision.
204+
labels = dict(ar.labels) if ar.labels else {}
205+
labels.update(stats.labels or {})
203206
# Resolution order for created_at: explicit metadata override, then the
204207
# persisted AttackResult.timestamp, and finally datetime.now() as a
205208
# last-resort fallback for never-persisted results.

pyrit/backend/services/attack_service.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -330,6 +330,7 @@ async def create_attack_async(self, *, request: CreateAttackRequest) -> CreateAt
330330
"created_at": now.isoformat(),
331331
"updated_at": now.isoformat(),
332332
},
333+
labels=labels,
333334
)
334335

335336
# Store in memory

pyrit/executor/attack/multi_turn/chunked_request.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -327,6 +327,7 @@ async def _perform_async(self, *, context: ChunkedRequestAttackContext) -> Attac
327327
outcome_reason=outcome_reason,
328328
executed_turns=context.executed_turns,
329329
metadata={"combined_chunks": combined_value, "chunk_count": len(context.chunk_responses)},
330+
labels=context.memory_labels,
330331
)
331332

332333
def _determine_attack_outcome(

pyrit/executor/attack/multi_turn/crescendo.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -424,6 +424,7 @@ async def _perform_async(self, *, context: CrescendoAttackContext) -> CrescendoA
424424
last_response=context.last_response.get_piece() if context.last_response else None,
425425
last_score=context.last_score,
426426
related_conversations=context.related_conversations,
427+
labels=context.memory_labels,
427428
)
428429
# setting metadata for backtrack count
429430
result.backtrack_count = context.backtrack_count

pyrit/executor/attack/multi_turn/multi_prompt_sending.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -297,6 +297,7 @@ async def _perform_async(self, *, context: MultiTurnAttackContext[Any]) -> Attac
297297
outcome=outcome,
298298
outcome_reason=outcome_reason,
299299
executed_turns=context.executed_turns,
300+
labels=context.memory_labels,
300301
)
301302

302303
def _determine_attack_outcome(

pyrit/executor/attack/multi_turn/red_teaming.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -338,6 +338,7 @@ async def _perform_async(self, *, context: MultiTurnAttackContext[Any]) -> Attac
338338
last_response=context.last_response.get_piece() if context.last_response else None,
339339
last_score=context.last_score,
340340
related_conversations=context.related_conversations,
341+
labels=context.memory_labels,
341342
)
342343

343344
async def _teardown_async(self, *, context: MultiTurnAttackContext[Any]) -> None:

pyrit/executor/attack/multi_turn/tree_of_attacks.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2233,6 +2233,7 @@ def _create_attack_result(
22332233
last_response=last_response,
22342234
last_score=context.best_objective_score,
22352235
related_conversations=context.related_conversations,
2236+
labels=context.memory_labels,
22362237
)
22372238

22382239
# Set attack-specific metadata using properties

pyrit/executor/attack/single_turn/prompt_sending.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -238,6 +238,7 @@ async def _perform_async(self, *, context: SingleTurnAttackContext[Any]) -> Atta
238238
outcome=outcome,
239239
outcome_reason=outcome_reason,
240240
executed_turns=1,
241+
labels=context.memory_labels,
241242
)
242243

243244
def _determine_attack_outcome(

pyrit/executor/attack/single_turn/skeleton_key.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -181,4 +181,5 @@ def _create_skeleton_key_failure_result(self, *, context: SingleTurnAttackContex
181181
outcome=AttackOutcome.FAILURE,
182182
outcome_reason="Skeleton key prompt was filtered or failed",
183183
executed_turns=1,
184+
labels=context.memory_labels,
184185
)

pyrit/executor/benchmark/fairness_bias.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,7 @@ async def _perform_async(self, *, context: FairnessBiasBenchmarkContext) -> Atta
200200
atomic_attack_identifier=build_atomic_attack_identifier(
201201
attack_identifier=ComponentIdentifier.of(self),
202202
),
203+
labels=context.memory_labels,
203204
)
204205

205206
return last_attack_result

0 commit comments

Comments
 (0)