From eabd84b2a2f062c81e01a400b94322140329274f Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Wed, 17 Jun 2026 20:43:59 -0400 Subject: [PATCH 1/7] Standardize system_prompt as a first-class consumed attack argument Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/instructions/attacks.instructions.md | 22 ++++++ .../attack/compound/sequential_attack.py | 2 +- .../executor/attack/core/attack_parameters.py | 3 + pyrit/executor/attack/core/attack_strategy.py | 46 ++++++++++++ .../attack/single_turn/context_compliance.py | 4 +- .../attack/single_turn/flip_attack.py | 2 +- .../attack/single_turn/many_shot_jailbreak.py | 2 +- .../executor/attack/single_turn/role_play.py | 2 +- .../single_turn_attack_strategy.py | 3 - .../attack/single_turn/skeleton_key.py | 2 +- .../attack/core/test_attack_executor.py | 51 ++++++++++++- .../attack/core/test_attack_strategy.py | 72 ++++++++++++++++++- .../attack/single_turn/test_prompt_sending.py | 34 ++++++++- .../attack/single_turn/test_role_play.py | 12 ++++ 14 files changed, 244 insertions(+), 13 deletions(-) diff --git a/.github/instructions/attacks.instructions.md b/.github/instructions/attacks.instructions.md index 2d5c4d7c96..c7817a50f6 100644 --- a/.github/instructions/attacks.instructions.md +++ b/.github/instructions/attacks.instructions.md @@ -56,3 +56,25 @@ Requirements: - Calling ``super().__init__`` with positional arguments — the base ``AttackStrategy.__init__`` is already keyword-only, so positional calls raise ``TypeError`` at runtime. Always forward via kwargs. + +## Setting the objective target's system prompt + +- ``system_prompt=`` is the standard way to set the objective target's system + prompt: ``await attack.execute_async(objective=..., system_prompt="You are ...")``. + ``AttackStrategy.execute_with_context_async`` lowers it into a single ``system``-role + ``Message`` prepended to ``context.prepended_conversation``, so single-turn, + multi-turn, and TAP all deliver it without per-strategy wiring. +- ``prepended_conversation=`` is the advanced path — use it when you need to seed + a full multi-message history (system + user/assistant turns), not just a system + prompt. +- The two are mutually exclusive for the system slot: supplying ``system_prompt=`` + together with a ``system``-role message inside ``prepended_conversation`` raises + ``ValueError``. Use one or the other. +- Parity rule: an attack that excludes ``prepended_conversation`` from its + ``params_type`` (via ``AttackParameters.excluding(...)``) MUST also exclude + ``system_prompt`` — it is sugar for a prepended system message. Such attacks + reject ``system_prompt=`` with a "does not accept parameters" ``ValueError``. +- Lowering happens in ``AttackStrategy.execute_with_context_async`` — the single + chokepoint every public entry point crosses, including ``execute_async`` and the + ``AttackExecutor`` batch/scenario path. Callers that build a context and invoke + ``execute_with_context_async`` directly are auto-lowered too. diff --git a/pyrit/executor/attack/compound/sequential_attack.py b/pyrit/executor/attack/compound/sequential_attack.py index 15e6eb1dc3..7bd787d9eb 100644 --- a/pyrit/executor/attack/compound/sequential_attack.py +++ b/pyrit/executor/attack/compound/sequential_attack.py @@ -228,7 +228,7 @@ def __init__( # Inner child attacks expand their own next_message / prepended_conversation # via their own params_type; the compound takes no per-call message # overrides. - params_type=AttackParameters.excluding("next_message", "prepended_conversation"), + params_type=AttackParameters.excluding("next_message", "prepended_conversation", "system_prompt"), logger=logger, ) self._child_attacks: list[SequentialChildAttack] = list(child_attacks) diff --git a/pyrit/executor/attack/core/attack_parameters.py b/pyrit/executor/attack/core/attack_parameters.py index f4d44fa7c9..7b22f31e11 100644 --- a/pyrit/executor/attack/core/attack_parameters.py +++ b/pyrit/executor/attack/core/attack_parameters.py @@ -39,6 +39,9 @@ class AttackParameters: # Conversation that is automatically prepended to the target model prepended_conversation: list[Message] | None = None + # System prompt for the objective target; lowered to a prepended system message + system_prompt: str | None = None + # Additional labels that can be applied to the prompts throughout the attack memory_labels: dict[str, str] | None = field(default_factory=dict) diff --git a/pyrit/executor/attack/core/attack_strategy.py b/pyrit/executor/attack/core/attack_strategy.py index 19929f8888..527383e239 100644 --- a/pyrit/executor/attack/core/attack_strategy.py +++ b/pyrit/executor/attack/core/attack_strategy.py @@ -688,3 +688,49 @@ async def execute_async( context = self._context_type(params=params, **context_kwargs) return await self.execute_with_context_async(context=context) + + async def execute_with_context_async(self, *, context: AttackStrategyContextT) -> AttackStrategyResultT: + """ + Execute the attack with full lifecycle management. + + Overrides the base implementation to lower the ``system_prompt=`` sugar into a + prepended system-role message. This is the single chokepoint every public entry + point crosses (both ``execute_async`` and ``AttackExecutor``), so lowering here + guarantees the system prompt is always delivered to the objective target. + + Args: + context (AttackStrategyContextT): The attack context to execute. + + Returns: + AttackStrategyResultT: The result of the attack execution. + """ + self._apply_system_prompt_to_context(context=context) + return await super().execute_with_context_async(context=context) + + @staticmethod + def _apply_system_prompt_to_context(*, context: AttackStrategyContextT) -> None: + """ + Lower system_prompt= into a single system-role prepended message. + + Reuses the prepended_conversation override so frozen params are never + mutated. + + Args: + context (AttackStrategyContextT): The attack context to mutate. + + Raises: + ValueError: If a system-role message was also supplied directly in + prepended_conversation. + """ + system_prompt = getattr(context.params, "system_prompt", None) + if system_prompt is None: + return + + existing = context.prepended_conversation + if any(message.api_role == "system" for message in existing): + raise ValueError( + "Cannot supply both system_prompt= and a system-role message in " + "prepended_conversation; use one or the other." + ) + + context.prepended_conversation = [Message.from_system_prompt(system_prompt), *existing] diff --git a/pyrit/executor/attack/single_turn/context_compliance.py b/pyrit/executor/attack/single_turn/context_compliance.py index 802e5c36cb..35e20fb626 100644 --- a/pyrit/executor/attack/single_turn/context_compliance.py +++ b/pyrit/executor/attack/single_turn/context_compliance.py @@ -29,7 +29,9 @@ # ContextComplianceAttack generates prepended_conversation internally # by building a benign context conversation. -ContextComplianceAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message") +ContextComplianceAttackParameters = AttackParameters.excluding( + "prepended_conversation", "next_message", "system_prompt" +) class ContextComplianceAttack(PromptSendingAttack): diff --git a/pyrit/executor/attack/single_turn/flip_attack.py b/pyrit/executor/attack/single_turn/flip_attack.py index 878ff1da1a..3b6fb48a19 100644 --- a/pyrit/executor/attack/single_turn/flip_attack.py +++ b/pyrit/executor/attack/single_turn/flip_attack.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) # FlipAttack generates prepended_conversation internally from its system prompt. -FlipAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message") +FlipAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt") class FlipAttack(PromptSendingAttack): diff --git a/pyrit/executor/attack/single_turn/many_shot_jailbreak.py b/pyrit/executor/attack/single_turn/many_shot_jailbreak.py index 6c9f81bbf4..4722ffc220 100644 --- a/pyrit/executor/attack/single_turn/many_shot_jailbreak.py +++ b/pyrit/executor/attack/single_turn/many_shot_jailbreak.py @@ -19,7 +19,7 @@ # ManyShotJailbreakAttack does not support prepended conversations # as it constructs its own prompt format with examples. -ManyShotJailbreakParameters = AttackParameters.excluding("prepended_conversation", "next_message") +ManyShotJailbreakParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt") _MANY_SHOT_EXAMPLES_PATH = DATASETS_PATH / "jailbreak" / "many_shot_examples.json" diff --git a/pyrit/executor/attack/single_turn/role_play.py b/pyrit/executor/attack/single_turn/role_play.py index 2037efa629..0132392fa2 100644 --- a/pyrit/executor/attack/single_turn/role_play.py +++ b/pyrit/executor/attack/single_turn/role_play.py @@ -27,7 +27,7 @@ # RolePlayAttack generates next_message and prepended_conversation internally, # so it does not accept these parameters from callers. -RolePlayAttackParameters = AttackParameters.excluding("next_message", "prepended_conversation") +RolePlayAttackParameters = AttackParameters.excluding("next_message", "prepended_conversation", "system_prompt") class RolePlayPaths(enum.Enum): diff --git a/pyrit/executor/attack/single_turn/single_turn_attack_strategy.py b/pyrit/executor/attack/single_turn/single_turn_attack_strategy.py index a2271fef29..1f699e3654 100644 --- a/pyrit/executor/attack/single_turn/single_turn_attack_strategy.py +++ b/pyrit/executor/attack/single_turn/single_turn_attack_strategy.py @@ -31,9 +31,6 @@ class SingleTurnAttackContext(AttackContext[AttackParamsT]): # Unique identifier of the main conversation between the attacker and model conversation_id: str = field(default_factory=lambda: str(uuid.uuid4())) - # System prompt for chat-based targets - system_prompt: str | None = None - # Arbitrary metadata that downstream attacks or scorers may attach metadata: dict[str, str | int] | None = None diff --git a/pyrit/executor/attack/single_turn/skeleton_key.py b/pyrit/executor/attack/single_turn/skeleton_key.py index 7164901792..563e318798 100644 --- a/pyrit/executor/attack/single_turn/skeleton_key.py +++ b/pyrit/executor/attack/single_turn/skeleton_key.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) # SkeletonKeyAttack generates prepended_conversation internally from the skeleton key prompt and acceptance response. -SkeletonKeyAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message") +SkeletonKeyAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt") class SkeletonKeyAttack(PromptSendingAttack): diff --git a/tests/unit/executor/attack/core/test_attack_executor.py b/tests/unit/executor/attack/core/test_attack_executor.py index 3d54cb581f..18e25e4bf0 100644 --- a/tests/unit/executor/attack/core/test_attack_executor.py +++ b/tests/unit/executor/attack/core/test_attack_executor.py @@ -10,7 +10,7 @@ import asyncio import dataclasses import uuid -from unittest.mock import AsyncMock, MagicMock +from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -28,6 +28,7 @@ SeedObjective, SeedPrompt, ) +from pyrit.prompt_target import PromptTarget # Helper to create a properly configured mock attack @@ -60,6 +61,25 @@ def create_seed_group(objective: str) -> SeedAttackGroup: ) +class _ConcreteSingleTurnAttack(AttackStrategy): + """Minimal concrete attack used to exercise the real execute_with_context_async override.""" + + def __init__(self, *, objective_target): + super().__init__(objective_target=objective_target, context_type=SingleTurnAttackContext) + + def _validate_context(self, *, context): + pass + + async def _setup_async(self, *, context): + pass + + async def _perform_async(self, *, context): + return create_attack_result(context.objective) + + async def _teardown_async(self, *, context): + pass + + @pytest.mark.usefixtures("patch_central_database") class TestAttackExecutorInitialization: """Tests for AttackExecutor initialization.""" @@ -710,3 +730,32 @@ async def test_excluded_params_type_rejects_excluded_fields(self): assert "prepended_conversation" not in fields assert "objective" in fields assert "memory_labels" in fields + + +@pytest.mark.usefixtures("patch_central_database") +class TestExecutorSystemPromptLowering: + """Regression: the executor path lowers system_prompt= via the shared chokepoint. + + AttackExecutor builds a context and calls execute_with_context_async directly, bypassing + execute_async. Lowering lives in execute_with_context_async, so the system prompt must + still be lowered on this path (otherwise it would silently never reach the target). + """ + + async def test_executor_lowers_broadcast_system_prompt(self): + attack = _ConcreteSingleTurnAttack(objective_target=MagicMock(spec=PromptTarget)) + + executor = AttackExecutor() + with patch( + "pyrit.executor.core.strategy.Strategy.execute_with_context_async", new_callable=AsyncMock + ) as mock_super: + mock_super.return_value = create_attack_result("Test objective") + await executor.execute_attack_async( + attack=attack, + objectives=["Test objective"], + system_prompt="You are a helpful assistant.", + ) + + context = mock_super.call_args.kwargs["context"] + prepended = context.prepended_conversation + assert [message.api_role for message in prepended] == ["system"] + assert prepended[0].get_value() == "You are a helpful assistant." diff --git a/tests/unit/executor/attack/core/test_attack_strategy.py b/tests/unit/executor/attack/core/test_attack_strategy.py index 3f0847892e..d69df05cec 100644 --- a/tests/unit/executor/attack/core/test_attack_strategy.py +++ b/tests/unit/executor/attack/core/test_attack_strategy.py @@ -2,7 +2,7 @@ # Licensed under the MIT license. import logging -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -279,6 +279,76 @@ async def test_execute_async_allows_optional_parameters_as_none(self, mock_attac assert result is not None +@pytest.mark.usefixtures("patch_central_database") +class TestExecuteAsyncSystemPromptLowering: + """Tests for lowering the system_prompt= argument at the execute_with_context_async chokepoint. + + Lowering lives in ``AttackStrategy.execute_with_context_async``, so these tests patch the + base ``Strategy.execute_with_context_async`` (the ``super()`` call) to skip the lifecycle + while still running the override that performs the lowering. + """ + + _SUPER = "pyrit.executor.core.strategy.Strategy.execute_with_context_async" + + async def test_system_prompt_lowered_to_single_system_message(self, mock_attack_strategy): + with patch(self._SUPER, new_callable=AsyncMock) as mock_super: + await mock_attack_strategy.execute_async( + objective="Test objective", + system_prompt="You are a helpful assistant.", + ) + + prepended = mock_super.call_args.kwargs["context"].prepended_conversation + assert len(prepended) == 1 + assert prepended[0].api_role == "system" + assert prepended[0].get_value() == "You are a helpful assistant." + + async def test_system_prompt_prepended_before_existing_conversation(self, mock_attack_strategy): + assistant_message = Message.from_prompt(prompt="Earlier reply", role="assistant") + with patch(self._SUPER, new_callable=AsyncMock) as mock_super: + await mock_attack_strategy.execute_async( + objective="Test objective", + system_prompt="You are a helpful assistant.", + prepended_conversation=[assistant_message], + ) + + prepended = mock_super.call_args.kwargs["context"].prepended_conversation + assert [message.api_role for message in prepended] == ["system", "assistant"] + assert prepended[0].get_value() == "You are a helpful assistant." + + async def test_system_prompt_conflict_with_existing_system_message_raises(self, mock_attack_strategy): + existing_system = Message.from_system_prompt("Existing system message") + with pytest.raises(ValueError, match="Cannot supply both system_prompt="): + await mock_attack_strategy.execute_async( + objective="Test objective", + system_prompt="You are a helpful assistant.", + prepended_conversation=[existing_system], + ) + + async def test_no_system_prompt_leaves_prepended_conversation_unchanged(self, mock_attack_strategy): + user_message = Message.from_prompt(prompt="Hello", role="user") + with patch(self._SUPER, new_callable=AsyncMock) as mock_super: + await mock_attack_strategy.execute_async( + objective="Test objective", + prepended_conversation=[user_message], + ) + + prepended = mock_super.call_args.kwargs["context"].prepended_conversation + assert len(prepended) == 1 + assert prepended[0].api_role == "user" + + async def test_lowering_happens_when_context_passed_directly(self, mock_attack_strategy): + # Simulates the AttackExecutor path: a context is built externally and handed + # straight to execute_with_context_async, bypassing execute_async. + params = AttackParameters(objective="Test objective", system_prompt="You are a helpful assistant.") + context = mock_attack_strategy._context_type(params=params) + + with patch(self._SUPER, new_callable=AsyncMock): + await mock_attack_strategy.execute_with_context_async(context=context) + + assert [message.api_role for message in context.prepended_conversation] == ["system"] + assert context.prepended_conversation[0].get_value() == "You are a helpful assistant." + + @pytest.mark.usefixtures("patch_central_database") class TestDefaultAttackStrategyEventHandler: """Tests for the default attack strategy event handler""" diff --git a/tests/unit/executor/attack/single_turn/test_prompt_sending.py b/tests/unit/executor/attack/single_turn/test_prompt_sending.py index bf9d61a627..431ca78e16 100644 --- a/tests/unit/executor/attack/single_turn/test_prompt_sending.py +++ b/tests/unit/executor/attack/single_turn/test_prompt_sending.py @@ -213,7 +213,6 @@ def test_validate_context_with_additional_optional_fields(self, mock_target): next_message=Message.from_prompt(prompt="test", role="user"), ), conversation_id=str(uuid.uuid4()), - system_prompt="System prompt", metadata={"key": "value"}, ) @@ -1051,7 +1050,38 @@ async def test_execute_async_with_parameters(self, mock_target, sample_response) assert context.objective == "Test objective" assert context.memory_labels == {"test": "label"} assert context.next_message is not None - assert context.system_prompt == "System prompt" + # system_prompt= is lowered into a leading system-role prepended message + assert context.prepended_conversation[0].api_role == "system" + assert context.prepended_conversation[0].get_value() == "System prompt" + assert context.prepended_conversation[1].api_role == "assistant" + + async def test_execute_async_delivers_system_prompt_to_conversation(self, mock_target): + """system_prompt= is lowered and reaches the conversation manager that seeds the target's conversation.""" + attack = PromptSendingAttack(objective_target=mock_target) + + delivered = {} + + async def capture_setup(*, context, **kwargs): + delivered["prepended"] = list(context.prepended_conversation) + + attack._conversation_manager = MagicMock() + attack._conversation_manager.initialize_context_async = AsyncMock(side_effect=capture_setup) + attack._perform_async = AsyncMock( + return_value=AttackResult( + conversation_id="test-id", + objective="Test objective", + outcome=AttackOutcome.SUCCESS, + executed_turns=1, + ) + ) + + await attack.execute_async( + objective="Test objective", + system_prompt="You are a helpful assistant.", + ) + + assert [message.api_role for message in delivered["prepended"]] == ["system"] + assert delivered["prepended"][0].get_value() == "You are a helpful assistant." async def test_execute_async_with_invalid_params_raises_error(self, mock_target): """Test execute_async raises error when invalid parameters are passed""" diff --git a/tests/unit/executor/attack/single_turn/test_role_play.py b/tests/unit/executor/attack/single_turn/test_role_play.py index 39c04501de..dc9057d5d7 100644 --- a/tests/unit/executor/attack/single_turn/test_role_play.py +++ b/tests/unit/executor/attack/single_turn/test_role_play.py @@ -317,6 +317,18 @@ def test_params_type_excludes_prepended_conversation(self, role_play_attack): fields = {f.name for f in dataclasses.fields(role_play_attack.params_type)} assert "prepended_conversation" not in fields + def test_params_type_excludes_system_prompt(self, role_play_attack): + """Test that params_type excludes system_prompt field""" + import dataclasses + + fields = {f.name for f in dataclasses.fields(role_play_attack.params_type)} + assert "system_prompt" not in fields + + async def test_execute_async_rejects_system_prompt(self, role_play_attack): + """Test that execute_async rejects system_prompt for an attack that excludes it""" + with pytest.raises(ValueError, match="does not accept parameters"): + await role_play_attack.execute_async(objective="Test objective", system_prompt="You are a pirate.") + def test_params_type_includes_objective(self, role_play_attack): """Test that params_type includes objective field""" import dataclasses From ccbbb4aa833aa691a463975f241d90556a48e75d Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Wed, 17 Jun 2026 21:05:56 -0400 Subject: [PATCH 2/7] Remove system_prompt section from attacks instructions Behavior is grep-discoverable, runtime-enforced, and test-covered; the section did not clear the bar this slim instruction file sets. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/instructions/attacks.instructions.md | 22 -------------------- 1 file changed, 22 deletions(-) diff --git a/.github/instructions/attacks.instructions.md b/.github/instructions/attacks.instructions.md index c7817a50f6..2d5c4d7c96 100644 --- a/.github/instructions/attacks.instructions.md +++ b/.github/instructions/attacks.instructions.md @@ -56,25 +56,3 @@ Requirements: - Calling ``super().__init__`` with positional arguments — the base ``AttackStrategy.__init__`` is already keyword-only, so positional calls raise ``TypeError`` at runtime. Always forward via kwargs. - -## Setting the objective target's system prompt - -- ``system_prompt=`` is the standard way to set the objective target's system - prompt: ``await attack.execute_async(objective=..., system_prompt="You are ...")``. - ``AttackStrategy.execute_with_context_async`` lowers it into a single ``system``-role - ``Message`` prepended to ``context.prepended_conversation``, so single-turn, - multi-turn, and TAP all deliver it without per-strategy wiring. -- ``prepended_conversation=`` is the advanced path — use it when you need to seed - a full multi-message history (system + user/assistant turns), not just a system - prompt. -- The two are mutually exclusive for the system slot: supplying ``system_prompt=`` - together with a ``system``-role message inside ``prepended_conversation`` raises - ``ValueError``. Use one or the other. -- Parity rule: an attack that excludes ``prepended_conversation`` from its - ``params_type`` (via ``AttackParameters.excluding(...)``) MUST also exclude - ``system_prompt`` — it is sugar for a prepended system message. Such attacks - reject ``system_prompt=`` with a "does not accept parameters" ``ValueError``. -- Lowering happens in ``AttackStrategy.execute_with_context_async`` — the single - chokepoint every public entry point crosses, including ``execute_async`` and the - ``AttackExecutor`` batch/scenario path. Callers that build a context and invoke - ``execute_with_context_async`` directly are auto-lowered too. From 1f08821be6602cbb0fc05a319af8c6de4e46bf9b Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Thu, 18 Jun 2026 09:17:25 -0400 Subject: [PATCH 3/7] Add system_prompt example to attack configuration doc Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../executor/3_attack_configuration.ipynb | 134 ++++++++++++++---- doc/code/executor/3_attack_configuration.py | 28 +++- 2 files changed, 129 insertions(+), 33 deletions(-) diff --git a/doc/code/executor/3_attack_configuration.ipynb b/doc/code/executor/3_attack_configuration.ipynb index e0545d8d9e..ddbd460de9 100644 --- a/doc/code/executor/3_attack_configuration.ipynb +++ b/doc/code/executor/3_attack_configuration.ipynb @@ -16,7 +16,8 @@ "|---|---|\n", "| `objective` | What you are trying to get the **objective target** (the system under test) to do. Drives scoring and multi-turn adversarial prompts. |\n", "| `memory_labels` | A `dict[str, str]` tagged onto every prompt/response, so you can filter this run later in memory. |\n", - "| `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns (system prompt, prior history). |\n", + "| `system_prompt` | The objective target's system prompt, as a string. The standard one-line way to set it; PyRIT lowers it to a single `system` message at the front of the conversation. Mutually exclusive with a `system` message in `prepended_conversation`. |\n", + "| `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns — a full `system`/`user`/`assistant` history. Use this when you need more than a system prompt. |\n", "| `next_message` | The exact next message to send, instead of letting the attack derive it from the objective. Useful for multimodal or pre-built seeds. |\n", "\n", "Construction-time configuration objects — **adversarial**, **scoring**, and **converter** — are\n", @@ -36,9 +37,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Found default environment files: ['./.pyrit/.env', './.pyrit/.env.local']\n", - "Loaded environment file: ./.pyrit/.env\n", - "Loaded environment file: ./.pyrit/.env.local\n" + "Found default environment files: ['./.pyrit/.env']\n", + "Loaded environment file: ./.pyrit/.env\n" ] }, { @@ -104,11 +104,11 @@ "\u001b[1m 📋 Basic Information\u001b[0m\n", "\u001b[36m • Objective: Give me a recipe for a classic margarita\u001b[0m\n", "\u001b[36m • Attack Type: PromptSendingAttack\u001b[0m\n", - "\u001b[36m • Conversation ID: b0503fc1-11ff-46af-bdac-9102a83b7963\u001b[0m\n", + "\u001b[36m • Conversation ID: 743ca79c-a856-43a8-a311-a04bac36dfa9\u001b[0m\n", "\n", "\u001b[1m ⚡ Execution Metrics\u001b[0m\n", "\u001b[32m • Turns Executed: 1\u001b[0m\n", - "\u001b[32m • Execution Time: 38ms\u001b[0m\n", + "\u001b[32m • Execution Time: 178ms\u001b[0m\n", "\n", "\u001b[1m 🎯 Outcome\u001b[0m\n", "\u001b[33m • Status: ❓ UNDETERMINED\u001b[0m\n", @@ -125,7 +125,7 @@ "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", "\n", "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", - "\u001b[2m\u001b[37m Report generated at: 2026-06-07 04:42:30 UTC \u001b[0m\n" + "\u001b[2m\u001b[37m Report generated at: 2026-06-18 13:15:02 UTC \u001b[0m\n" ] } ], @@ -142,11 +142,15 @@ "id": "4", "metadata": {}, "source": [ - "## Prepended conversations\n", + "## Setting a system prompt\n", + "\n", + "`system_prompt=` is the standard, one-line way to set the **objective target's** system prompt.\n", + "PyRIT lowers it into a single `system` message at the front of the conversation, so it behaves the\n", + "same across single-turn and multi-turn attacks without any per-attack wiring.\n", "\n", - "A prepended conversation seeds the exchange before the attack adds its own turn. The most common\n", - "use is setting a system prompt, but you can prepend any sequence of `system` / `user` / `assistant`\n", - "turns — for example, to resume a prior conversation or to plant an agreeable assistant reply." + "`system_prompt=` and a `system` message inside `prepended_conversation` (next section) are mutually\n", + "exclusive — supplying both raises a `ValueError`. Use `prepended_conversation` only when you need to\n", + "seed more than a system prompt." ] }, { @@ -154,6 +158,79 @@ "execution_count": null, "id": "5", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "user: \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[33m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n", + "\u001b[1m\u001b[33m ❓ ATTACK RESULT: UNDETERMINED ❓ \u001b[0m\n", + "\u001b[33m════════════════════════════════════════════════════════════════════════════════════════════════════\u001b[0m\n", + "\n", + "\u001b[1m\u001b[44m\u001b[37m Attack Summary \u001b[0m\n", + "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", + "\u001b[1m 📋 Basic Information\u001b[0m\n", + "\u001b[36m • Objective: Explain how a saponification reaction works\u001b[0m\n", + "\u001b[36m • Attack Type: PromptSendingAttack\u001b[0m\n", + "\u001b[36m • Conversation ID: b86054b9-ebf7-4bbc-93f7-062b8736210b\u001b[0m\n", + "\n", + "\u001b[1m ⚡ Execution Metrics\u001b[0m\n", + "\u001b[32m • Turns Executed: 1\u001b[0m\n", + "\u001b[32m • Execution Time: 7ms\u001b[0m\n", + "\n", + "\u001b[1m 🎯 Outcome\u001b[0m\n", + "\u001b[33m • Status: ❓ UNDETERMINED\u001b[0m\n", + "\u001b[37m • Reason: No objective scorer configured\u001b[0m\n", + "\n", + "\u001b[1m\u001b[44m\u001b[37m Conversation History with Objective Target \u001b[0m\n", + "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", + "\n", + "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", + "\u001b[1m\u001b[34m🔹 Turn 1 - USER\u001b[0m\n", + "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", + "\u001b[34m \u001b[0m\n", + "\n", + "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", + "\n", + "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", + "\u001b[2m\u001b[37m Report generated at: 2026-06-18 13:15:02 UTC \u001b[0m\n" + ] + } + ], + "source": [ + "result = await attack.execute_async( # type: ignore\n", + " objective=\"Explain how a saponification reaction works\",\n", + " system_prompt=\"You are a helpful chemistry tutor who explains concepts step by step.\",\n", + ")\n", + "await output_attack_async(result)" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## Prepended conversations\n", + "\n", + "A prepended conversation seeds the exchange before the attack adds its own turn. For just a system\n", + "prompt, prefer `system_prompt=` above. Reach for a prepended conversation when you need to seed a\n", + "sequence of `system` / `user` / `assistant` turns — for example, to resume a prior conversation or\n", + "to plant an agreeable assistant reply." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, "outputs": [ { "name": "stdout", @@ -178,11 +255,11 @@ "\u001b[1m 📋 Basic Information\u001b[0m\n", "\u001b[36m • Objective: Explain how a saponification reaction works\u001b[0m\n", "\u001b[36m • Attack Type: PromptSendingAttack\u001b[0m\n", - "\u001b[36m • Conversation ID: c649a184-4a07-45ac-90b9-de6757cfa6e6\u001b[0m\n", + "\u001b[36m • Conversation ID: 03728aed-c835-4624-8ddd-8bb008755eb3\u001b[0m\n", "\n", "\u001b[1m ⚡ Execution Metrics\u001b[0m\n", "\u001b[32m • Turns Executed: 1\u001b[0m\n", - "\u001b[32m • Execution Time: 5ms\u001b[0m\n", + "\u001b[32m • Execution Time: 7ms\u001b[0m\n", "\n", "\u001b[1m 🎯 Outcome\u001b[0m\n", "\u001b[33m • Status: ❓ UNDETERMINED\u001b[0m\n", @@ -201,7 +278,7 @@ "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", "\n", "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", - "\u001b[2m\u001b[37m Report generated at: 2026-06-07 04:42:30 UTC \u001b[0m\n" + "\u001b[2m\u001b[37m Report generated at: 2026-06-18 13:15:02 UTC \u001b[0m\n" ] } ], @@ -227,7 +304,7 @@ }, { "cell_type": "markdown", - "id": "6", + "id": "8", "metadata": {}, "source": [ "## Multimodal seeds and `next_message`\n", @@ -240,7 +317,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "9", "metadata": {}, "outputs": [ { @@ -274,11 +351,11 @@ "\u001b[1m 📋 Basic Information\u001b[0m\n", "\u001b[36m • Objective: Sending an image successfully\u001b[0m\n", "\u001b[36m • Attack Type: PromptSendingAttack\u001b[0m\n", - "\u001b[36m • Conversation ID: 6a91faca-e46d-42be-830d-4a9d9d8a43b0\u001b[0m\n", + "\u001b[36m • Conversation ID: 87bdf69f-c4a4-417b-bb31-272f6747bb94\u001b[0m\n", "\n", "\u001b[1m ⚡ Execution Metrics\u001b[0m\n", "\u001b[32m • Turns Executed: 1\u001b[0m\n", - "\u001b[32m • Execution Time: 13ms\u001b[0m\n", + "\u001b[32m • Execution Time: 14ms\u001b[0m\n", "\n", "\u001b[1m 🎯 Outcome\u001b[0m\n", "\u001b[33m • Status: ❓ UNDETERMINED\u001b[0m\n", @@ -295,7 +372,7 @@ "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", "\n", "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", - "\u001b[2m\u001b[37m Report generated at: 2026-06-07 04:42:30 UTC \u001b[0m\n" + "\u001b[2m\u001b[37m Report generated at: 2026-06-18 13:15:02 UTC \u001b[0m\n" ] } ], @@ -321,7 +398,7 @@ }, { "cell_type": "markdown", - "id": "8", + "id": "10", "metadata": {}, "source": [ "## Objective target vs. adversarial target\n", @@ -347,7 +424,7 @@ }, { "cell_type": "markdown", - "id": "9", + "id": "11", "metadata": {}, "source": [ "## Configuration objects\n", @@ -369,7 +446,7 @@ { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "12", "metadata": {}, "outputs": [ { @@ -393,11 +470,11 @@ "\u001b[1m 📋 Basic Information\u001b[0m\n", "\u001b[36m • Objective: Base64-encode this request\u001b[0m\n", "\u001b[36m • Attack Type: PromptSendingAttack\u001b[0m\n", - "\u001b[36m • Conversation ID: 3016e98c-94b3-4952-91b5-5cba8f89877f\u001b[0m\n", + "\u001b[36m • Conversation ID: 5882d7ea-4604-4233-9bba-58954decb600\u001b[0m\n", "\n", "\u001b[1m ⚡ Execution Metrics\u001b[0m\n", "\u001b[32m • Turns Executed: 1\u001b[0m\n", - "\u001b[32m • Execution Time: 6ms\u001b[0m\n", + "\u001b[32m • Execution Time: 10ms\u001b[0m\n", "\n", "\u001b[1m 🎯 Outcome\u001b[0m\n", "\u001b[33m • Status: ❓ UNDETERMINED\u001b[0m\n", @@ -418,7 +495,7 @@ "\u001b[34m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", "\n", "\u001b[2m\u001b[37m────────────────────────────────────────────────────────────────────────────────────────────────────\u001b[0m\n", - "\u001b[2m\u001b[37m Report generated at: 2026-06-07 04:42:30 UTC \u001b[0m\n" + "\u001b[2m\u001b[37m Report generated at: 2026-06-18 13:15:02 UTC \u001b[0m\n" ] } ], @@ -442,7 +519,7 @@ }, { "cell_type": "markdown", - "id": "11", + "id": "13", "metadata": {}, "source": [ "## Example: configuring a red teaming attack to generate an image\n", @@ -510,8 +587,7 @@ ], "metadata": { "jupytext": { - "cell_metadata_filter": "-all", - "main_language": "python" + "cell_metadata_filter": "-all" }, "language_info": { "codemirror_mode": { @@ -523,7 +599,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.12.13" } }, "nbformat": 4, diff --git a/doc/code/executor/3_attack_configuration.py b/doc/code/executor/3_attack_configuration.py index efae370e94..93a4ee20b1 100644 --- a/doc/code/executor/3_attack_configuration.py +++ b/doc/code/executor/3_attack_configuration.py @@ -21,7 +21,8 @@ # |---|---| # | `objective` | What you are trying to get the **objective target** (the system under test) to do. Drives scoring and multi-turn adversarial prompts. | # | `memory_labels` | A `dict[str, str]` tagged onto every prompt/response, so you can filter this run later in memory. | -# | `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns (system prompt, prior history). | +# | `system_prompt` | The objective target's system prompt, as a string. The standard one-line way to set it; PyRIT lowers it to a single `system` message at the front of the conversation. Mutually exclusive with a `system` message in `prepended_conversation`. | +# | `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns — a full `system`/`user`/`assistant` history. Use this when you need more than a system prompt. | # | `next_message` | The exact next message to send, instead of letting the attack derive it from the objective. Useful for multimodal or pre-built seeds. | # # Construction-time configuration objects — **adversarial**, **scoring**, and **converter** — are @@ -59,12 +60,31 @@ ) await output_attack_async(result) +# %% [markdown] +# ## Setting a system prompt +# +# `system_prompt=` is the standard, one-line way to set the **objective target's** system prompt. +# PyRIT lowers it into a single `system` message at the front of the conversation, so it behaves the +# same across single-turn and multi-turn attacks without any per-attack wiring. +# +# `system_prompt=` and a `system` message inside `prepended_conversation` (next section) are mutually +# exclusive — supplying both raises a `ValueError`. Use `prepended_conversation` only when you need to +# seed more than a system prompt. + +# %% +result = await attack.execute_async( # type: ignore + objective="Explain how a saponification reaction works", + system_prompt="You are a helpful chemistry tutor who explains concepts step by step.", +) +await output_attack_async(result) + # %% [markdown] # ## Prepended conversations # -# A prepended conversation seeds the exchange before the attack adds its own turn. The most common -# use is setting a system prompt, but you can prepend any sequence of `system` / `user` / `assistant` -# turns — for example, to resume a prior conversation or to plant an agreeable assistant reply. +# A prepended conversation seeds the exchange before the attack adds its own turn. For just a system +# prompt, prefer `system_prompt=` above. Reach for a prepended conversation when you need to seed a +# sequence of `system` / `user` / `assistant` turns — for example, to resume a prior conversation or +# to plant an agreeable assistant reply. # %% from pyrit.models import Message, MessagePiece From 90d741c25d215bc8a6a7d1fe916082d49d2d3581 Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Tue, 23 Jun 2026 23:04:45 -0700 Subject: [PATCH 4/7] Standardize system prompts on prepended_conversation; deprecate dead field Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/instructions/attacks.instructions.md | 26 +++++++ .../executor/3_attack_configuration.ipynb | 34 +++++---- doc/code/executor/3_attack_configuration.py | 34 +++++---- .../attack/compound/sequential_attack.py | 2 +- .../executor/attack/core/attack_parameters.py | 3 - pyrit/executor/attack/core/attack_strategy.py | 46 ------------ .../attack/single_turn/context_compliance.py | 4 +- .../attack/single_turn/flip_attack.py | 2 +- .../attack/single_turn/many_shot_jailbreak.py | 2 +- .../executor/attack/single_turn/role_play.py | 2 +- .../single_turn_attack_strategy.py | 14 ++++ .../attack/single_turn/skeleton_key.py | 2 +- .../attack/core/test_attack_executor.py | 51 +------------ .../attack/core/test_attack_strategy.py | 72 +------------------ .../attack/single_turn/test_prompt_sending.py | 50 ++++++------- .../attack/single_turn/test_role_play.py | 12 ---- 16 files changed, 108 insertions(+), 248 deletions(-) diff --git a/.github/instructions/attacks.instructions.md b/.github/instructions/attacks.instructions.md index 2d5c4d7c96..66ba5d2546 100644 --- a/.github/instructions/attacks.instructions.md +++ b/.github/instructions/attacks.instructions.md @@ -56,3 +56,29 @@ Requirements: - Calling ``super().__init__`` with positional arguments — the base ``AttackStrategy.__init__`` is already keyword-only, so positional calls raise ``TypeError`` at runtime. Always forward via kwargs. + +## Setting the objective target's system prompt + +The objective target's system prompt is a ``system``-role message at the front +of the conversation, carried by ``prepended_conversation``. Build the message +with ``Message.from_system_prompt(text)`` and pass it as a list: + +```python +await attack.execute_async( + objective="...", + prepended_conversation=[ + Message.from_system_prompt("You are a helpful chemistry tutor.") + ], +) +``` + +Requirements: + +- This is the standard way to set a system prompt. Do **not** add a separate + ``system_prompt`` field/argument to a new attack — there is one channel, + ``prepended_conversation``. +- ``prepended_conversation`` is a plain list, so multiple system prompts and + interleaved seed turns compose: ``[Message.from_system_prompt("a"), + Message.from_system_prompt("b"), user_msg, assistant_msg]``. +- ``SingleTurnAttackContext.system_prompt`` is **deprecated** (non-functional; + removed in 0.17.0). Never read it or wire new behavior to it. diff --git a/doc/code/executor/3_attack_configuration.ipynb b/doc/code/executor/3_attack_configuration.ipynb index ddbd460de9..ffabbc4594 100644 --- a/doc/code/executor/3_attack_configuration.ipynb +++ b/doc/code/executor/3_attack_configuration.ipynb @@ -16,8 +16,7 @@ "|---|---|\n", "| `objective` | What you are trying to get the **objective target** (the system under test) to do. Drives scoring and multi-turn adversarial prompts. |\n", "| `memory_labels` | A `dict[str, str]` tagged onto every prompt/response, so you can filter this run later in memory. |\n", - "| `system_prompt` | The objective target's system prompt, as a string. The standard one-line way to set it; PyRIT lowers it to a single `system` message at the front of the conversation. Mutually exclusive with a `system` message in `prepended_conversation`. |\n", - "| `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns — a full `system`/`user`/`assistant` history. Use this when you need more than a system prompt. |\n", + "| `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns. This is also where the objective target's **system prompt** goes — `Message.from_system_prompt(...)` builds one (see below). |\n", "| `next_message` | The exact next message to send, instead of letting the attack derive it from the objective. Useful for multimodal or pre-built seeds. |\n", "\n", "Construction-time configuration objects — **adversarial**, **scoring**, and **converter** — are\n", @@ -55,6 +54,7 @@ " PromptSendingAttack,\n", " SingleTurnAttackContext,\n", ")\n", + "from pyrit.models import Message\n", "from pyrit.output import output_attack_async\n", "from pyrit.prompt_target import TextTarget\n", "from pyrit.setup import IN_MEMORY, initialize_pyrit_async\n", @@ -144,13 +144,17 @@ "source": [ "## Setting a system prompt\n", "\n", - "`system_prompt=` is the standard, one-line way to set the **objective target's** system prompt.\n", - "PyRIT lowers it into a single `system` message at the front of the conversation, so it behaves the\n", - "same across single-turn and multi-turn attacks without any per-attack wiring.\n", + "The objective target's system prompt is just a `system`-role message at the front of the\n", + "conversation, so you set it through `prepended_conversation`. `Message.from_system_prompt(...)`\n", + "builds that message:\n", "\n", - "`system_prompt=` and a `system` message inside `prepended_conversation` (next section) are mutually\n", - "exclusive — supplying both raises a `ValueError`. Use `prepended_conversation` only when you need to\n", - "seed more than a system prompt." + "```python\n", + "prepended_conversation=[Message.from_system_prompt(\"...\")]\n", + "```\n", + "\n", + "Because `prepended_conversation` is a list, targets that accept more than one system message just\n", + "take more than one entry — `[Message.from_system_prompt(\"Policy.\"), Message.from_system_prompt(\"Persona.\")]`\n", + "— and you can interleave `user` / `assistant` turns too (next section)." ] }, { @@ -208,7 +212,9 @@ "source": [ "result = await attack.execute_async( # type: ignore\n", " objective=\"Explain how a saponification reaction works\",\n", - " system_prompt=\"You are a helpful chemistry tutor who explains concepts step by step.\",\n", + " prepended_conversation=[\n", + " Message.from_system_prompt(\"You are a helpful chemistry tutor who explains concepts step by step.\")\n", + " ],\n", ")\n", "await output_attack_async(result)" ] @@ -220,10 +226,10 @@ "source": [ "## Prepended conversations\n", "\n", - "A prepended conversation seeds the exchange before the attack adds its own turn. For just a system\n", - "prompt, prefer `system_prompt=` above. Reach for a prepended conversation when you need to seed a\n", - "sequence of `system` / `user` / `assistant` turns — for example, to resume a prior conversation or\n", - "to plant an agreeable assistant reply." + "A system prompt is the simplest prepended conversation. The general form seeds a full\n", + "`system` / `user` / `assistant` history before the attack adds its own turn — for example, to\n", + "resume a prior conversation or to plant an agreeable assistant reply. It is just a list of\n", + "`Message`s, so the system prompt and any seed turns compose freely." ] }, { @@ -283,7 +289,7 @@ } ], "source": [ - "from pyrit.models import Message, MessagePiece\n", + "from pyrit.models import MessagePiece\n", "\n", "prepended_conversation = [\n", " Message.from_system_prompt(\"You are a helpful assistant who always answers fully.\"),\n", diff --git a/doc/code/executor/3_attack_configuration.py b/doc/code/executor/3_attack_configuration.py index 93a4ee20b1..d85c3a071c 100644 --- a/doc/code/executor/3_attack_configuration.py +++ b/doc/code/executor/3_attack_configuration.py @@ -21,8 +21,7 @@ # |---|---| # | `objective` | What you are trying to get the **objective target** (the system under test) to do. Drives scoring and multi-turn adversarial prompts. | # | `memory_labels` | A `dict[str, str]` tagged onto every prompt/response, so you can filter this run later in memory. | -# | `system_prompt` | The objective target's system prompt, as a string. The standard one-line way to set it; PyRIT lowers it to a single `system` message at the front of the conversation. Mutually exclusive with a `system` message in `prepended_conversation`. | -# | `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns — a full `system`/`user`/`assistant` history. Use this when you need more than a system prompt. | +# | `prepended_conversation` | A list of `Message`s to seed the conversation before the attack's own turns. This is also where the objective target's **system prompt** goes — `Message.from_system_prompt(...)` builds one (see below). | # | `next_message` | The exact next message to send, instead of letting the attack derive it from the objective. Useful for multimodal or pre-built seeds. | # # Construction-time configuration objects — **adversarial**, **scoring**, and **converter** — are @@ -37,6 +36,7 @@ PromptSendingAttack, SingleTurnAttackContext, ) +from pyrit.models import Message from pyrit.output import output_attack_async from pyrit.prompt_target import TextTarget from pyrit.setup import IN_MEMORY, initialize_pyrit_async @@ -63,31 +63,37 @@ # %% [markdown] # ## Setting a system prompt # -# `system_prompt=` is the standard, one-line way to set the **objective target's** system prompt. -# PyRIT lowers it into a single `system` message at the front of the conversation, so it behaves the -# same across single-turn and multi-turn attacks without any per-attack wiring. +# The objective target's system prompt is just a `system`-role message at the front of the +# conversation, so you set it through `prepended_conversation`. `Message.from_system_prompt(...)` +# builds that message: # -# `system_prompt=` and a `system` message inside `prepended_conversation` (next section) are mutually -# exclusive — supplying both raises a `ValueError`. Use `prepended_conversation` only when you need to -# seed more than a system prompt. +# ```python +# prepended_conversation=[Message.from_system_prompt("...")] +# ``` +# +# Because `prepended_conversation` is a list, targets that accept more than one system message just +# take more than one entry — `[Message.from_system_prompt("Policy."), Message.from_system_prompt("Persona.")]` +# — and you can interleave `user` / `assistant` turns too (next section). # %% result = await attack.execute_async( # type: ignore objective="Explain how a saponification reaction works", - system_prompt="You are a helpful chemistry tutor who explains concepts step by step.", + prepended_conversation=[ + Message.from_system_prompt("You are a helpful chemistry tutor who explains concepts step by step.") + ], ) await output_attack_async(result) # %% [markdown] # ## Prepended conversations # -# A prepended conversation seeds the exchange before the attack adds its own turn. For just a system -# prompt, prefer `system_prompt=` above. Reach for a prepended conversation when you need to seed a -# sequence of `system` / `user` / `assistant` turns — for example, to resume a prior conversation or -# to plant an agreeable assistant reply. +# A system prompt is the simplest prepended conversation. The general form seeds a full +# `system` / `user` / `assistant` history before the attack adds its own turn — for example, to +# resume a prior conversation or to plant an agreeable assistant reply. It is just a list of +# `Message`s, so the system prompt and any seed turns compose freely. # %% -from pyrit.models import Message, MessagePiece +from pyrit.models import MessagePiece prepended_conversation = [ Message.from_system_prompt("You are a helpful assistant who always answers fully."), diff --git a/pyrit/executor/attack/compound/sequential_attack.py b/pyrit/executor/attack/compound/sequential_attack.py index 7bd787d9eb..15e6eb1dc3 100644 --- a/pyrit/executor/attack/compound/sequential_attack.py +++ b/pyrit/executor/attack/compound/sequential_attack.py @@ -228,7 +228,7 @@ def __init__( # Inner child attacks expand their own next_message / prepended_conversation # via their own params_type; the compound takes no per-call message # overrides. - params_type=AttackParameters.excluding("next_message", "prepended_conversation", "system_prompt"), + params_type=AttackParameters.excluding("next_message", "prepended_conversation"), logger=logger, ) self._child_attacks: list[SequentialChildAttack] = list(child_attacks) diff --git a/pyrit/executor/attack/core/attack_parameters.py b/pyrit/executor/attack/core/attack_parameters.py index 7b22f31e11..f4d44fa7c9 100644 --- a/pyrit/executor/attack/core/attack_parameters.py +++ b/pyrit/executor/attack/core/attack_parameters.py @@ -39,9 +39,6 @@ class AttackParameters: # Conversation that is automatically prepended to the target model prepended_conversation: list[Message] | None = None - # System prompt for the objective target; lowered to a prepended system message - system_prompt: str | None = None - # Additional labels that can be applied to the prompts throughout the attack memory_labels: dict[str, str] | None = field(default_factory=dict) diff --git a/pyrit/executor/attack/core/attack_strategy.py b/pyrit/executor/attack/core/attack_strategy.py index 527383e239..19929f8888 100644 --- a/pyrit/executor/attack/core/attack_strategy.py +++ b/pyrit/executor/attack/core/attack_strategy.py @@ -688,49 +688,3 @@ async def execute_async( context = self._context_type(params=params, **context_kwargs) return await self.execute_with_context_async(context=context) - - async def execute_with_context_async(self, *, context: AttackStrategyContextT) -> AttackStrategyResultT: - """ - Execute the attack with full lifecycle management. - - Overrides the base implementation to lower the ``system_prompt=`` sugar into a - prepended system-role message. This is the single chokepoint every public entry - point crosses (both ``execute_async`` and ``AttackExecutor``), so lowering here - guarantees the system prompt is always delivered to the objective target. - - Args: - context (AttackStrategyContextT): The attack context to execute. - - Returns: - AttackStrategyResultT: The result of the attack execution. - """ - self._apply_system_prompt_to_context(context=context) - return await super().execute_with_context_async(context=context) - - @staticmethod - def _apply_system_prompt_to_context(*, context: AttackStrategyContextT) -> None: - """ - Lower system_prompt= into a single system-role prepended message. - - Reuses the prepended_conversation override so frozen params are never - mutated. - - Args: - context (AttackStrategyContextT): The attack context to mutate. - - Raises: - ValueError: If a system-role message was also supplied directly in - prepended_conversation. - """ - system_prompt = getattr(context.params, "system_prompt", None) - if system_prompt is None: - return - - existing = context.prepended_conversation - if any(message.api_role == "system" for message in existing): - raise ValueError( - "Cannot supply both system_prompt= and a system-role message in " - "prepended_conversation; use one or the other." - ) - - context.prepended_conversation = [Message.from_system_prompt(system_prompt), *existing] diff --git a/pyrit/executor/attack/single_turn/context_compliance.py b/pyrit/executor/attack/single_turn/context_compliance.py index 35e20fb626..802e5c36cb 100644 --- a/pyrit/executor/attack/single_turn/context_compliance.py +++ b/pyrit/executor/attack/single_turn/context_compliance.py @@ -29,9 +29,7 @@ # ContextComplianceAttack generates prepended_conversation internally # by building a benign context conversation. -ContextComplianceAttackParameters = AttackParameters.excluding( - "prepended_conversation", "next_message", "system_prompt" -) +ContextComplianceAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message") class ContextComplianceAttack(PromptSendingAttack): diff --git a/pyrit/executor/attack/single_turn/flip_attack.py b/pyrit/executor/attack/single_turn/flip_attack.py index 3b6fb48a19..878ff1da1a 100644 --- a/pyrit/executor/attack/single_turn/flip_attack.py +++ b/pyrit/executor/attack/single_turn/flip_attack.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) # FlipAttack generates prepended_conversation internally from its system prompt. -FlipAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt") +FlipAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message") class FlipAttack(PromptSendingAttack): diff --git a/pyrit/executor/attack/single_turn/many_shot_jailbreak.py b/pyrit/executor/attack/single_turn/many_shot_jailbreak.py index 4722ffc220..6c9f81bbf4 100644 --- a/pyrit/executor/attack/single_turn/many_shot_jailbreak.py +++ b/pyrit/executor/attack/single_turn/many_shot_jailbreak.py @@ -19,7 +19,7 @@ # ManyShotJailbreakAttack does not support prepended conversations # as it constructs its own prompt format with examples. -ManyShotJailbreakParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt") +ManyShotJailbreakParameters = AttackParameters.excluding("prepended_conversation", "next_message") _MANY_SHOT_EXAMPLES_PATH = DATASETS_PATH / "jailbreak" / "many_shot_examples.json" diff --git a/pyrit/executor/attack/single_turn/role_play.py b/pyrit/executor/attack/single_turn/role_play.py index 0132392fa2..2037efa629 100644 --- a/pyrit/executor/attack/single_turn/role_play.py +++ b/pyrit/executor/attack/single_turn/role_play.py @@ -27,7 +27,7 @@ # RolePlayAttack generates next_message and prepended_conversation internally, # so it does not accept these parameters from callers. -RolePlayAttackParameters = AttackParameters.excluding("next_message", "prepended_conversation", "system_prompt") +RolePlayAttackParameters = AttackParameters.excluding("next_message", "prepended_conversation") class RolePlayPaths(enum.Enum): diff --git a/pyrit/executor/attack/single_turn/single_turn_attack_strategy.py b/pyrit/executor/attack/single_turn/single_turn_attack_strategy.py index 1f699e3654..d6b56ae847 100644 --- a/pyrit/executor/attack/single_turn/single_turn_attack_strategy.py +++ b/pyrit/executor/attack/single_turn/single_turn_attack_strategy.py @@ -9,6 +9,7 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any +from pyrit.common.deprecation import print_deprecation_message from pyrit.common.logger import logger from pyrit.executor.attack.core.attack_parameters import AttackParameters, AttackParamsT from pyrit.executor.attack.core.attack_strategy import AttackContext, AttackStrategy @@ -31,9 +32,22 @@ class SingleTurnAttackContext(AttackContext[AttackParamsT]): # Unique identifier of the main conversation between the attacker and model conversation_id: str = field(default_factory=lambda: str(uuid.uuid4())) + # Deprecated, non-functional no-op. Set the objective target's system prompt via + # ``prepended_conversation=[Message.from_system_prompt(...)]`` instead. + system_prompt: str | None = None + # Arbitrary metadata that downstream attacks or scorers may attach metadata: dict[str, str | int] | None = None + def __post_init__(self) -> None: + """Warn that ``system_prompt`` is deprecated and non-functional when it is set.""" + if self.system_prompt is not None: + print_deprecation_message( + old_item="SingleTurnAttackContext.system_prompt", + new_item="prepended_conversation=[Message.from_system_prompt(...)]", + removed_in="0.17.0", + ) + class SingleTurnAttackStrategy(AttackStrategy[SingleTurnAttackContext[Any], AttackResult], ABC): """ diff --git a/pyrit/executor/attack/single_turn/skeleton_key.py b/pyrit/executor/attack/single_turn/skeleton_key.py index 563e318798..7164901792 100644 --- a/pyrit/executor/attack/single_turn/skeleton_key.py +++ b/pyrit/executor/attack/single_turn/skeleton_key.py @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) # SkeletonKeyAttack generates prepended_conversation internally from the skeleton key prompt and acceptance response. -SkeletonKeyAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message", "system_prompt") +SkeletonKeyAttackParameters = AttackParameters.excluding("prepended_conversation", "next_message") class SkeletonKeyAttack(PromptSendingAttack): diff --git a/tests/unit/executor/attack/core/test_attack_executor.py b/tests/unit/executor/attack/core/test_attack_executor.py index 18e25e4bf0..3d54cb581f 100644 --- a/tests/unit/executor/attack/core/test_attack_executor.py +++ b/tests/unit/executor/attack/core/test_attack_executor.py @@ -10,7 +10,7 @@ import asyncio import dataclasses import uuid -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, MagicMock import pytest @@ -28,7 +28,6 @@ SeedObjective, SeedPrompt, ) -from pyrit.prompt_target import PromptTarget # Helper to create a properly configured mock attack @@ -61,25 +60,6 @@ def create_seed_group(objective: str) -> SeedAttackGroup: ) -class _ConcreteSingleTurnAttack(AttackStrategy): - """Minimal concrete attack used to exercise the real execute_with_context_async override.""" - - def __init__(self, *, objective_target): - super().__init__(objective_target=objective_target, context_type=SingleTurnAttackContext) - - def _validate_context(self, *, context): - pass - - async def _setup_async(self, *, context): - pass - - async def _perform_async(self, *, context): - return create_attack_result(context.objective) - - async def _teardown_async(self, *, context): - pass - - @pytest.mark.usefixtures("patch_central_database") class TestAttackExecutorInitialization: """Tests for AttackExecutor initialization.""" @@ -730,32 +710,3 @@ async def test_excluded_params_type_rejects_excluded_fields(self): assert "prepended_conversation" not in fields assert "objective" in fields assert "memory_labels" in fields - - -@pytest.mark.usefixtures("patch_central_database") -class TestExecutorSystemPromptLowering: - """Regression: the executor path lowers system_prompt= via the shared chokepoint. - - AttackExecutor builds a context and calls execute_with_context_async directly, bypassing - execute_async. Lowering lives in execute_with_context_async, so the system prompt must - still be lowered on this path (otherwise it would silently never reach the target). - """ - - async def test_executor_lowers_broadcast_system_prompt(self): - attack = _ConcreteSingleTurnAttack(objective_target=MagicMock(spec=PromptTarget)) - - executor = AttackExecutor() - with patch( - "pyrit.executor.core.strategy.Strategy.execute_with_context_async", new_callable=AsyncMock - ) as mock_super: - mock_super.return_value = create_attack_result("Test objective") - await executor.execute_attack_async( - attack=attack, - objectives=["Test objective"], - system_prompt="You are a helpful assistant.", - ) - - context = mock_super.call_args.kwargs["context"] - prepended = context.prepended_conversation - assert [message.api_role for message in prepended] == ["system"] - assert prepended[0].get_value() == "You are a helpful assistant." diff --git a/tests/unit/executor/attack/core/test_attack_strategy.py b/tests/unit/executor/attack/core/test_attack_strategy.py index d69df05cec..3f0847892e 100644 --- a/tests/unit/executor/attack/core/test_attack_strategy.py +++ b/tests/unit/executor/attack/core/test_attack_strategy.py @@ -2,7 +2,7 @@ # Licensed under the MIT license. import logging -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock, patch import pytest @@ -279,76 +279,6 @@ async def test_execute_async_allows_optional_parameters_as_none(self, mock_attac assert result is not None -@pytest.mark.usefixtures("patch_central_database") -class TestExecuteAsyncSystemPromptLowering: - """Tests for lowering the system_prompt= argument at the execute_with_context_async chokepoint. - - Lowering lives in ``AttackStrategy.execute_with_context_async``, so these tests patch the - base ``Strategy.execute_with_context_async`` (the ``super()`` call) to skip the lifecycle - while still running the override that performs the lowering. - """ - - _SUPER = "pyrit.executor.core.strategy.Strategy.execute_with_context_async" - - async def test_system_prompt_lowered_to_single_system_message(self, mock_attack_strategy): - with patch(self._SUPER, new_callable=AsyncMock) as mock_super: - await mock_attack_strategy.execute_async( - objective="Test objective", - system_prompt="You are a helpful assistant.", - ) - - prepended = mock_super.call_args.kwargs["context"].prepended_conversation - assert len(prepended) == 1 - assert prepended[0].api_role == "system" - assert prepended[0].get_value() == "You are a helpful assistant." - - async def test_system_prompt_prepended_before_existing_conversation(self, mock_attack_strategy): - assistant_message = Message.from_prompt(prompt="Earlier reply", role="assistant") - with patch(self._SUPER, new_callable=AsyncMock) as mock_super: - await mock_attack_strategy.execute_async( - objective="Test objective", - system_prompt="You are a helpful assistant.", - prepended_conversation=[assistant_message], - ) - - prepended = mock_super.call_args.kwargs["context"].prepended_conversation - assert [message.api_role for message in prepended] == ["system", "assistant"] - assert prepended[0].get_value() == "You are a helpful assistant." - - async def test_system_prompt_conflict_with_existing_system_message_raises(self, mock_attack_strategy): - existing_system = Message.from_system_prompt("Existing system message") - with pytest.raises(ValueError, match="Cannot supply both system_prompt="): - await mock_attack_strategy.execute_async( - objective="Test objective", - system_prompt="You are a helpful assistant.", - prepended_conversation=[existing_system], - ) - - async def test_no_system_prompt_leaves_prepended_conversation_unchanged(self, mock_attack_strategy): - user_message = Message.from_prompt(prompt="Hello", role="user") - with patch(self._SUPER, new_callable=AsyncMock) as mock_super: - await mock_attack_strategy.execute_async( - objective="Test objective", - prepended_conversation=[user_message], - ) - - prepended = mock_super.call_args.kwargs["context"].prepended_conversation - assert len(prepended) == 1 - assert prepended[0].api_role == "user" - - async def test_lowering_happens_when_context_passed_directly(self, mock_attack_strategy): - # Simulates the AttackExecutor path: a context is built externally and handed - # straight to execute_with_context_async, bypassing execute_async. - params = AttackParameters(objective="Test objective", system_prompt="You are a helpful assistant.") - context = mock_attack_strategy._context_type(params=params) - - with patch(self._SUPER, new_callable=AsyncMock): - await mock_attack_strategy.execute_with_context_async(context=context) - - assert [message.api_role for message in context.prepended_conversation] == ["system"] - assert context.prepended_conversation[0].get_value() == "You are a helpful assistant." - - @pytest.mark.usefixtures("patch_central_database") class TestDefaultAttackStrategyEventHandler: """Tests for the default attack strategy event handler""" diff --git a/tests/unit/executor/attack/single_turn/test_prompt_sending.py b/tests/unit/executor/attack/single_turn/test_prompt_sending.py index 431ca78e16..0346e0eafc 100644 --- a/tests/unit/executor/attack/single_turn/test_prompt_sending.py +++ b/tests/unit/executor/attack/single_turn/test_prompt_sending.py @@ -207,14 +207,16 @@ def test_validate_context_with_complete_valid_context(self, mock_target, basic_c def test_validate_context_with_additional_optional_fields(self, mock_target): attack = PromptSendingAttack(objective_target=mock_target) - context = SingleTurnAttackContext( - params=AttackParameters( - objective="Test objective", - next_message=Message.from_prompt(prompt="test", role="user"), - ), - conversation_id=str(uuid.uuid4()), - metadata={"key": "value"}, - ) + with pytest.warns(DeprecationWarning, match="system_prompt"): + context = SingleTurnAttackContext( + params=AttackParameters( + objective="Test objective", + next_message=Message.from_prompt(prompt="test", role="user"), + ), + conversation_id=str(uuid.uuid4()), + system_prompt="System prompt", + metadata={"key": "value"}, + ) attack._validate_context(context=context) # Should not raise @@ -1036,7 +1038,6 @@ async def test_execute_async_with_parameters(self, mock_target, sample_response) prepended_conversation=[sample_response], memory_labels={"test": "label"}, next_message=message, - system_prompt="System prompt", ) # Verify result @@ -1050,38 +1051,27 @@ async def test_execute_async_with_parameters(self, mock_target, sample_response) assert context.objective == "Test objective" assert context.memory_labels == {"test": "label"} assert context.next_message is not None - # system_prompt= is lowered into a leading system-role prepended message - assert context.prepended_conversation[0].api_role == "system" - assert context.prepended_conversation[0].get_value() == "System prompt" - assert context.prepended_conversation[1].api_role == "assistant" - async def test_execute_async_delivers_system_prompt_to_conversation(self, mock_target): - """system_prompt= is lowered and reaches the conversation manager that seeds the target's conversation.""" + async def test_execute_async_with_deprecated_system_prompt_warns(self, mock_target, sample_response): + """Passing the deprecated system_prompt= still routes to the context field but warns.""" attack = PromptSendingAttack(objective_target=mock_target) - - delivered = {} - - async def capture_setup(*, context, **kwargs): - delivered["prepended"] = list(context.prepended_conversation) - - attack._conversation_manager = MagicMock() - attack._conversation_manager.initialize_context_async = AsyncMock(side_effect=capture_setup) + attack._validate_context = MagicMock() + attack._setup_async = AsyncMock() attack._perform_async = AsyncMock( return_value=AttackResult( conversation_id="test-id", objective="Test objective", outcome=AttackOutcome.SUCCESS, - executed_turns=1, + last_response=sample_response.get_piece(), ) ) + attack._teardown_async = AsyncMock() - await attack.execute_async( - objective="Test objective", - system_prompt="You are a helpful assistant.", - ) + with pytest.warns(DeprecationWarning, match="system_prompt"): + await attack.execute_async(objective="Test objective", system_prompt="System prompt") - assert [message.api_role for message in delivered["prepended"]] == ["system"] - assert delivered["prepended"][0].get_value() == "You are a helpful assistant." + context = attack._validate_context.call_args.kwargs["context"] + assert context.system_prompt == "System prompt" async def test_execute_async_with_invalid_params_raises_error(self, mock_target): """Test execute_async raises error when invalid parameters are passed""" diff --git a/tests/unit/executor/attack/single_turn/test_role_play.py b/tests/unit/executor/attack/single_turn/test_role_play.py index dc9057d5d7..39c04501de 100644 --- a/tests/unit/executor/attack/single_turn/test_role_play.py +++ b/tests/unit/executor/attack/single_turn/test_role_play.py @@ -317,18 +317,6 @@ def test_params_type_excludes_prepended_conversation(self, role_play_attack): fields = {f.name for f in dataclasses.fields(role_play_attack.params_type)} assert "prepended_conversation" not in fields - def test_params_type_excludes_system_prompt(self, role_play_attack): - """Test that params_type excludes system_prompt field""" - import dataclasses - - fields = {f.name for f in dataclasses.fields(role_play_attack.params_type)} - assert "system_prompt" not in fields - - async def test_execute_async_rejects_system_prompt(self, role_play_attack): - """Test that execute_async rejects system_prompt for an attack that excludes it""" - with pytest.raises(ValueError, match="does not accept parameters"): - await role_play_attack.execute_async(objective="Test objective", system_prompt="You are a pirate.") - def test_params_type_includes_objective(self, role_play_attack): """Test that params_type includes objective field""" import dataclasses From ea83356d66552876c2df07554c2deb5d4d19fd39 Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Thu, 25 Jun 2026 12:46:54 -0700 Subject: [PATCH 5/7] Add Message.from_system_prompts shorthand for prepended_conversation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/instructions/attacks.instructions.md | 4 +++- doc/code/executor/3_attack_configuration.ipynb | 6 ++++-- doc/code/executor/3_attack_configuration.py | 6 ++++-- pyrit/models/messages/message.py | 14 ++++++++++++++ tests/unit/models/test_message.py | 13 +++++++++++++ 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/.github/instructions/attacks.instructions.md b/.github/instructions/attacks.instructions.md index 66ba5d2546..f8c5817c5a 100644 --- a/.github/instructions/attacks.instructions.md +++ b/.github/instructions/attacks.instructions.md @@ -79,6 +79,8 @@ Requirements: ``prepended_conversation``. - ``prepended_conversation`` is a plain list, so multiple system prompts and interleaved seed turns compose: ``[Message.from_system_prompt("a"), - Message.from_system_prompt("b"), user_msg, assistant_msg]``. + Message.from_system_prompt("b"), user_msg, assistant_msg]``. For the + system-prompts-only case, ``Message.from_system_prompts("a", "b")`` is a + shorthand that returns that list. - ``SingleTurnAttackContext.system_prompt`` is **deprecated** (non-functional; removed in 0.17.0). Never read it or wire new behavior to it. diff --git a/doc/code/executor/3_attack_configuration.ipynb b/doc/code/executor/3_attack_configuration.ipynb index ffabbc4594..6e2f85cfe3 100644 --- a/doc/code/executor/3_attack_configuration.ipynb +++ b/doc/code/executor/3_attack_configuration.ipynb @@ -153,8 +153,10 @@ "```\n", "\n", "Because `prepended_conversation` is a list, targets that accept more than one system message just\n", - "take more than one entry — `[Message.from_system_prompt(\"Policy.\"), Message.from_system_prompt(\"Persona.\")]`\n", - "— and you can interleave `user` / `assistant` turns too (next section)." + "take more than one entry. `Message.from_system_prompts(...)` is a shorthand that builds the list for\n", + "you — `Message.from_system_prompts(\"Policy.\", \"Persona.\")` is the same as\n", + "`[Message.from_system_prompt(\"Policy.\"), Message.from_system_prompt(\"Persona.\")]` — and you can\n", + "interleave `user` / `assistant` turns too (next section)." ] }, { diff --git a/doc/code/executor/3_attack_configuration.py b/doc/code/executor/3_attack_configuration.py index d85c3a071c..c640ba2969 100644 --- a/doc/code/executor/3_attack_configuration.py +++ b/doc/code/executor/3_attack_configuration.py @@ -72,8 +72,10 @@ # ``` # # Because `prepended_conversation` is a list, targets that accept more than one system message just -# take more than one entry — `[Message.from_system_prompt("Policy."), Message.from_system_prompt("Persona.")]` -# — and you can interleave `user` / `assistant` turns too (next section). +# take more than one entry. `Message.from_system_prompts(...)` is a shorthand that builds the list for +# you — `Message.from_system_prompts("Policy.", "Persona.")` is the same as +# `[Message.from_system_prompt("Policy."), Message.from_system_prompt("Persona.")]` — and you can +# interleave `user` / `assistant` turns too (next section). # %% result = await attack.execute_async( # type: ignore diff --git a/pyrit/models/messages/message.py b/pyrit/models/messages/message.py index 3b0a2f5904..8e98a103e6 100644 --- a/pyrit/models/messages/message.py +++ b/pyrit/models/messages/message.py @@ -389,6 +389,20 @@ def from_system_prompt(cls, system_prompt: str) -> Message: """ return cls.from_prompt(prompt=system_prompt, role="system") + @classmethod + def from_system_prompts(cls, *system_prompts: str) -> list[Message]: + """ + Build a list of system-role messages, ready to pass as ``prepended_conversation``. + + Args: + *system_prompts (str): One or more system instruction texts. + + Returns: + list[Message]: One system-role message per input, in order. + + """ + return [cls.from_system_prompt(system_prompt) for system_prompt in system_prompts] + def duplicate(self) -> Message: """ Create a deep copy of this message with new IDs and timestamp for all message pieces. diff --git a/tests/unit/models/test_message.py b/tests/unit/models/test_message.py index c1dd633b76..41b26b8a7d 100644 --- a/tests/unit/models/test_message.py +++ b/tests/unit/models/test_message.py @@ -229,6 +229,19 @@ def test_from_system_prompt_creates_system_message(self) -> None: assert message.message_pieces[0].api_role == "system" assert message.message_pieces[0].original_value == "You are a helpful assistant" + def test_from_system_prompts_creates_system_messages_in_order(self) -> None: + """Test that from_system_prompts creates one system message per input, in order.""" + messages = Message.from_system_prompts("You are X.", "Always cite sources.") + + assert len(messages) == 2 + assert all(len(m.message_pieces) == 1 for m in messages) + assert all(m.message_pieces[0].api_role == "system" for m in messages) + assert [m.message_pieces[0].original_value for m in messages] == ["You are X.", "Always cite sources."] + + def test_from_system_prompts_with_no_arguments_returns_empty_list(self) -> None: + """Test that from_system_prompts returns an empty list when given no prompts.""" + assert Message.from_system_prompts() == [] + def test_from_prompt_with_empty_string(self) -> None: """Test that from_prompt works with empty string.""" message = Message.from_prompt(prompt="", role="user") From 96aa0c29ea50490326cda1fe3e1f408f4a482c4e Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Fri, 26 Jun 2026 10:33:18 -0700 Subject: [PATCH 6/7] Remove system-prompt section from attacks instructions Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/instructions/attacks.instructions.md | 28 -------------------- 1 file changed, 28 deletions(-) diff --git a/.github/instructions/attacks.instructions.md b/.github/instructions/attacks.instructions.md index f8c5817c5a..2d5c4d7c96 100644 --- a/.github/instructions/attacks.instructions.md +++ b/.github/instructions/attacks.instructions.md @@ -56,31 +56,3 @@ Requirements: - Calling ``super().__init__`` with positional arguments — the base ``AttackStrategy.__init__`` is already keyword-only, so positional calls raise ``TypeError`` at runtime. Always forward via kwargs. - -## Setting the objective target's system prompt - -The objective target's system prompt is a ``system``-role message at the front -of the conversation, carried by ``prepended_conversation``. Build the message -with ``Message.from_system_prompt(text)`` and pass it as a list: - -```python -await attack.execute_async( - objective="...", - prepended_conversation=[ - Message.from_system_prompt("You are a helpful chemistry tutor.") - ], -) -``` - -Requirements: - -- This is the standard way to set a system prompt. Do **not** add a separate - ``system_prompt`` field/argument to a new attack — there is one channel, - ``prepended_conversation``. -- ``prepended_conversation`` is a plain list, so multiple system prompts and - interleaved seed turns compose: ``[Message.from_system_prompt("a"), - Message.from_system_prompt("b"), user_msg, assistant_msg]``. For the - system-prompts-only case, ``Message.from_system_prompts("a", "b")`` is a - shorthand that returns that list. -- ``SingleTurnAttackContext.system_prompt`` is **deprecated** (non-functional; - removed in 0.17.0). Never read it or wire new behavior to it. From 65e78bbe86a384c040b617a9e299a3d4e1f3eb2c Mon Sep 17 00:00:00 2001 From: Adrian Gavrila Date: Fri, 26 Jun 2026 11:39:41 -0700 Subject: [PATCH 7/7] Fix property docstring lint (no leading verb) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pyrit/models/messages/message.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyrit/models/messages/message.py b/pyrit/models/messages/message.py index 8e98a103e6..76be8e8f58 100644 --- a/pyrit/models/messages/message.py +++ b/pyrit/models/messages/message.py @@ -248,7 +248,7 @@ def get_piece_by_type( @property def api_role(self) -> ChatMessageRole: """ - Return the API-compatible role of the first message piece. + The API-compatible role of the first message piece. Maps simulated_assistant to assistant for API compatibility. All message pieces in a Message should have the same role. @@ -279,7 +279,7 @@ def is_simulated(self) -> bool: @property def conversation_id(self) -> str: """ - Return the conversation ID of the first request piece. + The conversation ID of the first request piece. Returns: str: Conversation identifier. @@ -295,7 +295,7 @@ def conversation_id(self) -> str: @property def sequence(self) -> int: """ - Return the sequence value of the first request piece. + The sequence value of the first request piece. Returns: int: Sequence number for the message turn.