feat: Add modality support detection system for prompt targets

Robert Fitzpatrick · Robert Fitzpatrick · commit ed5cdff6456f · 2026-02-19T21:58:29.000Z
- Add SUPPORTED_INPUT_MODALITIES class attribute to PromptTarget base class
- Add input_modality_supported() and supports_multimodal_input() methods
- Add supported_input_modalities property that returns the list of supported modalities
- Add supported_input_modalities and supports_conversation_history fields to TargetIdentifier
- Update PromptTarget._create_identifier() to populate the new fields
- Implement modality declarations in OpenAIChatTarget (text, image_path), TextTarget (text), and HuggingFaceChatTarget (text)
- Add comprehensive tests for modality support detection

This system enables attacks to detect whether a target supports multimodal input (text plus other modalities) and route accordingly, addressing the limitation mentioned in PR microsoft#1377, where multimodal attacks need to know target capabilities.
diff --git a/tests/unit/prompt_target/test_modality_support.py b/tests/unit/prompt_target/test_modality_support.py
@@ -0,0 +1,143 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+Tests for modality support detection in prompt targets.
+"""
+
+import pytest
+
+from pyrit.memory import CentralMemory
+from pyrit.prompt_target.openai.openai_chat_target import OpenAIChatTarget
+from pyrit.prompt_target.text_target import TextTarget
+from pyrit.prompt_target.hugging_face.hugging_face_chat_target import HuggingFaceChatTarget
+
+
+class TestModalitySupport:
+    """
+    Test cases for modality support detection in prompt targets.
+    """
+
+    @pytest.fixture(autouse=True)
+    def sqlite_instance(self):
+        """Initialize in-memory SQLite database for testing."""
+        memory_instance = CentralMemory()
+        CentralMemory.set_memory_instance(memory_instance)
+        yield memory_instance
+
+    def test_openai_chat_target_supports_multimodal(self, sqlite_instance):
+        """
+        Test that OpenAIChatTarget declares multimodal support correctly.
+        """
+        target = OpenAIChatTarget(
+            endpoint="https://test.openai.azure.com/",
+            model_name="gpt-4o",
+            api_key="test",
+        )
+
+        # OpenAI Chat should support both text and image_path
+        assert target.input_modality_supported("text")
+        assert target.input_modality_supported("image_path")
+        assert not target.input_modality_supported("audio")
+        assert target.supports_multimodal_input()
+        assert "text" in target.supported_input_modalities
+        assert "image_path" in target.supported_input_modalities
+        assert len(target.supported_input_modalities) == 2
+
+    def test_text_target_supports_text_only(self, sqlite_instance):
+        """
+        Test that TextTarget declares text-only support correctly.
+        """
+        target = TextTarget()
+
+        # TextTarget should only support text
+        assert target.input_modality_supported("text")
+        assert not target.input_modality_supported("image_path")
+        assert not target.input_modality_supported("audio")
+        assert not target.supports_multimodal_input()
+        assert target.supported_input_modalities == ["text"]
+        assert len(target.supported_input_modalities) == 1
+
+    def test_huggingface_chat_target_supports_text_only(self, sqlite_instance):
+        """
+        Test that HuggingFaceChatTarget declares text-only support correctly.
+        """
+        target = HuggingFaceChatTarget(
+            model_id="microsoft/DialoGPT-medium",
+            use_cuda=False,  # Avoid GPU dependency in tests
+        )
+
+        # HuggingFace Chat should only support text (for now)
+        assert target.input_modality_supported("text")
+        assert not target.input_modality_supported("image_path")
+        assert not target.input_modality_supported("audio")
+        assert not target.supports_multimodal_input()
+        assert target.supported_input_modalities == ["text"]
+        assert len(target.supported_input_modalities) == 1
+
+    def test_target_identifier_includes_modality_fields(self, sqlite_instance):
+        """
+        Test that target identifiers include modality support information.
+        """
+        # Test multimodal target (OpenAI)
+        openai_target = OpenAIChatTarget(
+            endpoint="https://test.openai.azure.com/",
+            model_name="gpt-4o",
+            api_key="test",
+        )
+        openai_id = openai_target.get_identifier()
+        assert openai_id.supported_input_modalities is not None
+        assert "text" in openai_id.supported_input_modalities
+        assert "image_path" in openai_id.supported_input_modalities
+        assert openai_id.supports_conversation_history is True
+
+        # Test text-only target
+        text_target = TextTarget()
+        text_id = text_target.get_identifier()
+        assert text_id.supported_input_modalities == ["text"]
+        assert text_id.supports_conversation_history is True
+
+    def test_modality_support_detection_differentiates_targets(self, sqlite_instance):
+        """
+        Test that modality support detection can differentiate between target types.
+        """
+        openai_target = OpenAIChatTarget(
+            endpoint="https://test.openai.azure.com/",
+            model_name="gpt-4o",
+            api_key="test",
+        )
+        text_target = TextTarget()
+
+        # Test differentiation
+        assert openai_target.supports_multimodal_input()
+        assert not text_target.supports_multimodal_input()
+
+        # Test that they support different modality sets
+        assert set(openai_target.supported_input_modalities) != set(text_target.supported_input_modalities)
+
+        # Both should support text
+        assert openai_target.input_modality_supported("text")
+        assert text_target.input_modality_supported("text")
+
+        # Only OpenAI should support image_path
+        assert openai_target.input_modality_supported("image_path")
+        assert not text_target.input_modality_supported("image_path")
+
+    def test_modality_support_properties_are_immutable(self, sqlite_instance):
+        """
+        Test that modality support properties return copies and are not directly modifiable.
+        """
+        target = OpenAIChatTarget(
+            endpoint="https://test.openai.azure.com/",
+            model_name="gpt-4o",
+            api_key="test",
+        )
+
+        # Get the list and modify it
+        modalities = target.supported_input_modalities
+        original_length = len(modalities)
+        modalities.append("fake_modality")
+
+        # Verify the target's actual list wasn't modified
+        assert len(target.supported_input_modalities) == original_length
+        assert "fake_modality" not in target.supported_input_modalities