Merge pull request #1462 from major/rlsapi-v1-shield-moderation

tisnik · web-flow · commit b7c6d082d90f · 2026-04-07T10:32:55.000+02:00
RSPEED-2809: Add shield moderation to rlsapi_v1 /infer endpoint
diff --git a/src/app/endpoints/rlsapi_v1.py b/src/app/endpoints/rlsapi_v1.py
@@ -48,6 +48,7 @@
     extract_token_usage,
     get_mcp_tools,
 )
+from utils.shields import run_shield_moderation
 from utils.suid import get_suid
 
 logger = get_logger(__name__)
@@ -318,6 +319,63 @@ def _queue_splunk_event(  # pylint: disable=too-many-arguments,too-many-position
     background_tasks.add_task(send_splunk_event, event, sourcetype)
 
 
+async def _check_shield_moderation(
+    input_text: str,
+    request_id: str,
+    background_tasks: BackgroundTasks,
+    infer_request: RlsapiV1InferRequest,
+    request: Request,
+) -> Optional[RlsapiV1InferResponse]:
+    """Run shield moderation and return a refusal response if blocked.
+
+    Uses all configured shields in Llama Stack. When no shields are
+    registered, moderation is a no-op and this function returns None.
+
+    Args:
+        input_text: The combined user input to moderate.
+        request_id: Unique identifier for the request.
+        background_tasks: FastAPI background tasks for async Splunk event sending.
+        infer_request: The original inference request (for Splunk event context).
+        request: The FastAPI request object (for Splunk event context).
+
+    Returns:
+        An RlsapiV1InferResponse containing the refusal message if the input
+        was blocked, or None if moderation passed.
+    """
+    client = AsyncLlamaStackClientHolder().get_client()
+    moderation_result = await run_shield_moderation(client, input_text)
+
+    if moderation_result.decision != "blocked":
+        return None
+
+    logger.info(
+        "Request %s blocked by shield moderation: %s",
+        request_id,
+        moderation_result.message,
+    )
+    _queue_splunk_event(
+        background_tasks,
+        infer_request,
+        request,
+        request_id,
+        moderation_result.message,
+        0.0,
+        "infer_shield_blocked",
+    )
+    return RlsapiV1InferResponse(
+        data=RlsapiV1InferData(
+            text=moderation_result.message,
+            request_id=request_id,
+            tool_calls=None,
+            tool_results=None,
+            rag_chunks=None,
+            referenced_documents=None,
+            input_tokens=None,
+            output_tokens=None,
+        )
+    )
+
+
 def _record_inference_failure(  # pylint: disable=too-many-arguments,too-many-positional-arguments
     background_tasks: BackgroundTasks,
     infer_request: RlsapiV1InferRequest,
@@ -441,13 +499,24 @@ async def infer_endpoint(  # pylint: disable=R0914
     logger.info("Processing rlsapi v1 /infer request %s", request_id)
 
     input_source = infer_request.get_input_source()
-    model_id = await _get_default_model_id()
-    provider, model = extract_provider_and_model_from_model_id(model_id)
-    mcp_tools: list[Any] = await get_mcp_tools(request_headers=request.headers)
     logger.debug(
         "Request %s: Combined input source length: %d", request_id, len(input_source)
     )
 
+    # Run shield moderation on user input before inference.
+    # Uses all configured shields; no-op when no shields are registered.
+    # Runs before model/tool discovery so blocked requests short-circuit
+    # without paying for those additional lookups.
+    blocked_response = await _check_shield_moderation(
+        input_source, request_id, background_tasks, infer_request, request
+    )
+    if blocked_response is not None:
+        return blocked_response
+
+    model_id = await _get_default_model_id()
+    provider, model = extract_provider_and_model_from_model_id(model_id)
+    mcp_tools: list[Any] = await get_mcp_tools(request_headers=request.headers)
+
     start_time = time.monotonic()
 
     # Check if verbose metadata should be returned
diff --git a/tests/integration/endpoints/test_rlsapi_v1_integration.py b/tests/integration/endpoints/test_rlsapi_v1_integration.py
@@ -32,6 +32,7 @@
 from models.rlsapi.responses import RlsapiV1InferResponse
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
 from utils.suid import check_suid
+from utils.types import ShieldModerationPassed
 from version import __version__
 
 # ==========================================
@@ -80,6 +81,15 @@ def mock_authorization_fixture(mocker: MockerFixture) -> None:
     mock_authorization_resolvers(mocker)
 
 
+@pytest.fixture(autouse=True, name="mock_shield_passed")
+def mock_shield_passed_fixture(mocker: MockerFixture) -> None:
+    """Mock shield moderation to pass for all integration tests."""
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.run_shield_moderation",
+        new=mocker.AsyncMock(return_value=ShieldModerationPassed()),
+    )
+
+
 def _create_mock_response_output(mocker: MockerFixture, text: str) -> Any:
     """Create a mock Responses API output item with assistant message."""
     mock_output_item = mocker.Mock()
diff --git a/tests/unit/app/endpoints/test_rlsapi_v1.py b/tests/unit/app/endpoints/test_rlsapi_v1.py
@@ -12,6 +12,7 @@
 
 import pytest
 from fastapi import HTTPException, status
+from llama_stack_api import OpenAIResponseMessage
 from llama_stack_client import APIConnectionError
 from pydantic import ValidationError
 from pytest_mock import MockerFixture
@@ -41,6 +42,7 @@
 from models.rlsapi.responses import RlsapiV1InferResponse
 from tests.unit.utils.auth_helpers import mock_authorization_resolvers
 from utils.suid import check_suid
+from utils.types import ShieldModerationBlocked, ShieldModerationPassed
 
 MOCK_AUTH: AuthTuple = ("mock_user_id", "mock_username", False, "mock_token")
 
@@ -125,6 +127,19 @@ def mock_auth_resolvers_fixture(mocker: MockerFixture) -> None:
     mock_authorization_resolvers(mocker)
 
 
+@pytest.fixture(autouse=True, name="mock_shield_passed")
+def mock_shield_passed_fixture(mocker: MockerFixture) -> None:
+    """Mock shield moderation to pass for all endpoint tests by default.
+
+    Individual tests can override this by patching run_shield_moderation
+    with a different return value.
+    """
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.run_shield_moderation",
+        new=mocker.AsyncMock(return_value=ShieldModerationPassed()),
+    )
+
+
 @pytest.fixture(name="mock_api_connection_error")
 def mock_api_connection_error_fixture(mocker: MockerFixture) -> None:
     """Mock responses.create() to raise APIConnectionError."""
@@ -850,6 +865,187 @@ async def test_infer_queues_splunk_error_event_on_failure(
     assert call_args[0][2] == "infer_error"
 
 
+# --- Test shield moderation ---
+
+
+def _create_blocked_moderation_result() -> ShieldModerationBlocked:
+    """Create a ShieldModerationBlocked result for testing."""
+    return ShieldModerationBlocked(
+        message="I can't answer that. Can I help with something else?",
+        moderation_id="modr-test-123",
+        refusal_response=OpenAIResponseMessage(
+            role="assistant",
+            content="I can't answer that. Can I help with something else?",
+        ),
+    )
+
+
+@pytest.mark.asyncio
+async def test_infer_shield_blocked_returns_refusal(
+    mocker: MockerFixture,
+    mock_configuration: AppConfig,
+    mock_llm_response: None,
+    mock_auth_resolvers: None,
+    mock_request_factory: Callable[..., Any],
+    mock_background_tasks: Any,
+) -> None:
+    """Test that blocked shield moderation returns refusal text and no metadata."""
+    blocked = _create_blocked_moderation_result()
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.run_shield_moderation",
+        new=mocker.AsyncMock(return_value=blocked),
+    )
+
+    infer_request = RlsapiV1InferRequest(question="How do I hack a server?")
+    mock_request = mock_request_factory()
+
+    response = await infer_endpoint(
+        infer_request=infer_request,
+        request=mock_request,
+        background_tasks=mock_background_tasks,
+        auth=MOCK_AUTH,
+    )
+
+    assert isinstance(response, RlsapiV1InferResponse)
+    assert response.data.text == blocked.message
+    assert response.data.request_id is not None
+    assert check_suid(response.data.request_id)
+    # Blocked response must not include verbose metadata
+    assert response.data.tool_calls is None
+    assert response.data.tool_results is None
+    assert response.data.rag_chunks is None
+    assert response.data.referenced_documents is None
+    assert response.data.input_tokens is None
+    assert response.data.output_tokens is None
+
+
+@pytest.mark.asyncio
+async def test_infer_shield_blocked_skips_llm_call(
+    mocker: MockerFixture,
+    mock_configuration: AppConfig,
+    mock_llm_response: None,
+    mock_auth_resolvers: None,
+    mock_request_factory: Callable[..., Any],
+    mock_background_tasks: Any,
+) -> None:
+    """Test that blocked shield moderation prevents any LLM call."""
+    blocked = _create_blocked_moderation_result()
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.run_shield_moderation",
+        new=mocker.AsyncMock(return_value=blocked),
+    )
+    mock_retrieve = mocker.patch(
+        "app.endpoints.rlsapi_v1.retrieve_simple_response",
+        new=mocker.AsyncMock(),
+    )
+
+    infer_request = RlsapiV1InferRequest(question="How do I hack a server?")
+
+    await infer_endpoint(
+        infer_request=infer_request,
+        request=mock_request_factory(),
+        background_tasks=mock_background_tasks,
+        auth=MOCK_AUTH,
+    )
+
+    mock_retrieve.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_infer_shield_blocked_queues_splunk_event(
+    mocker: MockerFixture,
+    mock_configuration: AppConfig,
+    mock_llm_response: None,
+    mock_auth_resolvers: None,
+    mock_request_factory: Callable[..., Any],
+    mock_background_tasks: Any,
+) -> None:
+    """Test that blocked shield moderation queues a Splunk event with correct sourcetype."""
+    blocked = _create_blocked_moderation_result()
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.run_shield_moderation",
+        new=mocker.AsyncMock(return_value=blocked),
+    )
+
+    infer_request = RlsapiV1InferRequest(question="How do I hack a server?")
+
+    await infer_endpoint(
+        infer_request=infer_request,
+        request=mock_request_factory(),
+        background_tasks=mock_background_tasks,
+        auth=MOCK_AUTH,
+    )
+
+    mock_background_tasks.add_task.assert_called_once()
+    call_args = mock_background_tasks.add_task.call_args
+    assert call_args[0][2] == "infer_shield_blocked"
+
+
+@pytest.mark.asyncio
+async def test_infer_shield_passed_proceeds_to_llm(
+    mocker: MockerFixture,
+    mock_configuration: AppConfig,
+    mock_llm_response: None,
+    mock_auth_resolvers: None,
+    mock_request_factory: Callable[..., Any],
+    mock_background_tasks: Any,
+) -> None:
+    """Test that passed shield moderation proceeds to normal LLM inference."""
+    # autouse fixture already patches with ShieldModerationPassed
+    infer_request = RlsapiV1InferRequest(question="How do I list files?")
+
+    response = await infer_endpoint(
+        infer_request=infer_request,
+        request=mock_request_factory(),
+        background_tasks=mock_background_tasks,
+        auth=MOCK_AUTH,
+    )
+
+    assert response.data.text == "This is a test LLM response."
+    # Splunk event should use normal sourcetype
+    call_args = mock_background_tasks.add_task.call_args
+    assert call_args[0][2] == "infer_with_llm"
+
+
+@pytest.mark.asyncio
+async def test_infer_shield_moderation_receives_combined_input(
+    mocker: MockerFixture,
+    mock_configuration: AppConfig,
+    mock_llm_response: None,
+    mock_auth_resolvers: None,
+    mock_request_factory: Callable[..., Any],
+    mock_background_tasks: Any,
+) -> None:
+    """Test that shield moderation receives the full combined input source."""
+    mock_moderation = mocker.AsyncMock(return_value=ShieldModerationPassed())
+    mocker.patch(
+        "app.endpoints.rlsapi_v1.run_shield_moderation",
+        new=mock_moderation,
+    )
+
+    infer_request = RlsapiV1InferRequest(
+        question="Why did this fail?",
+        context=RlsapiV1Context(
+            stdin="piped input",
+            terminal=RlsapiV1Terminal(output="permission denied"),
+        ),
+    )
+
+    await infer_endpoint(
+        infer_request=infer_request,
+        request=mock_request_factory(),
+        background_tasks=mock_background_tasks,
+        auth=MOCK_AUTH,
+    )
+
+    mock_moderation.assert_called_once()
+    # input_text is the second positional arg (after client): the combined source
+    input_text = mock_moderation.call_args[0][1]
+    assert "Why did this fail?" in input_text
+    assert "piped input" in input_text
+    assert "permission denied" in input_text
+
+
 @pytest.mark.asyncio
 async def test_infer_splunk_event_includes_rh_identity_context(
     mocker: MockerFixture,