LCORE-1830: Implement Question Validity Safety Capability in Pydantic AI

Jazzcort · Jazzcort · commit a25393f661e4 · 2026-06-18T11:31:40.000-04:00
Implement an LLM-based guardrail that classifies user questions
as on-topic (Kubernetes/OpenShift or customized topic) before
the main agent processes them. Off-topic questions are
short-circuited with a rejection message, bypassing the primary
agent entirely. Includes unit tests.
diff --git a/src/constants.py b/src/constants.py
@@ -248,6 +248,47 @@
     "I cannot process this request due to policy restrictions."
 )
 
+
+DEFAULT_MODEL_PROMPT: Final[str] = """
+Instructions:
+- You are a question classifying tool
+- You are an expert in kubernetes and openshift
+- Your job is to determine where or a user's question is related to kubernetes and/or openshift technologies and to provide a one-word response.
+- If a question appears to be related to kubernetes or openshift technologies, answer with the word ${allowed}, otherwise answer with the word ${rejected}.
+- Do not explain your answer, just provide the one-word response. Do not give any other response.
+- If the given question is an empty string, answer with the word ${rejected}
+
+
+Example Question:
+Why is the sky blue?
+Example Response:
+${rejected}
+
+Example Question:
+Why is the grass green?
+Example Response:
+${rejected}
+
+Example Question:
+Why is sand yellow?
+Example Response:
+${rejected}
+
+Example Question:
+Can you help configure my cluster to automatically scale?
+Example Response:
+${allowed}
+
+Question:
+${message}
+Response:
+"""
+
+# Default text returned to the user when the question validity guardrail
+# rejects a question. The literal starts and ends with a newline.
+DEFAULT_INVALID_QUESTION_RESPONSE: Final[str] = """
+Hi, I'm the OpenShift Lightspeed assistant, I can help you with questions about OpenShift, 
+please ask me a question related to OpenShift.
+"""
+
 # Placeholder slug used in responses when the server substituted its own
 # system prompt for the client's instructions.  Avoids leaking the actual
 # server prompt back to the client.
diff --git a/src/models/config.py b/src/models/config.py
@@ -2060,6 +2060,24 @@ class SkillsConfiguration(ConfigurationBase):
     )
 
 
+class QuestionValidityConfig(ConfigurationBase):
+    """Configuration for the question validity guardrail."""
+
+    model_id: str = Field(
+        ..., title="Model id", description="The model_id to use for the guard"
+    )
+    model_prompt: str = Field(
+        default=constants.DEFAULT_MODEL_PROMPT,
+        title="Model prompt",
+        description="The default prompt sent to the LLM used to validate the Users' question.",
+    )
+    invalid_question_response: str = Field(
+        default=constants.DEFAULT_INVALID_QUESTION_RESPONSE,
+        title="Invalid question response",
+        description="The default response when the Users' question is determined to be invalid.",
+    )
+
+
 class Configuration(ConfigurationBase):
     """Global service configuration."""
 
diff --git a/src/pydantic_ai_lightspeed/capabilities/__init__.py b/src/pydantic_ai_lightspeed/capabilities/__init__.py
@@ -0,0 +1,10 @@
+"""Pluggable capabilities for pydantic-ai agents in Lightspeed.
+
+Provides safety, guardrail, and policy capabilities that hook into
+pydantic-ai's AbstractCapability lifecycle to enforce constraints
+before, during, or after agent runs.
+"""
+
+from pydantic_ai_lightspeed.capabilities.question_validity import QuestionValidity
+
+__all__ = ["QuestionValidity"]
diff --git a/src/pydantic_ai_lightspeed/capabilities/question_validity/__init__.py b/src/pydantic_ai_lightspeed/capabilities/question_validity/__init__.py
@@ -0,0 +1,7 @@
+"""Question validity capability for agent input validation."""
+
+from pydantic_ai_lightspeed.capabilities.question_validity._capability import (
+    QuestionValidity,
+)
+
+__all__ = ["QuestionValidity"]
diff --git a/src/pydantic_ai_lightspeed/capabilities/question_validity/_capability.py b/src/pydantic_ai_lightspeed/capabilities/question_validity/_capability.py
@@ -0,0 +1,132 @@
+"""Question validity capability for filtering off-topic user queries.
+
+This module implements a guardrail that classifies user questions as
+Kubernetes/OpenShift-related or not (It can be customized to any
+topic as well), using an LLM-based check before the main agent
+processes the request. Invalid questions are rejected with a
+predefined response, bypassing the primary agent entirely.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+from string import Template
+
+from pydantic_ai import AgentRunResult, RunContext
+from pydantic_ai._agent_graph import GraphAgentState
+from pydantic_ai.capabilities import AbstractCapability, WrapRunHandler
+from pydantic_ai.direct import model_request
+from pydantic_ai.messages import ModelRequest, TextContent, UserContent
+from pydantic_ai.models import Model, infer_model
+
+from log import get_logger
+from models.config import (
+    QuestionValidityConfig,
+)
+
+# NOTE(review): this logger is not referenced anywhere in the visible part of
+# the module — confirm whether rejections were meant to be logged.
+logger = get_logger(__name__)
+
+# One-word verdicts substituted into the classification prompt. Only a model
+# reply equal to SUBJECT_ALLOWED (after stripping whitespace) lets a run proceed.
+SUBJECT_REJECTED = "REJECTED"
+SUBJECT_ALLOWED = "ALLOWED"
+
+
+def _extract_message_str_from_user_content(user_content: Sequence[UserContent]) -> str:
+    """Extract and combine all text content into a string from a UserContent sequence.
+
+    Parameters:
+        user_content: A sequence of user content items to extract text from.
+
+    Returns:
+        A single string with all text content joined by newlines.
+    """
+    str_arr: list[str] = []
+    for c in user_content:
+        match c:
+            case str() as s:
+                str_arr.append(s)
+            case TextContent(content=c):
+                str_arr.append(c)
+
+    return "\n".join(str_arr)
+
+
+@dataclass
+class QuestionValidity(AbstractCapability[None]):
+    """Reject off-topic user questions before the main agent runs.
+
+    The user prompt is rendered into the configured classification prompt and
+    sent to a separate validity model. The wrapped run proceeds only when that
+    model replies with ``ALLOWED``; otherwise the configured rejection message
+    is returned and the main agent is not invoked.
+
+    Example:
+        ```python
+        from pydantic_ai import Agent
+
+        from models.config import QuestionValidityConfig
+
+        config = QuestionValidityConfig(model_id="openai:gpt-4o-mini")
+        agent = Agent("openai:gpt-4.1", capabilities=[QuestionValidity(config)])
+        ```
+    """
+
+    # Guardrail settings: validity model id, prompt template and rejection text.
+    config: QuestionValidityConfig
+    # Validity model resolved from config.model_id in __post_init__; it is
+    # excluded from the generated __init__ (init=False).
+    _model: Model = field(init=False)
+
+    def __post_init__(self) -> None:
+        """Initialize the model instance from the configured model ID."""
+        self._model = infer_model(self.config.model_id)
+
+    def _build_prompt(self, message: str | Sequence[UserContent] | None) -> str:
+        """Build the classification prompt from the user message.
+
+        Parameters:
+            message: The user input as a string, sequence of user content, or None.
+
+        Returns:
+            The rendered prompt string ready to send to the validity model.
+
+        Raises:
+            KeyError: If the configured prompt uses a placeholder other than
+                ``message``, ``allowed`` or ``rejected``.
+            ValueError: If the configured prompt contains an invalid ``$``
+                placeholder.
+        """
+        # str is matched first because a str is itself a Sequence.
+        match message:
+            case str() as s:
+                _message = s
+            case Sequence() as seq:
+                _message = _extract_message_str_from_user_content(seq)
+            case None:
+                # No prompt is classified as an empty question.
+                _message = ""
+
+        # Strict substitution: every placeholder in the template must be one
+        # of the three names supplied here.
+        return Template(self.config.model_prompt).substitute(
+            message=_message, allowed=SUBJECT_ALLOWED, rejected=SUBJECT_REJECTED
+        )
+
+    async def wrap_run(
+        self, ctx: RunContext, *, handler: WrapRunHandler
+    ) -> AgentRunResult:
+        """Run the question validity check before delegating to the main agent.
+
+        Sends the user prompt to the validity model for classification.
+        If the question is allowed, the handler proceeds normally.
+        Otherwise, a rejection response is returned and the main agent
+        is bypassed.
+
+        Parameters:
+            ctx: The run context containing the user prompt and usage tracker.
+            handler: The handler that invokes the main agent run.
+
+        Returns:
+            The agent run result, either from the main agent or a rejection.
+        """
+        prompt = self._build_prompt(ctx.prompt)
+
+        # Nothing here catches exceptions, so a failure of the validity
+        # request propagates to the caller.
+        result = await model_request(
+            model=self._model,
+            messages=[ModelRequest.user_text_prompt(prompt)],
+        )
+
+        # Include token usage from the question validity request
+        ctx.usage.incr(result.usage)
+
+        # Exact, case-sensitive comparison after stripping whitespace; a reply
+        # without text or with any other content is treated as a rejection.
+        if result.text is not None and result.text.strip() == SUBJECT_ALLOWED:
+            return await handler()  # proceed with the real run
+
+        # short-circuit: return the rejection message with shield usage tracked
+        # NOTE(review): GraphAgentState is imported from the private
+        # pydantic_ai._agent_graph module and `_state` is an underscore-prefixed
+        # argument — confirm both are stable for the pinned pydantic-ai version.
+        state = GraphAgentState(usage=ctx.usage)
+        return AgentRunResult(
+            output=self.config.invalid_question_response, _state=state
+        )
diff --git a/tests/unit/pydantic_ai_lightspeed/capabilities/__init__.py b/tests/unit/pydantic_ai_lightspeed/capabilities/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for pydantic_ai_lightspeed capabilities."""
diff --git a/tests/unit/pydantic_ai_lightspeed/capabilities/question_validity/__init__.py b/tests/unit/pydantic_ai_lightspeed/capabilities/question_validity/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for question validity capability."""
diff --git a/tests/unit/pydantic_ai_lightspeed/capabilities/question_validity/test_capability.py b/tests/unit/pydantic_ai_lightspeed/capabilities/question_validity/test_capability.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+"""Unit tests for pydantic_ai_lightspeed capabilities."""`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+"""Unit tests for question validity capability."""`