Add tool_choice + ForceTool (proposal 0025)

chris-colinsky · chris-colinsky · commit ea2e89e4306c · 2026-05-25T16:02:37.000-07:00
Provider.complete() gains an optional tool_choice parameter — one of
"auto", "required", "none", or a ForceTool record — constraining the
model's tool-calling behavior. Pre-send validation routes the three
§5 failure modes through ProviderInvalidRequest (§7's existing
category; no new category per the proposal's framing).

ForceTool is a frozen Pydantic model with type: Literal["tool"]
matching the spec discriminator. The OpenAI wire mapping in
_build_request_body translates the spec shape to OpenAI's body per
§8.1.1: string literals pass through verbatim; ForceTool renames
type to "function" and nests the name under a function sub-object.
Passing None or omitting tool_choice preserves pre-0025 behavior —
the field is absent on the wire and the provider's own default applies.

15 unit tests cover the three validation rules, ForceTool shape
constraints (frozen, extras-forbid, Literal type), and the wire
mapping rows from §8.1.1.
diff --git a/src/openarmature/llm/__init__.py b/src/openarmature/llm/__init__.py
@@ -47,6 +47,7 @@
 from .messages import (
     AssistantMessage,
     ContentBlock,
+    ForceTool,
     ImageBlock,
     ImageSource,
     ImageSourceInline,
@@ -56,6 +57,7 @@
     TextBlock,
     Tool,
     ToolCall,
+    ToolChoice,
     ToolMessage,
     UserMessage,
 )
@@ -64,6 +66,7 @@
     strict_mode_supported,
     validate_message_list,
     validate_response_schema,
+    validate_tool_choice,
     validate_tools,
 )
 from .providers import OpenAIProvider, classify_http_error, parse_retry_after
@@ -83,6 +86,7 @@
     "AssistantMessage",
     "ContentBlock",
     "FinishReason",
+    "ForceTool",
     "ImageBlock",
     "ImageSource",
     "ImageSourceInline",
@@ -107,6 +111,7 @@
     "TextBlock",
     "Tool",
     "ToolCall",
+    "ToolChoice",
     "ToolMessage",
     "Usage",
     "UserMessage",
@@ -115,5 +120,6 @@
     "strict_mode_supported",
     "validate_message_list",
     "validate_response_schema",
+    "validate_tool_choice",
     "validate_tools",
 ]
diff --git a/src/openarmature/llm/messages.py b/src/openarmature/llm/messages.py
@@ -68,6 +68,46 @@ class Tool(BaseModel):
     parameters: dict[str, Any]
 
 
+# Spec: realizes llm-provider §5 `tool_choice` discriminated-union
+# (proposal 0025). The string-literal modes (`"auto"`, `"required"`,
+# `"none"`) and the `ForceTool` record share the `ToolChoice` alias.
+# Implementations validate `tool_choice` against `tools` before send
+# (see ``validate_tool_choice`` in :mod:`provider`); violations raise
+# ``ProviderInvalidRequest`` per §7.
+class ForceTool(BaseModel):
+    """Force the model to call exactly the named tool.
+
+    Use the record form of the §5 `tool_choice` discriminated union
+    when you need the model to call a specific tool by name. ``type``
+    is the spec-level discriminator (``"tool"``); the wire mapping
+    (§8.1.1) renames it to ``"function"`` for the OpenAI body. The
+    ``name`` MUST match a ``Tool.name`` in the supplied ``tools``
+    list; ``validate_tool_choice`` enforces this at pre-send time and
+    raises ``ProviderInvalidRequest`` on violation.
+    """
+
+    model_config = ConfigDict(extra="forbid", frozen=True)
+
+    # Frozen + extras-forbidden so a ``ForceTool`` instance is safely
+    # hashable and structurally pinned. The ``Literal["tool"]`` default
+    # makes ``ForceTool(name="search")`` ergonomic at the call site
+    # while preserving the spec-level discriminator on the type.
+    type: Literal["tool"] = "tool"
+    name: str
+
+
+# Per spec §5: `tool_choice` is one of:
+# - ``"auto"`` — the model decides.
+# - ``"required"`` — the model MUST call at least one tool.
+# - ``"none"`` — the model MUST NOT call tools.
+# - ``ForceTool(name=X)`` — the model MUST call the named tool.
+# A union of the three string literals plus the record form.
+# Callers pass ``tool_choice=None`` (the default) to omit the field
+# from the wire — the provider's own default applies, preserving
+# pre-0025 behavior.
+ToolChoice = Literal["auto", "required", "none"] | ForceTool
+
+
 # ---------------------------------------------------------------------------
 # Per-role message classes
 # ---------------------------------------------------------------------------
@@ -274,6 +314,7 @@ class ToolMessage(_MessageBase):
 __all__ = [
     "AssistantMessage",
     "ContentBlock",
+    "ForceTool",
     "ImageBlock",
     "ImageSource",
     "ImageSourceInline",
@@ -283,6 +324,7 @@ class ToolMessage(_MessageBase):
     "TextBlock",
     "Tool",
     "ToolCall",
+    "ToolChoice",
     "ToolMessage",
     "UserMessage",
 ]
diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py
@@ -48,9 +48,11 @@
 from .errors import ProviderInvalidRequest
 from .messages import (
     AssistantMessage,
+    ForceTool,
     Message,
     SystemMessage,
     Tool,
+    ToolChoice,
     ToolMessage,
     UserMessage,
 )
@@ -75,6 +77,7 @@ async def complete(
         tools: Sequence[Tool] | None = None,
         config: RuntimeConfig | None = None,
         response_schema: dict[str, Any] | type[BaseModel] | None = None,
+        tool_choice: ToolChoice | None = None,
     ) -> Response:
         """Perform a single completion call.
 
@@ -93,6 +96,12 @@ async def complete(
                 supplied, the implementation constrains the model's
                 output to the schema and populates ``Response.parsed``
                 with the validated value.
+            tool_choice: Optional tool-choice constraint (spec §5). One
+                of ``"auto"``, ``"required"``, ``"none"``, or a
+                :class:`ForceTool` record. When ``None`` (the default)
+                the wire ``tool_choice`` field is omitted and the
+                provider's own default applies. Pre-send validation
+                failures raise ``provider_invalid_request``.
         """
         ...
 
@@ -174,6 +183,53 @@ def validate_tools(tools: Sequence[Tool] | None) -> None:
         seen.add(t.name)
 
 
+# Spec: realizes llm-provider §5 `tool_choice` pre-send validation
+# rules (proposal 0025). The three failure modes route through the
+# existing §7 ``provider_invalid_request`` category; no new error
+# categories per the spec's "no new category" framing. Validation
+# fires BEFORE any HTTP request is sent (fixture 031's mock_provider
+# returns an empty response list on these cases to fail the test
+# if a request escapes the validation gate).
+def validate_tool_choice(
+    tool_choice: ToolChoice | None,
+    tools: Sequence[Tool] | None,
+) -> None:
+    """Validate ``tool_choice`` against ``tools`` per spec §5.
+
+    Raises :class:`ProviderInvalidRequest` (the §7
+    ``provider_invalid_request`` category) on:
+
+    - ``tool_choice="required"`` supplied with empty / absent
+      ``tools``.
+    - ``tool_choice=ForceTool(name=X)`` supplied with empty / absent
+      ``tools``.
+    - ``tool_choice=ForceTool(name=X)`` supplied with ``X`` not in the
+      supplied tools list.
+
+    No-op when ``tool_choice`` is ``None`` (the default — preserves
+    pre-0025 behavior; the wire field is omitted and the provider's
+    own default applies). ``tool_choice="auto"`` and
+    ``tool_choice="none"`` have no ``tools``-related preconditions.
+    """
+    if tool_choice is None:
+        return
+    has_tools = bool(tools)
+    if tool_choice == "required" and not has_tools:
+        raise ProviderInvalidRequest('tool_choice="required" requires non-empty tools')
+    if isinstance(tool_choice, ForceTool):
+        if not has_tools:
+            raise ProviderInvalidRequest(
+                f"tool_choice ForceTool(name={tool_choice.name!r}) requires non-empty tools"
+            )
+        # ``tools`` is truthy here per the preceding guard, so ``or ()``
+        # never fires at runtime; it only narrows away ``None``.
+        names = {t.name for t in tools or ()}
+        if tool_choice.name not in names:
+            raise ProviderInvalidRequest(
+                f"tool_choice name {tool_choice.name!r} not in tools (declared: {sorted(names)})"
+            )
+
+
 # ---------------------------------------------------------------------------
 # Schema helpers — used by structured-output Provider implementations
 # ---------------------------------------------------------------------------
@@ -485,5 +541,6 @@ def _resolve_ref(ref: str, root: dict[str, Any]) -> Any:
     "strict_mode_supported",
     "validate_message_list",
     "validate_response_schema",
+    "validate_tool_choice",
     "validate_tools",
 ]
diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py
@@ -76,19 +76,22 @@
 from ..messages import (
     AssistantMessage,
     ContentBlock,
+    ForceTool,
     ImageBlock,
     ImageSourceInline,
     Message,
     SystemMessage,
     TextBlock,
     Tool,
     ToolCall,
+    ToolChoice,
     UserMessage,
 )
 from ..provider import (
     strict_mode_supported,
     validate_message_list,
     validate_response_schema,
+    validate_tool_choice,
     validate_tools,
 )
 from ..response import FinishReason, ParsedValue, Response, RuntimeConfig, Usage
@@ -232,24 +235,35 @@ async def complete(
         tools: Sequence[Tool] | None = None,
         config: RuntimeConfig | None = None,
         response_schema: dict[str, Any] | type[BaseModel] | None = None,
+        tool_choice: ToolChoice | None = None,
     ) -> Response:
         """Single completion call.
 
         Pre-send validation runs first (per-message Pydantic +
-        list-level invariants + response_schema shape check). HTTP
-        errors map to canonical provider-error categories. The
-        successful 200 body is parsed into a :class:`Response`;
-        failure to parse raises ``provider_invalid_response``; failure
-        to validate the response content against ``response_schema``
-        raises ``structured_output_invalid``.
+        list-level invariants + response_schema shape check +
+        ``tool_choice`` validation). HTTP errors map to canonical
+        provider-error categories. The successful 200 body is parsed
+        into a :class:`Response`; failure to parse raises
+        ``provider_invalid_response``; failure to validate the response
+        content against ``response_schema`` raises
+        ``structured_output_invalid``.
 
         When ``response_schema`` is supplied as a Pydantic BaseModel
         subclass, ``Response.parsed`` is a validated instance of that
         class; when supplied as a JSON Schema dict,
         ``Response.parsed`` is the deserialized dict.
+
+        ``tool_choice`` is validated against ``tools`` per spec §5:
+        ``"required"`` and the ``ForceTool`` record both demand
+        non-empty ``tools``, and ``ForceTool.name`` must appear in the
+        supplied list. Violations raise ``provider_invalid_request``
+        BEFORE any HTTP request is sent.
         """
         validate_message_list(messages)
         validate_tools(tools)
+        # ``validate_tool_choice`` runs after ``validate_tools`` so the
+        # name-membership check sees a structurally valid tools list.
+        validate_tool_choice(tool_choice, tools)
         schema_dict, schema_class = _normalize_response_schema(response_schema)
         # On the fallback path, the wire-side messages list is an
         # augmented COPY of the caller's messages — original messages
@@ -268,6 +282,7 @@ async def complete(
             # form calls (schema_dict is None) must preserve any
             # caller-supplied response_format from RuntimeConfig extras.
             include_response_format=(schema_dict is None or not self._force_prompt_augmentation_fallback),
+            tool_choice=tool_choice,
         )
 
         # Spec observability §5.5 LLM provider span: when an
@@ -399,6 +414,7 @@ def _build_request_body(
         config: RuntimeConfig | None,
         schema_dict: dict[str, Any] | None,
         include_response_format: bool = True,
+        tool_choice: ToolChoice | None = None,
     ) -> dict[str, Any]:
         body: dict[str, Any] = {
             "model": self.model,
@@ -439,6 +455,22 @@ def _build_request_body(
             # loop above; strip it here so the fallback contract holds
             # regardless of caller-supplied extras.
             body.pop("response_format", None)
+        # Per §8.1.1 (proposal 0025): map the spec-level `tool_choice`
+        # shape onto the OpenAI wire shape. ``None`` omits the field
+        # entirely so the OpenAI provider's own default applies —
+        # load-bearing for backward compat with pre-0025 callers. The
+        # string-literal modes pass through verbatim; the ``ForceTool``
+        # record renames ``type: "tool"`` → ``type: "function"`` and
+        # nests the name under a ``function`` sub-object per OpenAI's
+        # request shape.
+        if tool_choice is not None:
+            if isinstance(tool_choice, ForceTool):
+                body["tool_choice"] = {
+                    "type": "function",
+                    "function": {"name": tool_choice.name},
+                }
+            else:
+                body["tool_choice"] = tool_choice
         return body
 
     # ------------------------------------------------------------------
diff --git a/tests/unit/test_tool_choice.py b/tests/unit/test_tool_choice.py