microsoft · apsonawane · May 6, 2026 · Apr 23, 2026 · Apr 23, 2026 · Apr 24, 2026
diff --git a/sdk/python/examples/responses.py b/sdk/python/examples/responses.py
@@ -0,0 +1,157 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""End-to-end example for the OpenAI Responses API client.
+
+Run with::
+
+    python examples/responses.py
+
+The Responses API needs a loaded model and a started web service; ``setup()`` below takes care of both.
+"""
+
+from __future__ import annotations
+
+import json
+
+from foundry_local_sdk import (
+    Configuration,
+    FoundryLocalManager,
+    FunctionToolDefinition,
+    InputImageContent,
+    InputTextContent,
+    MessageItem,
+)
+
+MODEL_ALIAS = "phi-4-mini"
+
+
+def setup():
+    """Initialize Foundry Local, load the example model, and build a client.
+
+    Downloads ``MODEL_ALIAS`` first when it is not cached, loads it, starts
+    the web service, and creates a Responses client for the model's id.
+
+    Returns:
+        A ``(manager, model, client)`` tuple. The caller is responsible for
+        stopping the web service and unloading the model.
+
+    Raises:
+        RuntimeError: If ``MODEL_ALIAS`` is not present in the catalog.
+    """
+    config = Configuration(app_name="ResponsesExample")
+    FoundryLocalManager.initialize(config)
+    mgr = FoundryLocalManager.instance
+
+    mgr.download_and_register_eps()
+
+    model = mgr.catalog.get_model(MODEL_ALIAS)
+    if model is None:
+        raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog")
+    if not model.is_cached:
+        print(f"Downloading {MODEL_ALIAS}...")
+        # A trailing "\r" does not flush line-buffered stdout, so flush
+        # explicitly to keep the progress indicator updating in place.
+        model.download(
+            progress_callback=lambda p: print(f"  {p:.1f}%", end="\r", flush=True)
+        )
+        print()
+    # Flush so the message is visible while the load is still in progress.
+    print(f"Loading {model.alias}...", end="", flush=True)
+    model.load()
+    print("loaded!")
+    mgr.start_web_service()
+
+    client = mgr.create_responses_client(model.id)
+    return mgr, model, client
+
+
+def basic_create(client):
+    """Send one plain-text prompt and print the response status and text."""
+    print("\n=== 1. Basic create ===")
+    prompt = "What is 2 + 2? Answer in one word."
+    response = client.create(prompt)
+    status, text = response.status, response.output_text
+    print(f"status={status}  text={text!r}")
+
+
+def streaming(client):
+    """Stream a response, echoing text deltas and a closing token count."""
+    print("\n=== 2. Streaming ===")
+    print("assistant: ", end="", flush=True)
+    stream = client.create_streaming("Count from 1 to 5, separated by spaces.")
+    for event in stream:
+        kind = event.type
+        if kind == "response.output_text.delta":
+            print(event.delta, end="", flush=True)
+            continue
+        if kind != "response.completed":
+            continue
+        # getattr with a None default also works on None itself, so any
+        # missing link in response -> usage -> total_tokens yields None.
+        usage = getattr(getattr(event, "response", None), "usage", None)
+        total = getattr(usage, "total_tokens", None)
+        suffix = "" if total is None else f", {total} tokens"
+        print(f"\n(completed{suffix})")
+
+
+def multi_turn(client):
+    """Ask a follow-up question chained to a stored earlier response."""
+    print("\n=== 3. Multi-turn ===")
+    remembered = client.create(
+        "My favorite color is green. Remember that.",
+        store=True,
+    )
+    print(f"first id={remembered.id!r}")
+    # Link the second request to the first through its response id.
+    follow_up_options = {"previous_response_id": remembered.id}
+    answer = client.create("What is my favorite color?", **follow_up_options)
+    print(f"second: {answer.output_text!r}")
+
+
+def tool_calling(client):
+    """Run one function-calling round trip with a ``multiply_numbers`` tool.
+
+    Sends a prompt together with the tool definition. If the response output
+    contains a ``function_call`` item, the call is evaluated locally and the
+    result is sent back as a ``function_call_output`` keyed by ``call_id``;
+    the model's final text is then printed. If no tool call is produced, the
+    plain text answer is printed instead.
+    """
+    print("\n=== 4. Tool calling ===")
+    # Reused verbatim in the follow-up request so both turns stay in sync.
+    prompt = "What is 7 times 6?"
+    tools = [
+        FunctionToolDefinition(
+            name="multiply_numbers",
+            description="Multiply two integers together.",
+            parameters={
+                "type": "object",
+                "properties": {
+                    "a": {"type": "integer"},
+                    "b": {"type": "integer"},
+                },
+                "required": ["a", "b"],
+            },
+        )
+    ]
+    resp = client.create(prompt, tools=tools)
+
+    # Find a function_call item in the output (if the model produced one).
+    for item in resp.output:
+        if getattr(item, "type", None) != "function_call":
+            continue
+        print(f"call {item.name}({item.arguments})")
+        # The arguments are model-generated text: they may not be valid JSON
+        # or may not match the declared schema, so report and stop instead
+        # of aborting the remaining examples with a traceback.
+        try:
+            args = json.loads(item.arguments)
+            answer = args["a"] * args["b"]
+        except (json.JSONDecodeError, KeyError, TypeError) as exc:
+            print(f"could not evaluate tool call: {exc!r}")
+            return
+        follow = client.create(
+            [
+                MessageItem(role="user", content=prompt),
+                item,
+                # The function_call_output is sent back keyed by call_id
+                {"type": "function_call_output", "call_id": item.call_id, "output": str(answer)},
+            ],
+            tools=tools,
+        )
+        print(f"final: {follow.output_text!r}")
+        return
+    print(f"no tool call — got text: {resp.output_text!r}")
+
+
+def vision(client):
+    """Send a text-plus-image message and print the model's reply.
+
+    Any exception raised by the request is reported as a skip rather than
+    propagated, because the loaded model may not accept image input.
+    """
+    print("\n=== 5. Vision ===")
+    # Requires a vision-capable model. Replace with a real PNG to see real output.
+    png_hex = (
+        "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
+        "890000000d49444154789c6300010000000500010d0a2db40000000049454e44"
+        "ae426082"
+    )
+    image_bytes = bytes.fromhex(png_hex)
+    parts = [
+        InputTextContent(text="Describe this image in one sentence."),
+        InputImageContent.from_bytes(image_bytes, "image/png"),
+    ]
+    message = MessageItem(role="user", content=parts)
+    try:
+        reply = client.create([message])
+        print(f"vision response: {reply.output_text!r}")
+    except Exception as e:
+        print(f"(skipped — model may not support vision: {e})")
+
+
+def main():
+    """Run every example in order, then stop the service and unload the model."""
+    mgr, model, client = setup()
+    try:
+        basic_create(client)
+        streaming(client)
+        multi_turn(client)
+        tool_calling(client)
+        vision(client)
+    finally:
+        # Nest the cleanup so the model is still unloaded when stopping the
+        # web service raises.
+        try:
+            mgr.stop_web_service()
+        finally:
+            model.unload()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt
@@ -1,9 +1,8 @@
 pydantic>=2.0.0
 requests>=2.32.4
 openai>=2.24.0
-# Standard native binary packages from the ORT-Nightly PyPI feed.
-foundry-local-core==1.0.0rc1
-onnxruntime-core==1.24.4; sys_platform != "linux"
-onnxruntime-gpu==1.24.4; sys_platform == "linux"
-onnxruntime-genai-core==0.13.1; sys_platform != "linux"
-onnxruntime-genai-cuda==0.13.1; sys_platform == "linux"
+foundry-local-core==1.0.0
+onnxruntime-gpu==1.24.4; platform_system == "Linux"
+onnxruntime-core==1.24.4; platform_system != "Linux"
+onnxruntime-genai-cuda==0.13.1; platform_system == "Linux"
+onnxruntime-genai-core==0.13.1; platform_system != "Linux"
diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py
@@ -7,6 +7,34 @@
 
 from .configuration import Configuration
 from .foundry_local_manager import FoundryLocalManager
+from .openai.responses_client import ResponsesAPIError, ResponsesClient, ResponsesClientSettings
+from .openai.responses_types import (
+    ContentPart,
+    DeleteResponseResult,
+    FunctionCallItem,
+    FunctionCallOutputItem,
+    FunctionToolDefinition,
+    InputFileContent,
+    InputImageContent,
+    InputItemsListResponse,
+    InputTextContent,
+    ItemReference,
+    ListResponsesResult,
+    MessageItem,
+    OutputTextContent,
+    ReasoningConfig,
+    ReasoningItem,
+    RefusalContent,
+    ResponseError,
+    ResponseInputItem,
+    ResponseObject,
+    ResponseOutputItem,
+    ResponseUsage,
+    StreamingEvent,
+    TextConfig,
+    TextFormat,
+    parse_streaming_event,
+)
 from .version import __version__
 
 _logger = logging.getLogger(__name__)
@@ -20,4 +48,36 @@
 _logger.addHandler(_sc)
 _logger.propagate = False
 
-__all__ = ["Configuration", "FoundryLocalManager", "__version__"]
+__all__ = [
+    "Configuration",
+    "ContentPart",
+    "DeleteResponseResult",
+    "FoundryLocalManager",
+    "FunctionCallItem",
+    "FunctionCallOutputItem",
+    "FunctionToolDefinition",
+    "InputFileContent",
+    "InputImageContent",
+    "InputItemsListResponse",
+    "InputTextContent",
+    "ItemReference",
+    "ListResponsesResult",
+    "MessageItem",
+    "OutputTextContent",
+    "ReasoningConfig",
+    "ReasoningItem",
+    "RefusalContent",
+    "ResponseError",
+    "ResponseInputItem",
+    "ResponseObject",
+    "ResponseOutputItem",
+    "ResponseUsage",
+    "ResponsesAPIError",
+    "ResponsesClient",
+    "ResponsesClientSettings",
+    "StreamingEvent",
+    "TextConfig",
+    "TextFormat",
+    "__version__",
+    "parse_streaming_event",
+]
diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py
@@ -11,6 +11,7 @@
 from ..openai.chat_client import ChatClient
 from ..openai.audio_client import AudioClient
 from ..openai.embedding_client import EmbeddingClient
+from ..openai.responses_client import ResponsesClient
 from .model_variant import ModelVariant
 from ..exception import FoundryLocalException
 from .core_interop import CoreInterop
@@ -146,3 +147,7 @@ def get_audio_client(self) -> AudioClient:
     def get_embedding_client(self) -> EmbeddingClient:
         """Get an embedding client for the currently selected variant."""
         return self._selected_variant.get_embedding_client()
+
+    def create_responses_client(self, base_url: str) -> ResponsesClient:
+        """Create a Responses API client for the currently selected variant.
+
+        Delegates to the selected variant's ``create_responses_client``.
+
+        :param base_url: Base URL of the running Foundry Local web service.
+        :return: The client produced by the selected variant.
+        """
+        return self._selected_variant.create_responses_client(base_url)
diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py
@@ -17,6 +17,7 @@
 from ..openai.audio_client import AudioClient
 from ..openai.chat_client import ChatClient
 from ..openai.embedding_client import EmbeddingClient
+from ..openai.responses_client import ResponsesClient
 
 logger = logging.getLogger(__name__)
 
@@ -175,3 +176,11 @@ def get_audio_client(self) -> AudioClient:
     def get_embedding_client(self) -> EmbeddingClient:
         """Create an OpenAI-compatible ``EmbeddingClient`` for this variant."""
         return EmbeddingClient(self.id, self._core_interop)
+
+    def create_responses_client(self, base_url: str) -> ResponsesClient:
+        """Build a ``ResponsesClient`` for this variant's model id.
+
+        :param base_url: Base URL of the running Foundry Local web service
+            (e.g. ``manager.urls[0]``).
+        :return: A new ``ResponsesClient`` constructed from ``base_url`` and
+            this variant's ``id``.
+        """
+        variant_id = self.id
+        return ResponsesClient(base_url, variant_id)
diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py
@@ -20,6 +20,7 @@
 from .detail.core_interop import CoreInterop, InteropRequest
 from .detail.model_load_manager import ModelLoadManager
 from .exception import FoundryLocalException
+from .openai.responses_client import ResponsesClient
 
 logger = logging.getLogger(__name__)
 
@@ -194,3 +195,25 @@ def stop_web_service(self):
                 raise FoundryLocalException(f"Error stopping web service: {response.error}")
 
             self.urls = None
+
+    def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient:
+        """Build a :class:`ResponsesClient` that targets the running web service.
+
+        The Responses API is HTTP-only, so the client is pointed at the first
+        entry of ``self.urls``; call :meth:`start_web_service` beforehand.
+
+        Args:
+            model_id: Optional default model ID baked into the client. May also
+                be supplied per-call via ``options['model']``.
+
+        Returns:
+            A new :class:`ResponsesClient`.
+
+        Raises:
+            FoundryLocalException: If the web service has not been started
+                (``self.urls`` is empty or ``None``).
+        """
+        service_urls = self.urls
+        if service_urls:
+            return ResponsesClient(service_urls[0], model_id)
+        raise FoundryLocalException(
+            "Web service is not running. Call start_web_service() first."
+        )
diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py
@@ -10,6 +10,7 @@
 from .openai.chat_client import ChatClient
 from .openai.audio_client import AudioClient
 from .openai.embedding_client import EmbeddingClient
+from .openai.responses_client import ResponsesClient
 from .detail.model_data_types import ModelInfo
 
 class IModel(ABC):
@@ -136,6 +137,21 @@ def get_embedding_client(self) -> 'EmbeddingClient':
         """
         pass
 
+    @abstractmethod
+    def create_responses_client(self, base_url: str) -> 'ResponsesClient':
+        """
+        Create an OpenAI Responses API client bound to the running web service.
+
+        Unlike the other clients, the Responses API is HTTP-only and requires
+        the Foundry Local web service to be started. Pass one of the base URLs
+        exposed by :attr:`FoundryLocalManager.urls` (e.g. ``manager.urls[0]``),
+        or use :meth:`FoundryLocalManager.create_responses_client`, which picks
+        ``urls[0]`` itself and raises ``FoundryLocalException`` when the web
+        service is not running.
+
+        :param base_url: Base URL of the running Foundry Local web service.
+        :return: ResponsesClient instance for this variant's model id.
+        """
+        pass
+
     @property
     @abstractmethod
     def variants(self) -> List['IModel']:

diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py
@@ -2,7 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-"""OpenAI-compatible clients for chat completions and audio transcription."""
+"""OpenAI-compatible clients for chat completions, audio, embeddings, and the Responses API."""
 
 from .chat_client import ChatClient, ChatClientSettings
 from .audio_client import AudioClient
@@ -14,6 +14,7 @@
     LiveAudioTranscriptionResponse,
     TranscriptionContentPart,
 )
+from .responses_client import ResponsesClient, ResponsesClientSettings, ResponsesAPIError
 
 __all__ = [
     "AudioClient",
@@ -24,5 +25,8 @@
     "LiveAudioTranscriptionOptions",
     "LiveAudioTranscriptionResponse",
     "LiveAudioTranscriptionSession",
+    "ResponsesAPIError",
+    "ResponsesClient",
+    "ResponsesClientSettings",
     "TranscriptionContentPart",
-]
+]