Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions sdk/python/examples/responses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""End-to-end example for the OpenAI Responses API client.

Run with::

python examples/responses.py

Requires a loaded model and a started web service.
"""

from __future__ import annotations

import json

from foundry_local_sdk import (
Configuration,
FoundryLocalManager,
FunctionToolDefinition,
InputImageContent,
InputTextContent,
MessageItem,
)

MODEL_ALIAS = "phi-4-mini"


def setup():
    """Bring up everything the demos need and hand back the live handles.

    Initializes ``FoundryLocalManager``, calls ``download_and_register_eps()``,
    looks up ``MODEL_ALIAS`` in the catalog, downloads the model when it is
    not cached, loads it, starts the web service, and creates a Responses
    client for the model id.

    Returns:
        A ``(manager, model, client)`` tuple.

    Raises:
        RuntimeError: If ``MODEL_ALIAS`` is not found in the catalog.
    """

    def _show_progress(p):
        # Carriage return keeps the percentage on one self-overwriting line.
        print(f" {p:.1f}%", end="\r")

    FoundryLocalManager.initialize(Configuration(app_name="ResponsesExample"))
    manager = FoundryLocalManager.instance
    manager.download_and_register_eps()

    model = manager.catalog.get_model(MODEL_ALIAS)
    if model is None:
        raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog")

    if not model.is_cached:
        print(f"Downloading {MODEL_ALIAS}...")
        model.download(progress_callback=_show_progress)
        # Move past the progress line left behind by the "\r" updates.
        print()

    print(f"Loading {model.alias}...", end="")
    model.load()
    print("loaded!")

    # The Responses client talks HTTP, so the service must be up first.
    manager.start_web_service()
    responses_client = manager.create_responses_client(model.id)
    return manager, model, responses_client


def basic_create(client):
    """Demo 1: send a single plain-text prompt and print the reply.

    Args:
        client: Object exposing ``create(prompt)``; the returned value is
            read for its ``status`` and ``output_text`` attributes.
    """
    print("\n=== 1. Basic create ===")
    prompt = "What is 2 + 2? Answer in one word."
    reply = client.create(prompt)
    status, text = reply.status, reply.output_text
    print(f"status={status} text={text!r}")


def streaming(client):
    """Demo 2: stream a response, echoing text deltas as they arrive.

    Events whose ``type`` is ``"response.output_text.delta"`` have their
    ``delta`` printed inline. A ``"response.completed"`` event prints a
    closing line that includes the total token count when the event carries
    ``response.usage.total_tokens``. All other events are ignored.

    Args:
        client: Object exposing ``create_streaming(prompt)`` that yields
            events with a ``type`` attribute.
    """
    print("\n=== 2. Streaming ===")
    print("assistant: ", end="", flush=True)

    events = client.create_streaming("Count from 1 to 5, separated by spaces.")
    for event in events:
        kind = event.type
        if kind == "response.output_text.delta":
            print(event.delta, end="", flush=True)
            continue
        if kind != "response.completed":
            continue

        # getattr with a None default also returns None when the parent is
        # itself None, so the chain needs no explicit None checks.
        usage = getattr(getattr(event, "response", None), "usage", None)
        total = getattr(usage, "total_tokens", None)
        suffix = "" if total is None else f", {total} tokens"
        print(f"\n(completed{suffix})")


def multi_turn(client):
    """Demo 3: chain two requests through ``previous_response_id``.

    The first request is sent with ``store=True``; its ``id`` is then passed
    as ``previous_response_id`` on the second request, whose ``output_text``
    is printed.

    Args:
        client: Object exposing ``create(prompt, **options)``.
    """
    print("\n=== 3. Multi-turn ===")

    opening = client.create("My favorite color is green. Remember that.", store=True)
    print(f"first id={opening.id!r}")

    # Link the follow-up to the first turn by its response id.
    link = {"previous_response_id": opening.id}
    follow_up = client.create("What is my favorite color?", **link)
    print(f"second: {follow_up.output_text!r}")


def tool_calling(client):
    """Demo 4: offer the model a function tool and complete the round trip.

    Sends a question together with a ``multiply_numbers`` tool definition.
    If the response output contains an item whose ``type`` is
    ``"function_call"``, the first such item is evaluated locally and its
    result is sent back as a ``function_call_output`` keyed by ``call_id``;
    the follow-up response text is printed. If there is no function call,
    the plain response text is printed instead.

    The call arguments are model-generated text, so they may be malformed
    JSON or lack the expected keys. That case is reported and the demo is
    skipped rather than aborting the whole script.

    Args:
        client: Object exposing ``create(input, tools=...)``.
    """
    print("\n=== 4. Tool calling ===")
    question = "What is 7 times 6?"
    tools = [
        FunctionToolDefinition(
            name="multiply_numbers",
            description="Multiply two integers together.",
            parameters={
                "type": "object",
                "properties": {
                    "a": {"type": "integer"},
                    "b": {"type": "integer"},
                },
                "required": ["a", "b"],
            },
        )
    ]
    resp = client.create(question, tools=tools)

    # Find the first function_call item in the output (if the model produced one).
    call = next(
        (item for item in resp.output if getattr(item, "type", None) == "function_call"),
        None,
    )
    if call is None:
        print(f"no tool call — got text: {resp.output_text!r}")
        return

    print(f"call {call.name}({call.arguments})")
    try:
        args = json.loads(call.arguments)
        answer = args["a"] * args["b"]
    except (json.JSONDecodeError, KeyError, TypeError) as exc:
        # Bad JSON, missing "a"/"b", or operands that cannot be multiplied.
        print(f"(skipped — could not evaluate tool arguments: {exc!r})")
        return

    follow = client.create(
        [
            MessageItem(role="user", content=question),
            call,
            # The function_call_output is sent back keyed by call_id
            {"type": "function_call_output", "call_id": call.call_id, "output": str(answer)},
        ],
        tools=tools,
    )
    print(f"final: {follow.output_text!r}")


def vision(client):
    """Demo 5: send a text part plus an inline PNG in one user message.

    The embedded image is a placeholder whose IHDR header declares a 1x1
    picture. Any exception raised by the request is reported as a skip
    rather than propagated, since the loaded model may not accept images.

    Args:
        client: Object exposing ``create(input)``.
    """
    print("\n=== 5. Vision ===")
    # Requires a vision-capable model. Replace with a real PNG to see real output.
    png_hex = "".join(
        (
            "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4",
            "890000000d49444154789c6300010000000500010d0a2db40000000049454e44",
            "ae426082",
        )
    )
    png_bytes = bytes.fromhex(png_hex)

    parts = [
        InputTextContent(text="Describe this image in one sentence."),
        InputImageContent.from_bytes(png_bytes, "image/png"),
    ]
    message = MessageItem(role="user", content=parts)

    try:
        reply = client.create([message])
        print(f"vision response: {reply.output_text!r}")
    except Exception as exc:
        # Deliberately broad: this demo is best-effort and must not stop the run.
        print(f"(skipped — model may not support vision: {exc})")


def main():
    """Run every demo in order, then tear down the service and the model.

    Teardown always runs, even when a demo raises. The model is unloaded
    even if stopping the web service fails.
    """
    mgr, model, client = setup()
    try:
        basic_create(client)
        streaming(client)
        multi_turn(client)
        tool_calling(client)
        vision(client)
    finally:
        # Nested so that an error from stop_web_service() cannot skip the
        # unload and leave the model loaded.
        try:
            mgr.stop_web_service()
        finally:
            model.unload()


if __name__ == "__main__":
main()
11 changes: 5 additions & 6 deletions sdk/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
pydantic>=2.0.0
requests>=2.32.4
openai>=2.24.0
# Standard native binary packages from the ORT-Nightly PyPI feed.
foundry-local-core==1.0.0rc1
onnxruntime-core==1.24.4; sys_platform != "linux"
onnxruntime-gpu==1.24.4; sys_platform == "linux"
onnxruntime-genai-core==0.13.1; sys_platform != "linux"
onnxruntime-genai-cuda==0.13.1; sys_platform == "linux"
foundry-local-core==1.0.0
onnxruntime-gpu==1.24.4; platform_system == "Linux"
Comment thread
MaanavD marked this conversation as resolved.
Outdated
onnxruntime-core==1.24.4; platform_system != "Linux"
onnxruntime-genai-cuda==0.13.1; platform_system == "Linux"
onnxruntime-genai-core==0.13.1; platform_system != "Linux"
62 changes: 61 additions & 1 deletion sdk/python/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,34 @@

from .configuration import Configuration
from .foundry_local_manager import FoundryLocalManager
from .openai.responses_client import ResponsesAPIError, ResponsesClient, ResponsesClientSettings
from .openai.responses_types import (
ContentPart,
DeleteResponseResult,
FunctionCallItem,
FunctionCallOutputItem,
FunctionToolDefinition,
InputFileContent,
InputImageContent,
InputItemsListResponse,
InputTextContent,
ItemReference,
ListResponsesResult,
MessageItem,
OutputTextContent,
ReasoningConfig,
ReasoningItem,
RefusalContent,
ResponseError,
ResponseInputItem,
ResponseObject,
ResponseOutputItem,
ResponseUsage,
StreamingEvent,
TextConfig,
TextFormat,
parse_streaming_event,
)
from .version import __version__

_logger = logging.getLogger(__name__)
Expand All @@ -20,4 +48,36 @@
_logger.addHandler(_sc)
_logger.propagate = False

__all__ = ["Configuration", "FoundryLocalManager", "__version__"]
__all__ = [
"Configuration",
"ContentPart",
"DeleteResponseResult",
"FoundryLocalManager",
"FunctionCallItem",
"FunctionCallOutputItem",
"FunctionToolDefinition",
"InputFileContent",
"InputImageContent",
"InputItemsListResponse",
"InputTextContent",
"ItemReference",
"ListResponsesResult",
"MessageItem",
"OutputTextContent",
"ReasoningConfig",
"ReasoningItem",
"RefusalContent",
"ResponseError",
"ResponseInputItem",
"ResponseObject",
"ResponseOutputItem",
"ResponseUsage",
"ResponsesAPIError",
"ResponsesClient",
"ResponsesClientSettings",
"StreamingEvent",
"TextConfig",
"TextFormat",
"__version__",
"parse_streaming_event",
]
5 changes: 5 additions & 0 deletions sdk/python/src/detail/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ..openai.chat_client import ChatClient
from ..openai.audio_client import AudioClient
from ..openai.embedding_client import EmbeddingClient
from ..openai.responses_client import ResponsesClient
from .model_variant import ModelVariant
from ..exception import FoundryLocalException
from .core_interop import CoreInterop
Expand Down Expand Up @@ -146,3 +147,7 @@ def get_audio_client(self) -> AudioClient:
def get_embedding_client(self) -> EmbeddingClient:
    """Return the embedding client produced by the currently selected variant."""
    variant = self._selected_variant
    return variant.get_embedding_client()

def create_responses_client(self, base_url: str) -> "ResponsesClient":
    """Build a Responses API client via the currently selected variant.

    :param base_url: Passed unchanged to the variant's
        ``create_responses_client``.
    :return: Whatever the selected variant's ``create_responses_client``
        returns.
    """
    variant = self._selected_variant
    return variant.create_responses_client(base_url)
9 changes: 9 additions & 0 deletions sdk/python/src/detail/model_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ..openai.audio_client import AudioClient
from ..openai.chat_client import ChatClient
from ..openai.embedding_client import EmbeddingClient
from ..openai.responses_client import ResponsesClient

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -175,3 +176,11 @@ def get_audio_client(self) -> AudioClient:
def get_embedding_client(self) -> EmbeddingClient:
    """Build an OpenAI-compatible ``EmbeddingClient`` for this variant.

    The client is constructed from this variant's ``id`` and its core
    interop handle.
    """
    model_id, interop = self.id, self._core_interop
    return EmbeddingClient(model_id, interop)

def create_responses_client(self, base_url: str) -> ResponsesClient:
    """Build a ``ResponsesClient`` for this variant's model id.

    :param base_url: Base URL of the running Foundry Local web service
        (e.g. ``manager.urls[0]``).
    :return: A new ``ResponsesClient`` constructed from ``base_url`` and
        this variant's ``id``.
    """
    model_id = self.id
    return ResponsesClient(base_url, model_id)
23 changes: 23 additions & 0 deletions sdk/python/src/foundry_local_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .detail.core_interop import CoreInterop, InteropRequest
from .detail.model_load_manager import ModelLoadManager
from .exception import FoundryLocalException
from .openai.responses_client import ResponsesClient

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -194,3 +195,25 @@ def stop_web_service(self):
raise FoundryLocalException(f"Error stopping web service: {response.error}")

self.urls = None

def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient:
    """Build a :class:`ResponsesClient` pointed at the running web service.

    The Responses API is HTTP-only, so :meth:`start_web_service` must have
    been called first. The client is bound to the first entry of
    ``self.urls``.

    Args:
        model_id: Optional default model ID baked into the client. May also
            be supplied per-call via ``options['model']``.

    Returns:
        A new :class:`ResponsesClient`.

    Raises:
        FoundryLocalException: If the web service has not been started.
    """
    service_urls = self.urls
    if service_urls:
        return ResponsesClient(service_urls[0], model_id)

    # urls is None or empty: there is no running service to bind to.
    raise FoundryLocalException(
        "Web service is not running. Call start_web_service() first."
    )
16 changes: 16 additions & 0 deletions sdk/python/src/imodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .openai.chat_client import ChatClient
from .openai.audio_client import AudioClient
from .openai.embedding_client import EmbeddingClient
from .openai.responses_client import ResponsesClient
from .detail.model_data_types import ModelInfo

class IModel(ABC):
Expand Down Expand Up @@ -136,6 +137,21 @@ def get_embedding_client(self) -> 'EmbeddingClient':
"""
pass

@abstractmethod
def create_responses_client(self, base_url: str) -> 'ResponsesClient':
    """
    Build a client for the OpenAI Responses API against the running web service.

    The Responses API differs from the other clients in that it is HTTP-only,
    so the Foundry Local web service has to be started beforehand. Supply a
    base URL taken from :attr:`FoundryLocalManager.urls`
    (e.g. ``manager.urls[0]``), or call
    :meth:`FoundryLocalManager.create_responses_client` directly instead.

    :param base_url: Base URL of the running Foundry Local web service.
    :return: ResponsesClient instance for this variant's model id.
    """
    ...

@property
@abstractmethod
def variants(self) -> List['IModel']:
Expand Down
8 changes: 6 additions & 2 deletions sdk/python/src/openai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""OpenAI-compatible clients for chat completions and audio transcription."""
"""OpenAI-compatible clients for chat completions, audio, embeddings, and Responses API."""

from .chat_client import ChatClient, ChatClientSettings
from .audio_client import AudioClient
Expand All @@ -14,6 +14,7 @@
LiveAudioTranscriptionResponse,
TranscriptionContentPart,
)
from .responses_client import ResponsesClient, ResponsesClientSettings, ResponsesAPIError

__all__ = [
"AudioClient",
Expand All @@ -24,5 +25,8 @@
"LiveAudioTranscriptionOptions",
"LiveAudioTranscriptionResponse",
"LiveAudioTranscriptionSession",
"ResponsesAPIError",
Comment thread
MaanavD marked this conversation as resolved.
Outdated
"ResponsesClient",
"ResponsesClientSettings",
"TranscriptionContentPart",
]
]
Loading
Loading