Skip to content

Commit 815c0db

Browse files
MaanavD and Copilot
committed
Add Responses API to Rust SDK
Implements the HTTP Responses API client, wire types, SSE streaming parser, manager factory, tests, and example for the Rust SDK. Also aligns the Rust implementation with resolved Responses API review feedback: opt-in storage, request timeouts, image source validation, optional media type, list pagination fields, get_responses_client naming, and server-matching streaming event shapes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 573dbde commit 815c0db

11 files changed

Lines changed: 2532 additions & 1 deletion

File tree

sdk/rust/Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] }
2424
tokio-stream = "0.1"
2525
tokio-util = "0.7"
2626
futures-core = "0.3"
27-
reqwest = { version = "0.12", features = ["json"] }
27+
reqwest = { version = "0.12", features = ["json", "stream"] }
28+
bytes = "1"
29+
async-stream = "0.3"
2830
urlencoding = "2"
2931
async-openai = { version = "0.33", default-features = false, features = ["chat-completion-types", "embedding-types"] }
3032

@@ -34,6 +36,10 @@ zip = "2"
3436
serde_json = "1"
3537
serde = { version = "1", features = ["derive"] }
3638

39+
[[example]]
40+
name = "responses"
41+
path = "examples/responses.rs"
42+
3743
[[example]]
3844
name = "chat_completion"
3945
path = "examples/chat_completion.rs"

sdk/rust/examples/responses.rs

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
//! Responses API example demonstrating non-streaming and streaming usage,
2+
//! tool calling, and multi-turn conversations.
3+
4+
use std::io::{self, Write};
5+
6+
use foundry_local_sdk::{
7+
FoundryLocalConfig, FoundryLocalError, FoundryLocalManager, FunctionToolDefinition,
8+
ResponseInput, ResponseItem, StreamingEvent,
9+
};
10+
use serde_json::json;
11+
use tokio_stream::StreamExt;
12+
13+
type Result<T> = std::result::Result<T, FoundryLocalError>;
14+
15+
#[tokio::main]
16+
async fn main() -> Result<()> {
17+
// ── 1. Initialise the manager ────────────────────────────────────────────
18+
let config = FoundryLocalConfig::new("foundry_local_responses_example");
19+
let manager = FoundryLocalManager::create(config)?;
20+
21+
// ── 2. Start the web service ─────────────────────────────────────────────
22+
println!("Starting web service…");
23+
manager.start_web_service().await?;
24+
println!("Web service URLs: {:?}", manager.urls()?);
25+
26+
// ── 3. Pick a model ──────────────────────────────────────────────────────
27+
let models = manager.catalog().get_models().await?;
28+
let model_alias = ["phi-4-mini", "phi-3.5-mini", "qwen2.5-0.5b"]
29+
.iter()
30+
.find(|alias| models.iter().any(|m| m.alias() == **alias))
31+
.map(|s| s.to_string())
32+
.or_else(|| models.first().map(|m| m.alias().to_string()))
33+
.expect("No models available in the catalog");
34+
35+
println!("Using model: {model_alias}");
36+
let model = manager.catalog().get_model(&model_alias).await?;
37+
38+
if !model.is_cached().await? {
39+
println!("Downloading model {model_alias}…");
40+
model.download(None::<fn(f64)>).await?;
41+
}
42+
model.load().await?;
43+
println!("Model loaded.");
44+
45+
// ── 4. Create the Responses client ───────────────────────────────────────
46+
let mut client = manager.get_responses_client(Some(&model.info().id))?;
47+
client.settings.store = Some(true);
48+
49+
// ── 5. Non-streaming request ─────────────────────────────────────────────
50+
println!("\n─── Non-streaming ───────────────────────────────────────────────");
51+
let response = client
52+
.create(
53+
ResponseInput::Text("What is the capital of France? Reply in one word.".into()),
54+
None,
55+
)
56+
.await?;
57+
58+
println!("Status : {}", response.status);
59+
println!("Answer : {}", response.output_text());
60+
if let Some(usage) = &response.usage {
61+
println!(
62+
"Tokens : {} in / {} out",
63+
usage.input_tokens, usage.output_tokens
64+
);
65+
}
66+
67+
// ── 6. Streaming request ─────────────────────────────────────────────────
68+
println!("\n─── Streaming ───────────────────────────────────────────────────");
69+
print!("Story : ");
70+
io::stdout().flush().ok();
71+
72+
let mut stream = client
73+
.create_streaming(
74+
ResponseInput::Text(
75+
"Tell me a two-sentence story about a robot that loves ice cream.".into(),
76+
),
77+
None,
78+
)
79+
.await?;
80+
81+
let mut full_text = String::new();
82+
while let Some(event) = stream.next().await {
83+
match event? {
84+
StreamingEvent::OutputTextDelta { delta, .. } => {
85+
print!("{delta}");
86+
io::stdout().flush().ok();
87+
full_text.push_str(&delta);
88+
}
89+
StreamingEvent::ResponseCompleted { response, .. } => {
90+
if let Some(usage) = response.usage.as_ref() {
91+
println!("\n[completed, {} output tokens]", usage.output_tokens);
92+
} else {
93+
println!("\n[completed]");
94+
}
95+
}
96+
_ => {}
97+
}
98+
}
99+
100+
// ── 7. Multi-turn: follow-up using previous_response_id ─────────────────
101+
println!("\n─── Multi-turn ──────────────────────────────────────────────────");
102+
let first = client
103+
.create(
104+
ResponseInput::Text("My favourite number is 42. Remember this.".into()),
105+
None,
106+
)
107+
.await?;
108+
println!("Turn 1: {}", first.output_text());
109+
110+
let follow_up_opts = foundry_local_sdk::ResponseCreateRequest {
111+
model: model.info().id.clone(),
112+
input: ResponseInput::Text("What is my favourite number?".into()),
113+
previous_response_id: Some(first.id.clone()),
114+
instructions: None,
115+
tools: None,
116+
tool_choice: None,
117+
stream: None,
118+
store: Some(true),
119+
temperature: Some(0.0),
120+
top_p: None,
121+
max_output_tokens: None,
122+
frequency_penalty: None,
123+
presence_penalty: None,
124+
seed: None,
125+
truncation: None,
126+
parallel_tool_calls: None,
127+
metadata: None,
128+
user: None,
129+
reasoning: None,
130+
text: None,
131+
};
132+
133+
let second = client
134+
.create(
135+
ResponseInput::Text("What is my favourite number?".into()),
136+
Some(follow_up_opts),
137+
)
138+
.await?;
139+
println!("Turn 2: {}", second.output_text());
140+
141+
// ── 8. Tool calling ──────────────────────────────────────────────────────
142+
println!("\n─── Tool calling ────────────────────────────────────────────────");
143+
let add_tool = FunctionToolDefinition {
144+
tool_type: "function".into(),
145+
name: "add".into(),
146+
description: Some("Add two integers and return the sum.".into()),
147+
parameters: Some(json!({
148+
"type": "object",
149+
"properties": {
150+
"a": { "type": "integer", "description": "First addend" },
151+
"b": { "type": "integer", "description": "Second addend" }
152+
},
153+
"required": ["a", "b"]
154+
})),
155+
strict: None,
156+
};
157+
158+
let tool_opts = foundry_local_sdk::ResponseCreateRequest {
159+
model: model.info().id.clone(),
160+
input: ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
161+
tools: Some(vec![add_tool]),
162+
tool_choice: Some(json!("required")),
163+
instructions: None,
164+
previous_response_id: None,
165+
stream: None,
166+
store: Some(true),
167+
temperature: Some(0.0),
168+
top_p: None,
169+
max_output_tokens: None,
170+
frequency_penalty: None,
171+
presence_penalty: None,
172+
seed: None,
173+
truncation: None,
174+
parallel_tool_calls: None,
175+
metadata: None,
176+
user: None,
177+
reasoning: None,
178+
text: None,
179+
};
180+
181+
let tool_response = client
182+
.create(
183+
ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
184+
Some(tool_opts),
185+
)
186+
.await?;
187+
188+
if let Some(ResponseItem::FunctionCall {
189+
call_id,
190+
name,
191+
arguments,
192+
..
193+
}) = tool_response
194+
.output
195+
.iter()
196+
.find(|i| matches!(i, ResponseItem::FunctionCall { .. }))
197+
{
198+
println!("Model called tool: {name}({arguments})");
199+
let args: serde_json::Value = serde_json::from_str(arguments)?;
200+
let a = args["a"].as_i64().unwrap_or(0);
201+
let b = args["b"].as_i64().unwrap_or(0);
202+
let sum = a + b;
203+
204+
let result_input = ResponseInput::Items(vec![ResponseItem::FunctionCallOutput {
205+
id: None,
206+
call_id: call_id.clone(),
207+
output: sum.to_string(),
208+
status: None,
209+
}]);
210+
211+
let final_opts = foundry_local_sdk::ResponseCreateRequest {
212+
model: model.info().id.clone(),
213+
input: result_input.clone(),
214+
previous_response_id: Some(tool_response.id.clone()),
215+
instructions: None,
216+
tools: None,
217+
tool_choice: None,
218+
stream: None,
219+
store: Some(true),
220+
temperature: Some(0.0),
221+
top_p: None,
222+
max_output_tokens: None,
223+
frequency_penalty: None,
224+
presence_penalty: None,
225+
seed: None,
226+
truncation: None,
227+
parallel_tool_calls: None,
228+
metadata: None,
229+
user: None,
230+
reasoning: None,
231+
text: None,
232+
};
233+
234+
let final_response = client.create(result_input, Some(final_opts)).await?;
235+
println!("Tool result: {}", final_response.output_text());
236+
} else {
237+
println!("No tool call in response (model may not support tool calling)");
238+
}
239+
240+
// ── 9. Clean up ──────────────────────────────────────────────────────────
241+
model.unload().await?;
242+
manager.stop_web_service().await?;
243+
println!("\nDone.");
244+
Ok(())
245+
}

sdk/rust/src/foundry_local_manager.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use crate::configuration::{Configuration, FoundryLocalConfig, Logger};
1313
use crate::detail::core_interop::CoreInterop;
1414
use crate::detail::ModelLoadManager;
1515
use crate::error::{FoundryLocalError, Result};
16+
use crate::openai::responses_client::ResponsesClient;
1617
use crate::types::{EpDownloadResult, EpInfo};
1718

1819
/// Global singleton holder — only stores a successfully initialised manager.
@@ -135,6 +136,20 @@ impl FoundryLocalManager {
135136
Ok(())
136137
}
137138

139+
/// Get a [`ResponsesClient`] for the given model.
140+
///
141+
/// The web service must be started before using the returned client.
142+
/// Pass `model_id = None` to defer model selection to per-request options.
143+
pub fn get_responses_client(&self, model_id: Option<&str>) -> Result<ResponsesClient> {
144+
let urls = self.urls()?;
145+
let base_url = urls.first().ok_or_else(|| FoundryLocalError::Validation {
146+
reason:
147+
"Web service not started. Call start_web_service() before getting a ResponsesClient."
148+
.into(),
149+
})?;
150+
Ok(ResponsesClient::new(base_url, model_id))
151+
}
152+
138153
/// Discover available execution providers and their registration status.
139154
pub fn discover_eps(&self) -> Result<Vec<EpInfo>> {
140155
let raw = self.core.execute_command("discover_eps", None)?;

sdk/rust/src/lib.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,13 @@ pub use async_openai::types::chat::{
4343
CreateChatCompletionResponse, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
4444
FunctionCallStream,
4545
};
46+
47+
// Re-export Responses API types.
48+
pub use crate::openai::{
49+
Annotation, DeleteResponseResult, FunctionToolDefinition, IncompleteDetails,
50+
InputItemsListResponse, InputTokensDetails, ListResponsesOptions, ListResponsesResult, LogProb,
51+
MessageContent, OutputTokensDetails, ReasoningConfig, ResponseCreateRequest, ResponseError,
52+
ResponseInput, ResponseItem, ResponseObject, ResponseUsage, ResponsesClient,
53+
ResponsesClientSettings, ResponsesContentPart, SseStream, StreamingEvent, TextConfig,
54+
TextFormat,
55+
};

sdk/rust/src/openai/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ mod chat_client;
33
mod embedding_client;
44
mod json_stream;
55
mod live_audio_client;
6+
pub mod responses_client;
7+
pub mod responses_types;
68

79
pub use self::audio_client::{
810
AudioClient, AudioClientSettings, AudioTranscriptionResponse, AudioTranscriptionStream,
@@ -15,3 +17,11 @@ pub use self::live_audio_client::{
1517
ContentPart, CoreErrorResponse, LiveAudioTranscriptionOptions, LiveAudioTranscriptionResponse,
1618
LiveAudioTranscriptionSession, LiveAudioTranscriptionStream,
1719
};
20+
pub use self::responses_client::{ResponsesClient, ResponsesClientSettings, SseStream};
21+
pub use self::responses_types::{
22+
Annotation, ContentPart as ResponsesContentPart, DeleteResponseResult, FunctionToolDefinition,
23+
IncompleteDetails, InputItemsListResponse, InputTokensDetails, ListResponsesOptions,
24+
ListResponsesResult, LogProb, MessageContent, OutputTokensDetails, ReasoningConfig,
25+
ResponseCreateRequest, ResponseError, ResponseInput, ResponseItem, ResponseObject,
26+
ResponseUsage, StreamingEvent, TextConfig, TextFormat,
27+
};

0 commit comments

Comments
 (0)