Skip to content

Commit 815c0db

Browse files
MaanavD and Copilot
committed
Add Responses API to Rust SDK
Implements the HTTP Responses API client, wire types, SSE streaming parser, manager factory, tests, and example for the Rust SDK. Also aligns the Rust implementation with resolved Responses API review feedback: opt-in storage, request timeouts, image source validation, optional media type, list pagination fields, get_responses_client naming, and server-matching streaming event shapes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 573dbde commit 815c0db

11 files changed

Lines changed: 2532 additions & 1 deletion

File tree

sdk/rust/Cargo.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] }
2424
tokio-stream = "0.1"
2525
tokio-util = "0.7"
2626
futures-core = "0.3"
27-
reqwest = { version = "0.12", features = ["json"] }
27+
reqwest = { version = "0.12", features = ["json", "stream"] }
28+
bytes = "1"
29+
async-stream = "0.3"
2830
urlencoding = "2"
2931
async-openai = { version = "0.33", default-features = false, features = ["chat-completion-types", "embedding-types"] }
3032

@@ -34,6 +36,10 @@ zip = "2"
3436
serde_json = "1"
3537
serde = { version = "1", features = ["derive"] }
3638

39+
[[example]]
40+
name = "responses"
41+
path = "examples/responses.rs"
42+
3743
[[example]]
3844
name = "chat_completion"
3945
path = "examples/chat_completion.rs"

sdk/rust/examples/responses.rs

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
//! Responses API example demonstrating non-streaming and streaming usage,
2+
//! tool calling, and multi-turn conversations.
3+
4+
use std::io::{self, Write};
5+
6+
use foundry_local_sdk::{
7+
FoundryLocalConfig, FoundryLocalError, FoundryLocalManager, FunctionToolDefinition,
8+
ResponseInput, ResponseItem, StreamingEvent,
9+
};
10+
use serde_json::json;
11+
use tokio_stream::StreamExt;
12+
13+
type Result<T> = std::result::Result<T, FoundryLocalError>;
14+
15+
#[tokio::main]
16+
async fn main() -> Result<()> {
17+
// ── 1. Initialise the manager ────────────────────────────────────────────
18+
let config = FoundryLocalConfig::new("foundry_local_responses_example");
19+
let manager = FoundryLocalManager::create(config)?;
20+
21+
// ── 2. Start the web service ─────────────────────────────────────────────
22+
println!("Starting web service…");
23+
manager.start_web_service().await?;
24+
println!("Web service URLs: {:?}", manager.urls()?);
25+
26+
// ── 3. Pick a model ──────────────────────────────────────────────────────
27+
let models = manager.catalog().get_models().await?;
28+
let model_alias = ["phi-4-mini", "phi-3.5-mini", "qwen2.5-0.5b"]
29+
.iter()
30+
.find(|alias| models.iter().any(|m| m.alias() == **alias))
31+
.map(|s| s.to_string())
32+
.or_else(|| models.first().map(|m| m.alias().to_string()))
33+
.expect("No models available in the catalog");
34+
35+
println!("Using model: {model_alias}");
36+
let model = manager.catalog().get_model(&model_alias).await?;
37+
38+
if !model.is_cached().await? {
39+
println!("Downloading model {model_alias}…");
40+
model.download(None::<fn(f64)>).await?;
41+
}
42+
model.load().await?;
43+
println!("Model loaded.");
44+
45+
// ── 4. Create the Responses client ───────────────────────────────────────
46+
let mut client = manager.get_responses_client(Some(&model.info().id))?;
47+
client.settings.store = Some(true);
48+
49+
// ── 5. Non-streaming request ─────────────────────────────────────────────
50+
println!("\n─── Non-streaming ───────────────────────────────────────────────");
51+
let response = client
52+
.create(
53+
ResponseInput::Text("What is the capital of France? Reply in one word.".into()),
54+
None,
55+
)
56+
.await?;
57+
58+
println!("Status : {}", response.status);
59+
println!("Answer : {}", response.output_text());
60+
if let Some(usage) = &response.usage {
61+
println!(
62+
"Tokens : {} in / {} out",
63+
usage.input_tokens, usage.output_tokens
64+
);
65+
}
66+
67+
// ── 6. Streaming request ─────────────────────────────────────────────────
68+
println!("\n─── Streaming ───────────────────────────────────────────────────");
69+
print!("Story : ");
70+
io::stdout().flush().ok();
71+
72+
let mut stream = client
73+
.create_streaming(
74+
ResponseInput::Text(
75+
"Tell me a two-sentence story about a robot that loves ice cream.".into(),
76+
),
77+
None,
78+
)
79+
.await?;
80+
81+
let mut full_text = String::new();
82+
while let Some(event) = stream.next().await {
83+
match event? {
84+
StreamingEvent::OutputTextDelta { delta, .. } => {
85+
print!("{delta}");
86+
io::stdout().flush().ok();
87+
full_text.push_str(&delta);
88+
}
89+
StreamingEvent::ResponseCompleted { response, .. } => {
90+
if let Some(usage) = response.usage.as_ref() {
91+
println!("\n[completed, {} output tokens]", usage.output_tokens);
92+
} else {
93+
println!("\n[completed]");
94+
}
95+
}
96+
_ => {}
97+
}
98+
}
99+
100+
// ── 7. Multi-turn: follow-up using previous_response_id ─────────────────
101+
println!("\n─── Multi-turn ──────────────────────────────────────────────────");
102+
let first = client
103+
.create(
104+
ResponseInput::Text("My favourite number is 42. Remember this.".into()),
105+
None,
106+
)
107+
.await?;
108+
println!("Turn 1: {}", first.output_text());
109+
110+
let follow_up_opts = foundry_local_sdk::ResponseCreateRequest {
111+
model: model.info().id.clone(),
112+
input: ResponseInput::Text("What is my favourite number?".into()),
113+
previous_response_id: Some(first.id.clone()),
114+
instructions: None,
115+
tools: None,
116+
tool_choice: None,
117+
stream: None,
118+
store: Some(true),
119+
temperature: Some(0.0),
120+
top_p: None,
121+
max_output_tokens: None,
122+
frequency_penalty: None,
123+
presence_penalty: None,
124+
seed: None,
125+
truncation: None,
126+
parallel_tool_calls: None,
127+
metadata: None,
128+
user: None,
129+
reasoning: None,
130+
text: None,
131+
};
132+
133+
let second = client
134+
.create(
135+
ResponseInput::Text("What is my favourite number?".into()),
136+
Some(follow_up_opts),
137+
)
138+
.await?;
139+
println!("Turn 2: {}", second.output_text());
140+
141+
// ── 8. Tool calling ──────────────────────────────────────────────────────
142+
println!("\n─── Tool calling ────────────────────────────────────────────────");
143+
let add_tool = FunctionToolDefinition {
144+
tool_type: "function".into(),
145+
name: "add".into(),
146+
description: Some("Add two integers and return the sum.".into()),
147+
parameters: Some(json!({
148+
"type": "object",
149+
"properties": {
150+
"a": { "type": "integer", "description": "First addend" },
151+
"b": { "type": "integer", "description": "Second addend" }
152+
},
153+
"required": ["a", "b"]
154+
})),
155+
strict: None,
156+
};
157+
158+
let tool_opts = foundry_local_sdk::ResponseCreateRequest {
159+
model: model.info().id.clone(),
160+
input: ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
161+
tools: Some(vec![add_tool]),
162+
tool_choice: Some(json!("required")),
163+
instructions: None,
164+
previous_response_id: None,
165+
stream: None,
166+
store: Some(true),
167+
temperature: Some(0.0),
168+
top_p: None,
169+
max_output_tokens: None,
170+
frequency_penalty: None,
171+
presence_penalty: None,
172+
seed: None,
173+
truncation: None,
174+
parallel_tool_calls: None,
175+
metadata: None,
176+
user: None,
177+
reasoning: None,
178+
text: None,
179+
};
180+
181+
let tool_response = client
182+
.create(
183+
ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
184+
Some(tool_opts),
185+
)
186+
.await?;
187+
188+
if let Some(ResponseItem::FunctionCall {
189+
call_id,
190+
name,
191+
arguments,
192+
..
193+
}) = tool_response
194+
.output
195+
.iter()
196+
.find(|i| matches!(i, ResponseItem::FunctionCall { .. }))
197+
{
198+
println!("Model called tool: {name}({arguments})");
199+
let args: serde_json::Value = serde_json::from_str(arguments)?;
200+
let a = args["a"].as_i64().unwrap_or(0);
201+
let b = args["b"].as_i64().unwrap_or(0);
202+
let sum = a + b;
203+
204+
let result_input = ResponseInput::Items(vec![ResponseItem::FunctionCallOutput {
205+
id: None,
206+
call_id: call_id.clone(),
207+
output: sum.to_string(),
208+
status: None,
209+
}]);
210+
211+
let final_opts = foundry_local_sdk::ResponseCreateRequest {
212+
model: model.info().id.clone(),
213+
input: result_input.clone(),
214+
previous_response_id: Some(tool_response.id.clone()),
215+
instructions: None,
216+
tools: None,
217+
tool_choice: None,
218+
stream: None,
219+
store: Some(true),
220+
temperature: Some(0.0),
221+
top_p: None,
222+
max_output_tokens: None,
223+
frequency_penalty: None,
224+
presence_penalty: None,
225+
seed: None,
226+
truncation: None,
227+
parallel_tool_calls: None,
228+
metadata: None,
229+
user: None,
230+
reasoning: None,
231+
text: None,
232+
};
233+
234+
let final_response = client.create(result_input, Some(final_opts)).await?;
235+
println!("Tool result: {}", final_response.output_text());
236+
} else {
237+
println!("No tool call in response (model may not support tool calling)");
238+
}
239+
240+
// ── 9. Clean up ──────────────────────────────────────────────────────────
241+
model.unload().await?;
242+
manager.stop_web_service().await?;
243+
println!("\nDone.");
244+
Ok(())
245+
}

sdk/rust/src/foundry_local_manager.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use crate::configuration::{Configuration, FoundryLocalConfig, Logger};
1313
use crate::detail::core_interop::CoreInterop;
1414
use crate::detail::ModelLoadManager;
1515
use crate::error::{FoundryLocalError, Result};
16+
use crate::openai::responses_client::ResponsesClient;
1617
use crate::types::{EpDownloadResult, EpInfo};
1718

1819
/// Global singleton holder — only stores a successfully initialised manager.
@@ -135,6 +136,20 @@ impl FoundryLocalManager {
135136
Ok(())
136137
}
137138

139+
/// Get a [`ResponsesClient`] for the given model.
140+
///
141+
/// The web service must be started before using the returned client.
142+
/// Pass `model_id = None` to defer model selection to per-request options.
143+
pub fn get_responses_client(&self, model_id: Option<&str>) -> Result<ResponsesClient> {
144+
let urls = self.urls()?;
145+
let base_url = urls.first().ok_or_else(|| FoundryLocalError::Validation {
146+
reason:
147+
"Web service not started. Call start_web_service() before getting a ResponsesClient."
148+
.into(),
149+
})?;
150+
Ok(ResponsesClient::new(base_url, model_id))
151+
}
152+
138153
/// Discover available execution providers and their registration status.
139154
pub fn discover_eps(&self) -> Result<Vec<EpInfo>> {
140155
let raw = self.core.execute_command("discover_eps", None)?;

sdk/rust/src/lib.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,13 @@ pub use async_openai::types::chat::{
4343
CreateChatCompletionResponse, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
4444
FunctionCallStream,
4545
};
46+
47+
// Re-export Responses API types.
48+
pub use crate::openai::{
49+
Annotation, DeleteResponseResult, FunctionToolDefinition, IncompleteDetails,
50+
InputItemsListResponse, InputTokensDetails, ListResponsesOptions, ListResponsesResult, LogProb,
51+
MessageContent, OutputTokensDetails, ReasoningConfig, ResponseCreateRequest, ResponseError,
52+
ResponseInput, ResponseItem, ResponseObject, ResponseUsage, ResponsesClient,
53+
ResponsesClientSettings, ResponsesContentPart, SseStream, StreamingEvent, TextConfig,
54+
TextFormat,
55+
};

sdk/rust/src/openai/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ mod chat_client;
33
mod embedding_client;
44
mod json_stream;
55
mod live_audio_client;
6+
pub mod responses_client;
7+
pub mod responses_types;
68

79
pub use self::audio_client::{
810
AudioClient, AudioClientSettings, AudioTranscriptionResponse, AudioTranscriptionStream,
@@ -15,3 +17,11 @@ pub use self::live_audio_client::{
1517
ContentPart, CoreErrorResponse, LiveAudioTranscriptionOptions, LiveAudioTranscriptionResponse,
1618
LiveAudioTranscriptionSession, LiveAudioTranscriptionStream,
1719
};
20+
pub use self::responses_client::{ResponsesClient, ResponsesClientSettings, SseStream};
21+
pub use self::responses_types::{
22+
Annotation, ContentPart as ResponsesContentPart, DeleteResponseResult, FunctionToolDefinition,
23+
IncompleteDetails, InputItemsListResponse, InputTokensDetails, ListResponsesOptions,
24+
ListResponsesResult, LogProb, MessageContent, OutputTokensDetails, ReasoningConfig,
25+
ResponseCreateRequest, ResponseError, ResponseInput, ResponseItem, ResponseObject,
26+
ResponseUsage, StreamingEvent, TextConfig, TextFormat,
27+
};

0 commit comments

Comments
 (0)