|
| 1 | +//! Responses API example demonstrating non-streaming and streaming usage, |
| 2 | +//! tool calling, and multi-turn conversations. |
| 3 | +
|
| 4 | +use std::io::{self, Write}; |
| 5 | + |
| 6 | +use foundry_local_sdk::{ |
| 7 | + FoundryLocalConfig, FoundryLocalError, FoundryLocalManager, FunctionToolDefinition, |
| 8 | + ResponseInput, ResponseItem, StreamingEvent, |
| 9 | +}; |
| 10 | +use serde_json::json; |
| 11 | +use tokio_stream::StreamExt; |
| 12 | + |
/// Crate-local result alias: all fallible steps in this example surface
/// `FoundryLocalError`, so `?` propagates uniformly throughout `main`.
type Result<T> = std::result::Result<T, FoundryLocalError>;
| 14 | + |
/// End-to-end walkthrough of the Responses API against a local Foundry
/// service: starts the service, picks and loads a model, then demonstrates
/// (5) a non-streaming request, (6) a streaming request, (7) a multi-turn
/// follow-up via `previous_response_id`, and (8) a function-tool round trip,
/// before unloading the model and stopping the service.
///
/// Returns `Err(FoundryLocalError)` if any SDK call fails; panics (via
/// `expect`) only when the catalog contains no models at all.
#[tokio::main]
async fn main() -> Result<()> {
    // ── 1. Initialise the manager ──────────────────────────────────────────── 
    let config = FoundryLocalConfig::new("foundry_local_responses_example");
    let manager = FoundryLocalManager::create(config)?;

    // ── 2. Start the web service ───────────────────────────────────────────── 
    println!("Starting web service…");
    manager.start_web_service().await?;
    println!("Web service URLs: {:?}", manager.urls()?);

    // ── 3. Pick a model ────────────────────────────────────────────────────── 
    // Prefer the small-model aliases in order; fall back to whatever the
    // catalog lists first. `expect` is the only panic path: it fires when the
    // catalog is completely empty, which makes the example pointless anyway.
    let models = manager.catalog().get_models().await?;
    let model_alias = ["phi-4-mini", "phi-3.5-mini", "qwen2.5-0.5b"]
        .iter()
        .find(|alias| models.iter().any(|m| m.alias() == **alias))
        .map(|s| s.to_string())
        .or_else(|| models.first().map(|m| m.alias().to_string()))
        .expect("No models available in the catalog");

    println!("Using model: {model_alias}");
    let model = manager.catalog().get_model(&model_alias).await?;

    if !model.is_cached().await? {
        println!("Downloading model {model_alias}…");
        // `None::<fn(f64)>` pins the progress-callback type parameter while
        // opting out of progress reporting.
        model.download(None::<fn(f64)>).await?;
    }
    model.load().await?;
    println!("Model loaded.");

    // ── 4. Create the Responses client ─────────────────────────────────────── 
    let mut client = manager.get_responses_client(Some(&model.info().id))?;
    // presumably enables server-side response storage so that
    // `previous_response_id` chaining in steps 7–8 works — TODO confirm
    client.settings.store = Some(true);

    // ── 5. Non-streaming request ───────────────────────────────────────────── 
    println!("\n─── Non-streaming ───────────────────────────────────────────────");
    let response = client
        .create(
            ResponseInput::Text("What is the capital of France? Reply in one word.".into()),
            None,
        )
        .await?;

    println!("Status : {}", response.status);
    println!("Answer : {}", response.output_text());
    // Usage accounting is optional on the wire, hence the `if let`.
    if let Some(usage) = &response.usage {
        println!(
            "Tokens : {} in / {} out",
            usage.input_tokens, usage.output_tokens
        );
    }

    // ── 6. Streaming request ───────────────────────────────────────────────── 
    println!("\n─── Streaming ───────────────────────────────────────────────────");
    print!("Story : ");
    // Flush so the prompt appears before the first streamed delta; failures
    // are deliberately ignored (`.ok()`) — cosmetic output only.
    io::stdout().flush().ok();

    let mut stream = client
        .create_streaming(
            ResponseInput::Text(
                "Tell me a two-sentence story about a robot that loves ice cream.".into(),
            ),
            None,
        )
        .await?;

    // NOTE(review): `full_text` is accumulated but never read after the loop;
    // it exists only to show how a caller would collect the streamed text.
    let mut full_text = String::new();
    while let Some(event) = stream.next().await {
        // `event?` aborts the whole example on the first transport/decode error.
        match event? {
            StreamingEvent::OutputTextDelta { delta, .. } => {
                print!("{delta}");
                io::stdout().flush().ok();
                full_text.push_str(&delta);
            }
            StreamingEvent::ResponseCompleted { response, .. } => {
                if let Some(usage) = response.usage.as_ref() {
                    println!("\n[completed, {} output tokens]", usage.output_tokens);
                } else {
                    println!("\n[completed]");
                }
            }
            // Other event kinds (created, in-progress, etc.) are ignored here.
            _ => {}
        }
    }

    // ── 7. Multi-turn: follow-up using previous_response_id ───────────────── 
    println!("\n─── Multi-turn ──────────────────────────────────────────────────");
    let first = client
        .create(
            ResponseInput::Text("My favourite number is 42. Remember this.".into()),
            None,
        )
        .await?;
    println!("Turn 1: {}", first.output_text());

    // NOTE(review): the follow-up text is supplied twice — once here in
    // `follow_up_opts.input` and once as `create()`'s first argument below.
    // Confirm against the SDK which of the two wins; one of them is redundant.
    let follow_up_opts = foundry_local_sdk::ResponseCreateRequest {
        model: model.info().id.clone(),
        input: ResponseInput::Text("What is my favourite number?".into()),
        // Chains onto turn 1 so the service replays prior context.
        previous_response_id: Some(first.id.clone()),
        instructions: None,
        tools: None,
        tool_choice: None,
        stream: None,
        store: Some(true),
        // Deterministic-ish sampling for a reproducible demo answer.
        temperature: Some(0.0),
        top_p: None,
        max_output_tokens: None,
        frequency_penalty: None,
        presence_penalty: None,
        seed: None,
        truncation: None,
        parallel_tool_calls: None,
        metadata: None,
        user: None,
        reasoning: None,
        text: None,
    };

    let second = client
        .create(
            ResponseInput::Text("What is my favourite number?".into()),
            Some(follow_up_opts),
        )
        .await?;
    println!("Turn 2: {}", second.output_text());

    // ── 8. Tool calling ────────────────────────────────────────────────────── 
    println!("\n─── Tool calling ────────────────────────────────────────────────");
    // JSON-schema description of a single `add(a, b)` function tool.
    let add_tool = FunctionToolDefinition {
        tool_type: "function".into(),
        name: "add".into(),
        description: Some("Add two integers and return the sum.".into()),
        parameters: Some(json!({
            "type": "object",
            "properties": {
                "a": { "type": "integer", "description": "First addend" },
                "b": { "type": "integer", "description": "Second addend" }
            },
            "required": ["a", "b"]
        })),
        strict: None,
    };

    // NOTE(review): same input duplication as step 7 — the prompt appears in
    // `tool_opts.input` and again as `create()`'s first argument; verify which
    // the SDK uses.
    let tool_opts = foundry_local_sdk::ResponseCreateRequest {
        model: model.info().id.clone(),
        input: ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
        tools: Some(vec![add_tool]),
        // "required" forces the model to emit a tool call rather than answer
        // directly.
        tool_choice: Some(json!("required")),
        instructions: None,
        previous_response_id: None,
        stream: None,
        store: Some(true),
        temperature: Some(0.0),
        top_p: None,
        max_output_tokens: None,
        frequency_penalty: None,
        presence_penalty: None,
        seed: None,
        truncation: None,
        parallel_tool_calls: None,
        metadata: None,
        user: None,
        reasoning: None,
        text: None,
    };

    let tool_response = client
        .create(
            ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
            Some(tool_opts),
        )
        .await?;

    // Pull out the first function-call item, if the model produced one.
    if let Some(ResponseItem::FunctionCall {
        call_id,
        name,
        arguments,
        ..
    }) = tool_response
        .output
        .iter()
        .find(|i| matches!(i, ResponseItem::FunctionCall { .. }))
    {
        println!("Model called tool: {name}({arguments})");
        // `arguments` arrives as a JSON string; malformed JSON aborts via `?`,
        // while missing/non-integer fields silently default to 0 below —
        // acceptable for a demo, too lenient for production code.
        let args: serde_json::Value = serde_json::from_str(arguments)?;
        let a = args["a"].as_i64().unwrap_or(0);
        let b = args["b"].as_i64().unwrap_or(0);
        let sum = a + b;

        // Feed the tool result back as a FunctionCallOutput item keyed by the
        // original `call_id`.
        let result_input = ResponseInput::Items(vec![ResponseItem::FunctionCallOutput {
            id: None,
            call_id: call_id.clone(),
            output: sum.to_string(),
            status: None,
        }]);

        // NOTE(review): `result_input` is likewise passed twice (cloned into
        // `final_opts.input` and given to `create()` directly) — confirm which
        // one the SDK honors.
        let final_opts = foundry_local_sdk::ResponseCreateRequest {
            model: model.info().id.clone(),
            input: result_input.clone(),
            // Chain onto the tool-call response so the model sees its own call.
            previous_response_id: Some(tool_response.id.clone()),
            instructions: None,
            tools: None,
            tool_choice: None,
            stream: None,
            store: Some(true),
            temperature: Some(0.0),
            top_p: None,
            max_output_tokens: None,
            frequency_penalty: None,
            presence_penalty: None,
            seed: None,
            truncation: None,
            parallel_tool_calls: None,
            metadata: None,
            user: None,
            reasoning: None,
            text: None,
        };

        let final_response = client.create(result_input, Some(final_opts)).await?;
        println!("Tool result: {}", final_response.output_text());
    } else {
        // `tool_choice: "required"` should prevent this, but small models may
        // still fail to emit a call.
        println!("No tool call in response (model may not support tool calling)");
    }

    // ── 9. Clean up ────────────────────────────────────────────────────────── 
    model.unload().await?;
    manager.stop_web_service().await?;
    println!("\nDone.");
    Ok(())
}
0 commit comments