|
/* Expected result from running this example program.
Connected. dg-request-id from upgrade headers: Some(<uuid>)
Welcome event request_id: <uuid>
Settings applied
Conversation (assistant): Hello! How can I help today?
Audio chunk: 4096 bytes
Audio chunk: 4096 bytes
...
*/
| 10 | + |
//! Minimal Voice Agent example.
//!
//! Connects to `wss://agent.deepgram.com/v1/agent/converse`, sends a
//! `Settings` message with a Deepgram-only Listen + Speak setup and an
//! OpenAI Think provider, prints incoming events for a fixed duration,
//! then closes the connection.
//!
//! The agent will speak a greeting on connect, but this example does
//! not capture or send any microphone audio — for that, see the
//! microphone example (when added).
//!
//! Run with:
//!
//! ```bash
//! DEEPGRAM_API_KEY=<your-key> \
//! cargo run --features agent --example agent_simple
//! ```

use std::env;
use std::time::Duration;

use futures::stream::StreamExt;

use deepgram::agent::{
    audio::{AudioConfig, AudioInput, AudioInputEncoding},
    listen::{AgentListenProvider, AgentListenSettings, DeepgramListenV2Provider},
    settings::{AgentConfig, InlineAgentConfig, SettingsMessage},
    speak::{DeepgramSpeakModel, DeepgramSpeakProvider, SpeakProvider, SpeakSettings},
    think::{OpenAiModel, OpenAiThinkProvider, ThinkProvider, ThinkSettings},
    AgentEvent, AgentResponse,
};
use deepgram::{Deepgram, DeepgramError};

| 44 | +/// How long to keep the session open before closing. |
| 45 | +static SESSION_DURATION: Duration = Duration::from_secs(30); |
| 46 | + |
| 47 | +#[tokio::main] |
| 48 | +async fn main() -> Result<(), DeepgramError> { |
| 49 | + let api_key = env::var("DEEPGRAM_API_KEY").expect("DEEPGRAM_API_KEY environment variable"); |
| 50 | + |
| 51 | + let dg = Deepgram::new(&api_key)?; |
| 52 | + let (mut handle, mut events) = dg.agent().start().await?; |
| 53 | + |
| 54 | + println!( |
| 55 | + "Connected. dg-request-id from upgrade headers: {:?}", |
| 56 | + handle.request_id() |
| 57 | + ); |
| 58 | + |
| 59 | + let settings = SettingsMessage::new( |
| 60 | + AudioConfig::new( |
| 61 | + Some(AudioInput::new(AudioInputEncoding::Linear16, 16_000)), |
| 62 | + None, |
| 63 | + ), |
| 64 | + AgentConfig::inline( |
| 65 | + InlineAgentConfig::from_parts( |
| 66 | + AgentListenSettings::new(AgentListenProvider::DeepgramV2( |
| 67 | + DeepgramListenV2Provider::new("flux-general-en"), |
| 68 | + )), |
| 69 | + ThinkSettings::new(ThinkProvider::OpenAi(OpenAiThinkProvider::new( |
| 70 | + OpenAiModel::Gpt4oMini, |
| 71 | + ))), |
| 72 | + SpeakSettings::new(SpeakProvider::Deepgram(DeepgramSpeakProvider::new( |
| 73 | + DeepgramSpeakModel::Aura2ThaliaEn, |
| 74 | + ))), |
| 75 | + ) |
| 76 | + .with_greeting("Hello! How can I help today?"), |
| 77 | + ), |
| 78 | + ); |
| 79 | + handle.send_settings(settings).await?; |
| 80 | + |
| 81 | + let timeout = tokio::time::sleep(SESSION_DURATION); |
| 82 | + tokio::pin!(timeout); |
| 83 | + |
| 84 | + loop { |
| 85 | + tokio::select! { |
| 86 | + _ = &mut timeout => { |
| 87 | + println!("\nSession duration reached, closing."); |
| 88 | + break; |
| 89 | + } |
| 90 | + event = events.next() => { |
| 91 | + match event { |
| 92 | + Some(Ok(AgentEvent::Json(response))) => match response { |
| 93 | + AgentResponse::Welcome(w) => { |
| 94 | + println!("Welcome event request_id: {}", w.request_id); |
| 95 | + } |
| 96 | + AgentResponse::SettingsApplied(_) => { |
| 97 | + println!("Settings applied"); |
| 98 | + } |
| 99 | + AgentResponse::AgentThinking(t) => { |
| 100 | + println!("Agent thinking: {}", t.content); |
| 101 | + } |
| 102 | + AgentResponse::ConversationText(c) => { |
| 103 | + println!("Conversation ({:?}): {}", c.role, c.content); |
| 104 | + } |
| 105 | + AgentResponse::UserStartedSpeaking(_) => { |
| 106 | + println!("User started speaking"); |
| 107 | + } |
| 108 | + AgentResponse::AgentStartedSpeaking(s) => { |
| 109 | + println!( |
| 110 | + "Agent started speaking (total_latency={:.3}s)", |
| 111 | + s.total_latency |
| 112 | + ); |
| 113 | + } |
| 114 | + AgentResponse::AgentAudioDone(_) => { |
| 115 | + println!("Agent audio done"); |
| 116 | + } |
| 117 | + AgentResponse::Warning(w) => { |
| 118 | + println!("Warning [{}]: {}", w.code, w.description); |
| 119 | + } |
| 120 | + AgentResponse::Error(e) => { |
| 121 | + eprintln!("Error [{}]: {}", e.code, e.description); |
| 122 | + break; |
| 123 | + } |
| 124 | + other => println!("Other event: {:?}", other), |
| 125 | + }, |
| 126 | + Some(Ok(AgentEvent::Audio(bytes))) => { |
| 127 | + println!("Audio chunk: {} bytes", bytes.len()); |
| 128 | + } |
| 129 | + // AgentEvent is #[non_exhaustive]; future variants land here. |
| 130 | + Some(Ok(_)) => {} |
| 131 | + Some(Err(err)) => { |
| 132 | + eprintln!("Stream error: {}", err); |
| 133 | + break; |
| 134 | + } |
| 135 | + None => { |
| 136 | + println!("Server closed connection."); |
| 137 | + break; |
| 138 | + } |
| 139 | + } |
| 140 | + } |
| 141 | + } |
| 142 | + } |
| 143 | + |
| 144 | + handle.close().await?; |
| 145 | + Ok(()) |
| 146 | +} |
0 commit comments