diff --git a/03-CoreGenerativeAITechniques/src/Audio-02-RealTimeAudio/Audio-02-RealTimeAudio.csproj b/03-CoreGenerativeAITechniques/src/Audio-02-RealTimeAudio/Audio-02-RealTimeAudio.csproj
index 4b4a2ee0..d684083c 100644
--- a/03-CoreGenerativeAITechniques/src/Audio-02-RealTimeAudio/Audio-02-RealTimeAudio.csproj
+++ b/03-CoreGenerativeAITechniques/src/Audio-02-RealTimeAudio/Audio-02-RealTimeAudio.csproj
@@ -10,9 +10,9 @@
-
-
-
+
+
+
diff --git a/03-CoreGenerativeAITechniques/src/Audio-02-RealTimeAudio/Program.cs b/03-CoreGenerativeAITechniques/src/Audio-02-RealTimeAudio/Program.cs
index 16e86ce3..d9480e07 100644
--- a/03-CoreGenerativeAITechniques/src/Audio-02-RealTimeAudio/Program.cs
+++ b/03-CoreGenerativeAITechniques/src/Audio-02-RealTimeAudio/Program.cs
@@ -2,45 +2,34 @@
using Azure.Identity;
using Microsoft.Extensions.Configuration;
using OpenAI;
-using OpenAI.RealtimeConversation;
+using OpenAI.Realtime;
using System.ClientModel;
-#pragma warning disable OPENAI002
+#pragma warning disable AOAI001, OPENAI002
public class Program
{
+ // Name of the function tool the model calls to end the chat; used both when the tool is registered and when the receive loop checks finished items.
+ private const string finishedConversationToolName = "user_wants_to_finish_conversation";
+
public static async Task Main(string[] args)
{
// First, we create a client according to configured environment variables (see end of file) and then start
// a new conversation session.
- RealtimeConversationClient client = GetConfiguredClient();
- using RealtimeConversationSession session = await client.StartConversationSessionAsync();
-
- // We'll add a simple function tool that enables the model to interpret user input to figure out when it
- // might be a good time to stop the interaction.
- ConversationFunctionTool finishConversationTool = new()
- {
- Name = "user_wants_to_finish_conversation",
- Description = "Invoked when the user says goodbye, expresses being finished, or otherwise seems to want to stop the interaction.",
- Parameters = BinaryData.FromString("{}")
- };
+ RealtimeClient client = GetConfiguredClient();
+ var realtimeModel = GetModel();
+ using RealtimeSession session = await client.StartConversationSessionAsync(realtimeModel);
// Now we configure the session using the tool we created along with transcription options that enable input
// audio transcription with whisper.
- await session.ConfigureSessionAsync(new ConversationSessionOptions()
- {
- Tools = { finishConversationTool },
- InputTranscriptionOptions = new()
- {
- Model = "whisper-1",
- },
- });
+ var prompt = "you are a useful chat that helps the user.";
+ ConversationSessionOptions conversationSessionOptions = CreateConversationSessionOptions(prompt);
+ await session.ConfigureConversationSessionAsync(conversationSessionOptions);
// For convenience, we'll proactively start playback to the speakers now. Nothing will play until it's enqueued.
SpeakerOutput speakerOutput = new();
// With the session configured, we start processing commands received from the service.
- await foreach (ConversationUpdate update in session.ReceiveUpdatesAsync())
+ await foreach (RealtimeUpdate update in session.ReceiveUpdatesAsync())
{
// session.created is the very first command on a session and lets us know that connection was successful.
if (update is ConversationSessionStartedUpdate)
@@ -61,7 +50,7 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()
// input_audio_buffer.speech_started tells us that the beginning of speech was detected in the input audio
// we're sending from the microphone.
- if (update is ConversationInputSpeechStartedUpdate speechStartedUpdate)
+ if (update is InputAudioSpeechStartedUpdate speechStartedUpdate)
{
Console.WriteLine($" <<< Start of speech detected @ {speechStartedUpdate.AudioStartTime}");
// Like any good listener, we can use the cue that the user started speaking as a hint that the app
@@ -72,7 +61,7 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()
// input_audio_buffer.speech_stopped tells us that the end of speech was detected in the input audio sent
// from the microphone. It'll automatically tell the model to start generating a response to reply back.
- if (update is ConversationInputSpeechFinishedUpdate speechFinishedUpdate)
+ if (update is InputAudioSpeechFinishedUpdate speechFinishedUpdate)
{
Console.WriteLine($" <<< End of speech detected @ {speechFinishedUpdate.AudioEndTime}");
}
@@ -80,14 +69,14 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()
// conversation.item.input_audio_transcription.completed will only arrive if input transcription was
// configured for the session. It provides a written representation of what the user said, which can
// provide good feedback about what the model will use to respond.
- if (update is ConversationInputTranscriptionFinishedUpdate transcriptionFinishedUpdate)
+ if (update is InputAudioTranscriptionFinishedUpdate transcriptionFinishedUpdate)
{
Console.WriteLine($" >>> USER: {transcriptionFinishedUpdate.Transcript}");
}
// Item streaming delta updates provide a combined view into incremental item data including output
// the audio response transcript, function arguments, and audio data.
- if (update is ConversationItemStreamingPartDeltaUpdate deltaUpdate)
+ if (update is OutputDeltaUpdate deltaUpdate)
{
Console.Write(deltaUpdate.AudioTranscript);
Console.Write(deltaUpdate.Text);
@@ -96,10 +85,10 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()
// response.output_item.done tells us that a model-generated item with streaming content is completed.
// That's a good signal to provide a visual break and perform final evaluation of tool calls.
- if (update is ConversationItemStreamingFinishedUpdate itemFinishedUpdate)
+ if (update is OutputStreamingFinishedUpdate itemFinishedUpdate)
{
Console.WriteLine();
- if (itemFinishedUpdate.FunctionName == finishConversationTool.Name)
+ if (itemFinishedUpdate.FunctionName == finishedConversationToolName)
{
Console.WriteLine($" <<< Finish tool invoked -- ending conversation!");
break;
@@ -107,7 +96,7 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()
}
// error commands, as the name implies, are raised when something goes wrong.
- if (update is ConversationErrorUpdate errorUpdate)
+ if (update is RealtimeErrorUpdate errorUpdate)
{
Console.WriteLine();
Console.WriteLine();
@@ -118,7 +107,49 @@ await session.ConfigureSessionAsync(new ConversationSessionOptions()
}
}
- private static RealtimeConversationClient GetConfiguredClient()
+ // Builds the options used to configure the realtime session: system instructions, voice, PCM16 audio
+ // in both directions, whisper input transcription, and the "finish conversation" function tool.
+ private static ConversationSessionOptions CreateConversationSessionOptions(string instructions)
+ {
+     // A simple function tool that lets the model interpret user input and decide when it might be a
+     // good time to stop the interaction. The receive loop in Main compares finished items against
+     // this tool name.
+     ConversationFunctionTool endOfChatTool = new(finishedConversationToolName)
+     {
+         Description = "Invoked when the user says goodbye, expresses being finished, or otherwise seems to want to stop the interaction.",
+         Parameters = BinaryData.FromString("{}")
+     };
+
+     return new ConversationSessionOptions
+     {
+         Instructions = instructions,
+         Voice = ConversationVoice.Alloy,
+         InputAudioFormat = RealtimeAudioFormat.Pcm16,
+         OutputAudioFormat = RealtimeAudioFormat.Pcm16,
+         // Input transcription options must be provided to enable transcribed feedback for input audio
+         InputTranscriptionOptions = new()
+         {
+             Model = "whisper-1",
+         },
+         Tools = { endOfChatTool },
+     };
+ }
+
+ #region Configuration
+
+ // Resolves the model (deployment) name that Main passes when starting the realtime session.
+ // Lookup order: the AZURE_OPENAI_DEPLOYMENT environment variable, then the user-secrets entry with
+ // the same key, and finally the "gpt-realtime" default.
+ private static string GetModel()
+ {
+     string? aoaiDeployment = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT");
+
+     if (string.IsNullOrEmpty(aoaiDeployment))
+     {
+         // AddUserSecrets needs a type argument to locate the assembly's UserSecretsId;
+         // there is no parameterless non-generic overload.
+         var config = new ConfigurationBuilder().AddUserSecrets<Program>().Build();
+         aoaiDeployment = config["AZURE_OPENAI_DEPLOYMENT"];
+     }
+
+     return string.IsNullOrEmpty(aoaiDeployment) ? "gpt-realtime" : aoaiDeployment;
+ }
+
+ private static RealtimeClient GetConfiguredClient()
{
string? aoaiEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT");
string? aoaiUseEntra = Environment.GetEnvironmentVariable("AZURE_OPENAI_USE_ENTRA");
@@ -163,7 +194,7 @@ private static RealtimeConversationClient GetConfiguredClient()
}
}
- private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithEntra(
+ private static RealtimeClient GetConfiguredClientForAzureOpenAIWithEntra(
string aoaiEndpoint,
string? aoaiDeployment)
{
@@ -174,10 +205,10 @@ private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithE
: $" * Using deployment (AZURE_OPENAI_DEPLOYMENT): {aoaiDeployment}");
AzureOpenAIClient aoaiClient = new(new Uri(aoaiEndpoint), new DefaultAzureCredential());
- return aoaiClient.GetRealtimeConversationClient(aoaiDeployment);
+ return aoaiClient.GetRealtimeClient();
}
- private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithKey(
+ private static RealtimeClient GetConfiguredClientForAzureOpenAIWithKey(
string aoaiEndpoint,
string? aoaiDeployment,
string aoaiApiKey)
@@ -189,16 +220,17 @@ private static RealtimeConversationClient GetConfiguredClientForAzureOpenAIWithK
: $" * Using deployment (AZURE_OPENAI_DEPLOYMENT): {aoaiDeployment}");
AzureOpenAIClient aoaiClient = new(new Uri(aoaiEndpoint), new ApiKeyCredential(aoaiApiKey));
- return aoaiClient.GetRealtimeConversationClient(aoaiDeployment);
+ return aoaiClient.GetRealtimeClient();
}
- private static RealtimeConversationClient GetConfiguredClientForOpenAIWithKey(string oaiApiKey)
+ // Creates the realtime client for OpenAI (non-Azure) from an API key and prints the connection details.
+ // No model is bound here; Main supplies the model name when it starts the session.
+ private static RealtimeClient GetConfiguredClientForOpenAIWithKey(string oaiApiKey)
{
+ // oaiEndpoint is only printed; the client below is constructed from the API key alone.
string oaiEndpoint = "https://api.openai.com/v1";
Console.WriteLine($" * Connecting to OpenAI endpoint (OPENAI_ENDPOINT): {oaiEndpoint}");
+ // Only the first five characters of the key are echoed; the [..5] range throws for a shorter key.
Console.WriteLine($" * Using API key (OPENAI_API_KEY): {oaiApiKey[..5]}**");
OpenAIClient aoaiClient = new(new ApiKeyCredential(oaiApiKey));
- return aoaiClient.GetRealtimeConversationClient("gpt-4o-realtime-preview-2024-10-01");
+ return aoaiClient.GetRealtimeClient();
}
+ #endregion
}