Address Copilot review feedback: fix deadlock, validation, and safety

ruiren_microsoft · Copilot · ruiren_microsoft · commit ef253f02dea3 · 2026-04-24T15:58:25.000-07:00
- Fix potential deadlock: close resultQueue before joining pushThread in
  StopInternal, store final response in member variable instead of pushing
  to closed queue. TryGetNext returns it after queue drains.
- Use TryPush in PushWorkerLoop to prevent worker blocking on full result
  queue (log warning on drop instead of deadlocking).
- Validate push_queue_capacity > 0 before Start() to prevent hang/DoS.
- Add bounds check for size_t to int32_t cast in callWithBinary.
- Improve error messages: distinguish not-started vs already-stopped.
- Fall back to raw response.error when parsed CoreErrorResponse.message
  is empty.
- Mark CreateLiveTranscriptionSession() as const.
- Add tests: AppendAfterStopThrows, Start_InvalidCapacityThrows.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/sdk/cpp/include/openai/openai_audio_client.h b/sdk/cpp/include/openai/openai_audio_client.h
@@ -38,7 +38,7 @@ namespace foundry_local {
         void TranscribeAudioStreaming(const std::filesystem::path& audioFilePath, const StreamCallback& onChunk) const;
 
         /// Create a new live audio transcription session for streaming PCM audio.
-        std::unique_ptr<LiveAudioTranscriptionSession> CreateLiveTranscriptionSession();
+        std::unique_ptr<LiveAudioTranscriptionSession> CreateLiveTranscriptionSession() const;
 
     private:
         OpenAIAudioClient(gsl::not_null<foundry_local::Internal::IFoundryLocalCore*> core, std::string_view modelId,
diff --git a/sdk/cpp/include/openai/openai_live_audio_client.h b/sdk/cpp/include/openai/openai_live_audio_client.h
@@ -94,6 +94,8 @@ namespace foundry_local {
 
         std::thread pushThread_;
         std::string errorMessage_;
+        LiveAudioTranscriptionResponse finalResult_;
+        bool hasFinalResult_ = false;
     };
 
 } // namespace foundry_local
diff --git a/sdk/cpp/src/core.h b/sdk/cpp/src/core.h
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 //
-// Core DLL interop � loads Microsoft.AI.Foundry.Local.Core.dll at runtime.
+// Core DLL interop — loads Microsoft.AI.Foundry.Local.Core.dll at runtime.
 // Internal header, not part of the public API.
 
 #pragma once
@@ -109,6 +109,9 @@ namespace foundry_local {
             }
 
             if (binaryData && binaryDataLength > 0) {
+                if (binaryDataLength > static_cast<size_t>(INT32_MAX)) {
+                    throw Exception("Binary data length exceeds maximum supported size (INT32_MAX).", logger);
+                }
                 request.BinaryData = binaryData;
                 request.BinaryDataLength = static_cast<int32_t>(binaryDataLength);
             }
diff --git a/sdk/cpp/src/openai_audio_client.cpp b/sdk/cpp/src/openai_audio_client.cpp
@@ -69,7 +69,7 @@ namespace foundry_local {
         }
     }
 
-    std::unique_ptr<LiveAudioTranscriptionSession> OpenAIAudioClient::CreateLiveTranscriptionSession() {
+    std::unique_ptr<LiveAudioTranscriptionSession> OpenAIAudioClient::CreateLiveTranscriptionSession() const {
         return std::make_unique<LiveAudioTranscriptionSession>(core_, modelId_, logger_);
     }
 
diff --git a/sdk/cpp/src/openai_live_audio_client.cpp b/sdk/cpp/src/openai_live_audio_client.cpp
@@ -48,6 +48,12 @@ namespace foundry_local {
         state_ = SessionState::Starting;
         activeSettings_ = settings_;
 
+        // Validate queue capacity early
+        if (activeSettings_.push_queue_capacity <= 0) {
+            state_ = SessionState::Created;
+            throw Exception("push_queue_capacity must be greater than 0.", *logger_);
+        }
+
         // Build the start command
         CoreInteropRequest req("audio_stream_start");
         req.AddParam("Model", modelId_);
@@ -77,11 +83,12 @@ namespace foundry_local {
             throw Exception("audio_stream_start returned an empty session handle.", *logger_);
         }
 
+        // Validate queue capacity
+        const size_t queueCapacity = static_cast<size_t>(activeSettings_.push_queue_capacity);
+
         // Create the queues
-        pushQueue_ = std::make_unique<Internal::ThreadSafeQueue<AudioChunk>>(
-            static_cast<size_t>(activeSettings_.push_queue_capacity));
-        resultQueue_ = std::make_unique<Internal::ThreadSafeQueue<LiveAudioTranscriptionResponse>>(
-            static_cast<size_t>(activeSettings_.push_queue_capacity));
+        pushQueue_ = std::make_unique<Internal::ThreadSafeQueue<AudioChunk>>(queueCapacity);
+        resultQueue_ = std::make_unique<Internal::ThreadSafeQueue<LiveAudioTranscriptionResponse>>(queueCapacity);
 
         state_ = SessionState::Started;
 
@@ -93,7 +100,11 @@ namespace foundry_local {
         {
             std::lock_guard<std::mutex> lock(mutex_);
             if (state_ != SessionState::Started) {
-                throw Exception("Session is not started. Call Start() first.", *logger_);
+                throw Exception(
+                    state_ == SessionState::Stopped
+                        ? "Session has already been stopped."
+                        : "Session is not started. Call Start() first.",
+                    *logger_);
             }
         }
 
@@ -118,8 +129,16 @@ namespace foundry_local {
                 return TranscriptionStatus::Result;
             case Internal::DequeueStatus::Timeout:
                 return TranscriptionStatus::Timeout;
-            case Internal::DequeueStatus::Closed:
+            case Internal::DequeueStatus::Closed: {
+                // Return the final result from Stop() if available
+                std::lock_guard<std::mutex> lock(mutex_);
+                if (hasFinalResult_) {
+                    result = std::move(finalResult_);
+                    hasFinalResult_ = false;
+                    return TranscriptionStatus::Result;
+                }
                 return TranscriptionStatus::Closed;
+            }
             case Internal::DequeueStatus::Error:
                 return TranscriptionStatus::Error;
             default:
@@ -144,9 +163,15 @@ namespace foundry_local {
             pushQueue_->Close();
         }
 
+        // Close the result queue to unblock any blocked Push() in the worker thread,
+        // preventing a deadlock when joining below.
+        if (resultQueue_) {
+            resultQueue_->Close();
+        }
+
         lock.unlock();
 
-        // Wait for the push thread to finish
+        // Wait for the push thread to finish (safe now — worker is unblocked)
         if (pushThread_.joinable()) {
             pushThread_.join();
         }
@@ -158,23 +183,20 @@ namespace foundry_local {
 
         auto response = core_->call(req.Command(), *logger_, &json);
 
-        // Enqueue the final transcription result from the stop response, then close
-        if (resultQueue_) {
-            if (response.HasError()) {
+        // Store the final result or error for retrieval via TryGetNext
+        if (response.HasError()) {
+            if (resultQueue_) {
                 resultQueue_->CloseWithError("audio_stream_stop failed: " + response.error);
             }
-            else {
-                if (!response.data.empty()) {
-                    try {
-                        auto finalResult = LiveAudioTranscriptionResponse::FromJson(response.data);
-                        resultQueue_->Push(std::move(finalResult));
-                    }
-                    catch (const std::exception& e) {
-                        logger_->Log(LogLevel::Warning,
-                                     std::string("Failed to parse final transcription response: ") + e.what());
-                    }
-                }
-                resultQueue_->Close();
+        }
+        else if (!response.data.empty()) {
+            try {
+                finalResult_ = LiveAudioTranscriptionResponse::FromJson(response.data);
+                hasFinalResult_ = true;
+            }
+            catch (const std::exception& e) {
+                logger_->Log(LogLevel::Warning,
+                             std::string("Failed to parse final transcription response: ") + e.what());
             }
         }
 
@@ -204,7 +226,10 @@ namespace foundry_local {
 
             if (response.HasError()) {
                 auto coreError = CoreErrorResponse::TryParse(response.error);
-                std::string msg = coreError.has_value() ? coreError->message : response.error;
+                std::string msg =
+                    (coreError.has_value() && !coreError->message.empty())
+                        ? coreError->message
+                        : response.error;
 
                 logger_->Log(LogLevel::Error, "audio_stream_push failed: " + msg);
                 pushQueue_->Close();
@@ -219,7 +244,11 @@ namespace foundry_local {
             if (!response.data.empty()) {
                 try {
                     auto result = LiveAudioTranscriptionResponse::FromJson(response.data);
-                    resultQueue_->Push(std::move(result));
+                    if (!resultQueue_->TryPush(std::move(result))) {
+                        logger_->Log(
+                            LogLevel::Warning,
+                            "Dropping transcription result because the result queue is full.");
+                    }
                 }
                 catch (const std::exception& e) {
                     logger_->Log(LogLevel::Warning,
diff --git a/sdk/cpp/test/live_audio_test.cpp b/sdk/cpp/test/live_audio_test.cpp
@@ -210,6 +210,22 @@ TEST_F(LiveAudioSessionTest, AppendBeforeStartThrows) {
     EXPECT_THROW(session.Append(data.data(), data.size()), Exception);
 }
 
+TEST_F(LiveAudioSessionTest, AppendAfterStopThrows) {
+    SetUpAllHandlers();
+
+    LiveAudioTranscriptionSession session(&core_, "whisper-model", &logger_);
+    session.Start();
+    session.Stop();
+    std::vector<uint8_t> data = {0, 1, 2, 3};
+    EXPECT_THROW(session.Append(data.data(), data.size()), Exception);
+}
+
+TEST_F(LiveAudioSessionTest, Start_InvalidCapacityThrows) {
+    LiveAudioTranscriptionSession session(&core_, "whisper-model", &logger_);
+    session.Settings().push_queue_capacity = 0;
+    EXPECT_THROW(session.Start(), Exception);
+}
+
 TEST_F(LiveAudioSessionTest, StopParseFinalResponse) {
     SetUpStartHandlers();
     SetUpPushHandler();

Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,7 @@ namespace foundry_local {`
`69`	`69`	`}`
`70`	`70`	`}`
`71`	`71`
`72`		`- std::unique_ptr<LiveAudioTranscriptionSession> OpenAIAudioClient::CreateLiveTranscriptionSession() {`
	`72`	`+ std::unique_ptr<LiveAudioTranscriptionSession> OpenAIAudioClient::CreateLiveTranscriptionSession() const {`
`73`	`73`	`return std::make_unique<LiveAudioTranscriptionSession>(core_, modelId_, logger_);`
`74`	`74`	`}`
`75`	`75`