Skip to content
Merged
3 changes: 3 additions & 0 deletions sdk/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ add_library(CppSdk STATIC
src/catalog.cpp
src/openai_chat_client.cpp
src/openai_audio_client.cpp
src/openai_live_audio_types.cpp
src/openai_live_audio_client.cpp
src/foundry_local_manager.cpp
)
Comment thread
rui-ren marked this conversation as resolved.

Expand Down Expand Up @@ -91,6 +93,7 @@ if (BUILD_TESTING)
test/model_variant_test.cpp
test/catalog_test.cpp
test/client_test.cpp
test/live_audio_test.cpp
)

target_include_directories(CppSdkTests
Expand Down
2 changes: 2 additions & 0 deletions sdk/cpp/include/foundry_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@
#include "openai/openai_tool_types.h"
#include "openai/openai_chat_client.h"
#include "openai/openai_audio_client.h"
#include "openai/openai_live_audio_types.h"
#include "openai/openai_live_audio_client.h"
6 changes: 6 additions & 0 deletions sdk/cpp/include/openai/openai_audio_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <string_view>
#include <functional>
#include <filesystem>
#include <memory>

#include <gsl/pointers>

Expand All @@ -22,6 +23,8 @@ namespace foundry_local {
std::string text;
};

class LiveAudioTranscriptionSession;

class OpenAIAudioClient final {
public:
explicit OpenAIAudioClient(const IModel& model);
Expand All @@ -34,6 +37,9 @@ namespace foundry_local {
using StreamCallback = std::function<void(const AudioCreateTranscriptionResponse& chunk)>;
void TranscribeAudioStreaming(const std::filesystem::path& audioFilePath, const StreamCallback& onChunk) const;

/// Create a new live audio transcription session for streaming PCM audio.
std::unique_ptr<LiveAudioTranscriptionSession> CreateLiveTranscriptionSession() const;

private:
OpenAIAudioClient(gsl::not_null<foundry_local::Internal::IFoundryLocalCore*> core, std::string_view modelId,
gsl::not_null<ILogger*> logger);
Expand Down
101 changes: 101 additions & 0 deletions sdk/cpp/include/openai/openai_live_audio_client.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <string>
#include <memory>
#include <thread>
#include <mutex>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <vector>

#include <gsl/pointers>

#include "openai_live_audio_types.h"

// Forward declarations of internal types so this public header does not have to
// include internal headers. Both are only used below through pointers
// (gsl::not_null<T*> / std::unique_ptr<T>), which is why incomplete types suffice here.
namespace foundry_local::Internal {
// Interface to the native core; the session stores a non-null pointer to it.
struct IFoundryLocalCore;
// Queue template used for the session's push and result queues.
// NOTE(review): the name suggests a thread-safe queue; its definition is not visible from this header.
template <typename T> class ThreadSafeQueue;
} // namespace foundry_local::Internal

namespace foundry_local {
class ILogger;

/// A single live (streaming) audio transcription session.
///
/// Instances are created by OpenAIAudioClient::CreateLiveTranscriptionSession().
/// Going by the per-method comments below, the intended call order is:
/// adjust Settings(), Start(), Append() audio while polling TryGetNext(), then Stop().
/// The type is neither copyable nor movable, so it is handled through std::unique_ptr.
class LiveAudioTranscriptionSession final {
public:
/// Construct a session bound to the given core, model id and logger.
/// The core and logger pointers are non-null by type.
/// NOTE(review): presumably both must outlive the session, since only pointers are stored — confirm.
LiveAudioTranscriptionSession(gsl::not_null<Internal::IFoundryLocalCore*> core,
std::string modelId,
gsl::not_null<ILogger*> logger);
/// Defined out of line: ThreadSafeQueue is only forward-declared in this header, so the
/// std::unique_ptr members can only be destroyed where that type is complete.
~LiveAudioTranscriptionSession() noexcept;

// Non-copyable, non-movable
LiveAudioTranscriptionSession(const LiveAudioTranscriptionSession&) = delete;
LiveAudioTranscriptionSession& operator=(const LiveAudioTranscriptionSession&) = delete;
LiveAudioTranscriptionSession(LiveAudioTranscriptionSession&&) = delete;
LiveAudioTranscriptionSession& operator=(LiveAudioTranscriptionSession&&) = delete;

/// Mutable settings reference; only effective before Start().
/// Returns a direct reference to the member; this accessor takes no lock.
LiveAudioTranscriptionOptions& Settings() { return settings_; }
/// Read-only settings reference.
const LiveAudioTranscriptionOptions& Settings() const { return settings_; }
/// Settings that were active when Start() was called.
const LiveAudioTranscriptionOptions& ActiveSettings() const { return activeSettings_; }

/// Begin the streaming session. Must be called before Append().
void Start();

/// Enqueue PCM audio data. Blocks if the push queue is full.
/// @param pcmData Pointer to the audio bytes.
/// @param length  Number of bytes at pcmData.
void Append(const uint8_t* pcmData, size_t length);

/// Try to get the next transcription result within the given timeout.
/// @param result  Output parameter for the next response.
///                NOTE(review): presumably only written when the returned status is Result — confirm in the .cpp.
/// @param timeout Maximum time to wait; defaults to 5 seconds.
/// @return A TranscriptionStatus describing the outcome of the wait.
TranscriptionStatus TryGetNext(LiveAudioTranscriptionResponse& result,
std::chrono::milliseconds timeout = std::chrono::seconds(5));

/// Signal the end of audio input and stop the session.
void Stop();

/// Returns the error message if the session is in an error state.
std::string GetErrorMessage() const;

/// Returns true if the session has been started.
bool IsStarted() const;

/// Returns true if the session has been stopped.
bool IsStopped() const;

private:
// Lifecycle states of the session; a new session begins in Created (see state_ below).
// NOTE(review): the order Created -> Starting -> Started -> Stopped is implied by the names;
// the actual transitions live in the .cpp.
enum class SessionState {
Created,
Starting,
Started,
Stopped
};

// NOTE(review): presumably the body executed by pushThread_ — confirm in the .cpp.
void PushWorkerLoop();
// Shared stop logic that receives the caller's unique_lock by reference.
// NOTE(review): presumably `lock` is expected to hold mutex_ on entry — confirm in the .cpp.
void StopInternal(std::unique_lock<std::mutex>& lock);

// Dependencies captured at construction time.
gsl::not_null<Internal::IFoundryLocalCore*> core_;
std::string modelId_;
gsl::not_null<ILogger*> logger_;

// settings_ is what Settings() exposes; activeSettings_ is what ActiveSettings() exposes.
LiveAudioTranscriptionOptions settings_;
LiveAudioTranscriptionOptions activeSettings_;

// mutable so that const member functions can lock it.
mutable std::mutex mutex_;
SessionState state_ = SessionState::Created;
std::string sessionHandle_;

// One chunk of raw audio bytes.
using AudioChunk = std::vector<uint8_t>;
std::unique_ptr<Internal::ThreadSafeQueue<AudioChunk>> pushQueue_;
std::unique_ptr<Internal::ThreadSafeQueue<LiveAudioTranscriptionResponse>> resultQueue_;

std::thread pushThread_;
std::string errorMessage_;
LiveAudioTranscriptionResponse finalResult_;
bool hasFinalResult_ = false;
};

} // namespace foundry_local
50 changes: 50 additions & 0 deletions sdk/cpp/include/openai/openai_live_audio_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <string>
#include <vector>
#include <optional>

namespace foundry_local {

/// One entry of the `content` list carried by a LiveAudioTranscriptionResponse.
/// Both fields start out as empty strings.
/// NOTE(review): the distinction between `text` and `transcript` is defined by the
/// JSON produced by the core; it is not visible from this header.
struct ContentPart {
    std::string text{};
    std::string transcript{};
};

/// A single transcription result delivered by a live audio session.
struct LiveAudioTranscriptionResponse {
    /// Transcribed text of this result.
    std::string text{};
    /// Defaults to false. NOTE(review): presumably marks the last/definitive result — confirm.
    bool is_final{false};
    /// Optional timing of the result; unset by default.
    /// NOTE(review): units are not visible from this header (presumably seconds) — confirm.
    std::optional<double> start_time{};
    std::optional<double> end_time{};
    /// Content parts attached to this result; empty by default.
    std::vector<ContentPart> content{};

    /// Build a response from a JSON document.
    /// NOTE(review): error behaviour on malformed JSON is defined in the .cpp, not here.
    static LiveAudioTranscriptionResponse FromJson(const std::string& json);
};

/// Configuration for a live audio transcription session.
/// The defaults are 16000 / 1 channel / 16 bits per sample, no language, and a
/// push queue capacity of 100.
struct LiveAudioTranscriptionOptions {
    /// Audio sample rate. NOTE(review): presumably in Hz — confirm.
    int sample_rate{16000};
    /// Number of audio channels.
    int channels{1};
    /// Bits per audio sample.
    int bits_per_sample{16};
    /// Optional language; unset by default.
    std::optional<std::string> language{};
    /// Capacity of the session's push queue.
    /// NOTE(review): presumably counted in audio chunks — confirm in the .cpp.
    int push_queue_capacity{100};
};

/// Structured form of an error reported by the core.
struct CoreErrorResponse {
    /// Error code; empty by default.
    std::string code{};
    /// Error message; empty by default.
    std::string message{};
    /// Defaults to false. NOTE(review): presumably true means a retry may succeed — confirm.
    bool is_transient{false};

    /// Attempt to interpret `error_string` as a structured core error.
    /// @return The parsed error, or an empty optional.
    /// NOTE(review): the exact conditions for an empty result are defined in the .cpp.
    static std::optional<CoreErrorResponse> TryParse(const std::string& error_string);
};

/// Outcome reported by LiveAudioTranscriptionSession::TryGetNext().
/// The numeric values are spelled out explicitly; they match the implicit numbering.
/// NOTE(review): the per-enumerator meanings below are inferred from the names — confirm in the .cpp.
enum class TranscriptionStatus {
    Result = 0,  // presumably: a result was produced
    Timeout = 1, // presumably: nothing arrived within the timeout
    Closed = 2,  // presumably: the session ended and no further results will arrive
    Error = 3    // presumably: the session failed; see GetErrorMessage()
};

} // namespace foundry_local
50 changes: 49 additions & 1 deletion sdk/cpp/src/core.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
// Core DLL interop – loads Microsoft.AI.Foundry.Local.Core.dll at runtime.
// Core DLL interop – loads Microsoft.AI.Foundry.Local.Core.dll at runtime.
// Internal header, not part of the public API.

#pragma once
Expand Down Expand Up @@ -46,6 +46,7 @@ namespace foundry_local {
module_.reset();
execCmd_ = nullptr;
execCbCmd_ = nullptr;
execBinaryCmd_ = nullptr;
freeResCmd_ = nullptr;
}

Expand Down Expand Up @@ -91,10 +92,55 @@ namespace foundry_local {
return result;
}

/// Execute a core command that carries an additional raw binary payload.
///
/// @param command          Name of the command; passed to the core as pointer + length.
/// @param logger           Logger handed to any Exception thrown by this function.
/// @param dataArgument     Optional textual argument; a null pointer or an empty string
///                         leaves the request's Data fields zeroed.
/// @param binaryData       Optional payload bytes; ignored when null or when
///                         binaryDataLength is 0.
/// @param binaryDataLength Number of bytes at binaryData.
/// @return A CoreResponse whose `error` is set when the core reports an error; otherwise
///         `data` holds the response payload (left empty when the core returns none).
/// @throws Exception if the core is not loaded, or if the command, the data argument or
///         the binary payload is longer than INT32_MAX bytes.
CoreResponse callWithBinary(std::string_view command, ILogger& logger,
                            const std::string* dataArgument,
                            const uint8_t* binaryData, size_t binaryDataLength) const override {
    if (!module_ || !execBinaryCmd_ || !freeResCmd_) {
        throw Exception("Core is not loaded. Cannot call command: " + std::string(command), logger);
    }

    // Every length crosses the native boundary as int32_t, so anything larger than
    // INT32_MAX must be rejected up front instead of being silently truncated by the cast.
    constexpr size_t kMaxNativeLength = static_cast<size_t>(INT32_MAX);

    if (command.size() > kMaxNativeLength) {
        throw Exception("Command length exceeds maximum supported size (INT32_MAX).", logger);
    }

    StreamingRequestBuffer request{};
    request.Command = command.empty() ? nullptr : command.data();
    request.CommandLength = static_cast<int32_t>(command.size());

    if (dataArgument && !dataArgument->empty()) {
        if (dataArgument->size() > kMaxNativeLength) {
            throw Exception("Data argument length exceeds maximum supported size (INT32_MAX).", logger);
        }
        request.Data = dataArgument->data();
        request.DataLength = static_cast<int32_t>(dataArgument->size());
    }

    if (binaryData && binaryDataLength > 0) {
        if (binaryDataLength > kMaxNativeLength) {
            throw Exception("Binary data length exceeds maximum supported size (INT32_MAX).", logger);
        }
        request.BinaryData = binaryData;
        request.BinaryDataLength = static_cast<int32_t>(binaryDataLength);
    }

    ResponseBuffer response{};
    // The guard hands the response back to the core's free function on every exit path,
    // including when copying into the result below throws.
    auto safeDeleter = [fn = freeResCmd_](ResponseBuffer* buf) {
        if (fn)
            fn(buf);
    };
    std::unique_ptr<ResponseBuffer, decltype(safeDeleter)> responseGuard(&response, safeDeleter);

    execBinaryCmd_(&request, &response);

    CoreResponse result;
    // An error takes precedence: when one is reported, any data in the response is ignored.
    if (response.Error && response.ErrorLength > 0) {
        result.error.assign(static_cast<const char*>(response.Error), response.ErrorLength);
        return result;
    }
    if (response.Data && response.DataLength > 0) {
        result.data.assign(static_cast<const char*>(response.Data), response.DataLength);
    }
    return result;
}

private:
wil::unique_hmodule module_;
execute_command_fn execCmd_{};
execute_command_with_callback_fn execCbCmd_{};
execute_command_with_binary_fn execBinaryCmd_{};
free_response_fn freeResCmd_{};

void LoadFromPath(const std::filesystem::path& path) {
Expand All @@ -105,6 +151,8 @@ namespace foundry_local {
execCmd_ = reinterpret_cast<execute_command_fn>(RequireProc(m.get(), "execute_command"));
execCbCmd_ = reinterpret_cast<execute_command_with_callback_fn>(
RequireProc(m.get(), "execute_command_with_callback"));
execBinaryCmd_ = reinterpret_cast<execute_command_with_binary_fn>(
RequireProc(m.get(), "execute_command_with_binary"));
freeResCmd_ = reinterpret_cast<free_response_fn>(RequireProc(m.get(), "free_response"));

module_ = std::move(m);
Expand Down
11 changes: 11 additions & 0 deletions sdk/cpp/src/flcore_native.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,25 @@ extern "C"
// Callback signature: void(*)(void* data, int length, void* userData)
using UserCallbackFn = void(__cdecl*)(void*, int32_t, void*);

// Request passed by pointer to the core's execute_command_with_binary export.
// Each pointer is paired with an explicit int32_t byte length.
// NOTE(review): field order and types presumably have to match the native side exactly — do not reorder.
struct StreamingRequestBuffer {
// Command name bytes and their length.
const void* Command;
int32_t CommandLength;
// Optional textual argument and its length.
const void* Data;
int32_t DataLength;
// Optional raw binary payload and its length.
const void* BinaryData;
int32_t BinaryDataLength;
};

// Exported function pointer types
using execute_command_fn = void(__cdecl*)(RequestBuffer*, ResponseBuffer*);
using execute_command_with_callback_fn = void(__cdecl*)(RequestBuffer*, ResponseBuffer*, void* /*callback*/,
void* /*userData*/);
using execute_command_with_binary_fn = void(__cdecl*)(StreamingRequestBuffer*, ResponseBuffer*);
using free_response_fn = void(__cdecl*)(ResponseBuffer*);

static_assert(std::is_standard_layout<RequestBuffer>::value, "RequestBuffer must be standard layout");
static_assert(std::is_standard_layout<ResponseBuffer>::value, "ResponseBuffer must be standard layout");
static_assert(std::is_standard_layout<StreamingRequestBuffer>::value, "StreamingRequestBuffer must be standard layout");

#pragma pack(pop)
}
5 changes: 5 additions & 0 deletions sdk/cpp/src/foundry_local_internal_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ namespace foundry_local {
virtual CoreResponse call(std::string_view command, ILogger& logger,
const std::string* dataArgument = nullptr, NativeCallbackFn callback = nullptr,
void* data = nullptr) const = 0;

// Variant of call() that additionally passes a raw binary payload
// (binaryData / binaryDataLength) alongside the optional string argument.
// Unlike call(), dataArgument has no default value here, so callers must pass it
// explicitly. The DLL-backed implementation in core.h accepts nullptr for both
// dataArgument and binaryData and then leaves the matching request fields zeroed.
virtual CoreResponse callWithBinary(std::string_view command, ILogger& logger,
const std::string* dataArgument,
const uint8_t* binaryData, size_t binaryDataLength) const = 0;

virtual void unload() = 0;
};

Expand Down
6 changes: 6 additions & 0 deletions sdk/cpp/src/openai_audio_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include "core_helpers.h"
#include "logger.h"

#include "openai/openai_live_audio_client.h"

namespace foundry_local {

OpenAIAudioClient::OpenAIAudioClient(gsl::not_null<Internal::IFoundryLocalCore*> core, std::string_view modelId,
Expand Down Expand Up @@ -67,4 +69,8 @@ namespace foundry_local {
}
}

/// Create a new live transcription session bound to this client's core, model id and logger.
/// Every call constructs an independent session; ownership passes to the caller.
std::unique_ptr<LiveAudioTranscriptionSession> OpenAIAudioClient::CreateLiveTranscriptionSession() const {
    auto session = std::make_unique<LiveAudioTranscriptionSession>(core_, modelId_, logger_);
    return session;
}

} // namespace foundry_local
Loading
Loading