Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions samples/cpp/live-audio-transcription/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Live Audio Transcription Example (C++)

Demonstrates real-time microphone-to-text using the Foundry Local C++ SDK.

The sample uses [PortAudio](http://www.portaudio.com/) for cross-platform microphone capture
(the C/C++ equivalent of `naudiodon2` used by the JS sample). If the sample is built without
PortAudio, or the microphone cannot be opened, it falls back to synthetic PCM audio.


## Build

```bash
# With PortAudio (live microphone)
g++ -std=c++20 -DHAS_PORTAUDIO main.cpp -lfoundry_local -lportaudio -o live-audio-transcription-example

# Without PortAudio (synthetic audio only)
g++ -std=c++20 main.cpp -lfoundry_local -o live-audio-transcription-example
```

## Run

```bash
# Live microphone (requires PortAudio)
./live-audio-transcription-example

# Synthetic 440Hz sine wave (no microphone needed)
./live-audio-transcription-example --synth
```
269 changes: 269 additions & 0 deletions samples/cpp/live-audio-transcription/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
// Live Audio Transcription — Foundry Local C++ SDK Example
//
// Demonstrates real-time microphone-to-text using the C++ SDK.
// Uses PortAudio for cross-platform mic capture (like naudiodon2 in the JS sample).
// Falls back to synthetic PCM if PortAudio is unavailable.
//
// Requires: Foundry Local C++ SDK. PortAudio (libportaudio) is optional and only
// needed for live microphone capture (build with -DHAS_PORTAUDIO).
//
// Usage: ./live-audio-transcription-example [--synth]

#include <algorithm>
#include <atomic>
#include <chrono>
#include <climits>
#include <cmath>
#include <csignal>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#include "foundry_local.h"

// PortAudio is optional — compile with -DHAS_PORTAUDIO and link -lportaudio
// to enable live microphone capture.
#ifdef HAS_PORTAUDIO
#include <portaudio.h>
#endif

namespace {

// Shutdown flag shared by every loop in this sample. It starts out true and is
// cleared on Ctrl+C (the C++ counterpart of the JS sample's process.on('SIGINT')).
std::atomic<bool> g_running{true};

// SIGINT handler: does nothing except clear the shutdown flag; the signal
// number itself is ignored.
void SignalHandler(int /*signum*/) {
    g_running.store(false);
}

// Mutex-protected FIFO of PCM chunks that never holds more than kMaxSize
// entries (mirrors the JS sample's appendQueue, which is capped at 100).
class AudioQueue {
public:
    // Appends `chunk` to the back of the queue. When the queue is already at
    // capacity the oldest chunk is discarded first, and a warning is written
    // to stderr the first time that happens (later drops are silent).
    void Push(std::vector<uint8_t> chunk) {
        const std::lock_guard<std::mutex> guard(mutex_);
        const bool atCapacity = pending_.size() >= kMaxSize;
        if (atCapacity) {
            pending_.pop_front();
            if (!overflowReported_) {
                overflowReported_ = true;
                std::cerr << "Audio append queue overflow; dropping oldest chunk to keep stream alive." << std::endl;
            }
        }
        pending_.emplace_back(std::move(chunk));
    }

    // Moves the oldest chunk into `out` and returns true. Returns false and
    // leaves `out` untouched when the queue is empty.
    bool TryPop(std::vector<uint8_t>& out) {
        const std::lock_guard<std::mutex> guard(mutex_);
        const bool hasChunk = !pending_.empty();
        if (hasChunk) {
            out = std::move(pending_.front());
            pending_.pop_front();
        }
        return hasChunk;
    }

private:
    static constexpr size_t kMaxSize = 100;  // maximum number of queued chunks
    std::deque<std::vector<uint8_t>> pending_;
    std::mutex mutex_;                        // guards pending_ and overflowReported_
    bool overflowReported_ = false;           // true once the overflow warning was printed
};

// Generates a sine tone as raw PCM: 16-bit signed samples, mono, little-endian,
// at half of full-scale amplitude.
//
// sampleRate       samples per second
// durationSeconds  length of the tone in whole seconds
// frequencyHz      tone frequency in Hz
//
// Returns sampleRate * durationSeconds samples (two bytes each). Returns an
// empty buffer when sampleRate or durationSeconds is zero or negative.
std::vector<uint8_t> GenerateSineWavePcm(int sampleRate, int durationSeconds, double frequencyHz) {
    if (sampleRate <= 0 || durationSeconds <= 0) {
        return {};
    }

    // Widen before multiplying so the sample count is never computed in int,
    // where large inputs would overflow (undefined behaviour).
    const auto totalSamples = static_cast<size_t>(sampleRate) * static_cast<size_t>(durationSeconds);
    std::vector<uint8_t> pcm(totalSamples * 2, 0);  // 2 bytes per sample

    const double amplitude = static_cast<double>(INT16_MAX) * 0.5;
    const double angularFrequency = 2.0 * 3.14159265358979323846 * frequencyHz;
    const double rate = static_cast<double>(sampleRate);

    for (size_t i = 0; i < totalSamples; ++i) {
        const double t = static_cast<double>(i) / rate;
        const auto sample = static_cast<int16_t>(amplitude * std::sin(angularFrequency * t));
        // Reinterpret as unsigned so the byte split below is well defined for
        // negative samples; low byte first (little-endian).
        const auto encodedSample = static_cast<uint16_t>(sample);
        pcm[i * 2] = static_cast<uint8_t>(encodedSample & 0xFF);
        pcm[i * 2 + 1] = static_cast<uint8_t>((encodedSample >> 8) & 0xFF);
    }
    return pcm;
}

#ifdef HAS_PORTAUDIO
// PortAudio stream callback: copies the captured block into the AudioQueue
// passed through `userData`.
//
// input       captured samples; treated as 16-bit mono PCM (2 bytes per frame),
//             which is how main() opens the stream (paInt16, one channel)
// frameCount  number of frames in `input`
// userData    pointer to the AudioQueue registered with Pa_OpenStream
//
// Returns paContinue while g_running is set and paComplete once it is cleared.
// NOTE(review): Push() allocates and takes a mutex inside the audio callback;
// acceptable for a sample, but confirm before reusing in latency-sensitive code.
int PaCallback(const void* input, void* /*output*/,
               unsigned long frameCount,
               const PaStreamCallbackTimeInfo* /*timeInfo*/,
               PaStreamCallbackFlags /*statusFlags*/,
               void* userData) {
    auto* queue = static_cast<AudioQueue*>(userData);
    // Defensive guard: if no capture buffer or no queue was supplied, skip this
    // block instead of dereferencing a null pointer.
    if (input != nullptr && queue != nullptr) {
        const auto* pcm = static_cast<const uint8_t*>(input);
        const size_t byteCount = static_cast<size_t>(frameCount) * 2;  // 16-bit mono = 2 bytes per frame
        queue->Push(std::vector<uint8_t>(pcm, pcm + byteCount));
    }
    return g_running ? paContinue : paComplete;
}
#endif

} // namespace

// Entry point of the live transcription sample.
//
// Flow: parse `--synth`, install the Ctrl+C handler, create the Foundry Local
// manager, download and load the speech model, start a live transcription
// session, print results from a background reader thread, feed the session
// with microphone audio (PortAudio builds) or a synthetic sine wave, then shut
// everything down.
//
// Returns 0 on success and 1 if a std::exception was thrown along the way.
int main(int argc, char* argv[]) {
    bool useSynth = false;
    for (int i = 1; i < argc; ++i) {
        if (std::string(argv[i]) == "--synth") useSynth = true;
    }

    // Install Ctrl+C handler (mirrors JS process.on('SIGINT'))
    std::signal(SIGINT, SignalHandler);

    try {
        std::cout << "===========================================================" << std::endl;
        std::cout << " Foundry Local -- Live Audio Transcription Demo (C++)" << std::endl;
        std::cout << "===========================================================" << std::endl;
        std::cout << std::endl;

        foundry_local::Configuration config;
        config.appName = "foundry_local_samples";

        foundry_local::Manager::Create(config);
        auto& manager = foundry_local::Manager::Instance();
        manager.EnsureEpsDownloaded();

        auto& catalog = manager.GetCatalog();
        auto* model = catalog.GetModel("nemotron-speech-streaming-en-0.6b");
        if (!model) {
            throw std::runtime_error("Model \"nemotron-speech-streaming-en-0.6b\" not found in catalog");
        }

        std::cout << "Downloading model (if needed)..." << std::endl;
        model->Download([](float pct) {
            std::cout << "\rDownloading: " << pct << "% " << std::flush;
        });
        std::cout << std::endl;
        std::cout << "Loading model..." << std::endl;
        model->Load();
        std::cout << "Model loaded" << std::endl;

        // NOTE: CreateLiveTranscriptionSession() is not yet available in the C++ SDK.
        // The audio client and session code below is forward-looking.
        foundry_local::OpenAIAudioClient audioClient(*model);
        auto session = audioClient.CreateLiveTranscriptionSession();

        session->Settings().sample_rate = 16000;
        session->Settings().channels = 1;
        session->Settings().bits_per_sample = 16;
        session->Settings().language = "en";
        session->Start();
        std::cout << "Session started" << std::endl;

        // Read transcription results in a background thread (mirrors JS readPromise)
        std::thread readThread([&session]() {
            // An exception escaping a thread entry point calls std::terminate,
            // so report it here and ask the rest of the program to stop.
            try {
                foundry_local::LiveAudioTranscriptionResponse result;
                while (g_running) {
                    const auto status = session->TryGetNext(result, std::chrono::milliseconds(500));
                    if (status == foundry_local::TranscriptionStatus::Result) {
                        if (result.is_final) {
                            std::cout << "\n [FINAL] " << result.text << std::endl;
                        } else if (!result.text.empty()) {
                            std::cout << result.text << std::flush;
                        }
                    } else if (status == foundry_local::TranscriptionStatus::Closed) {
                        break;
                    } else if (status == foundry_local::TranscriptionStatus::Timeout) {
                        continue;
                    } else {
                        std::cerr << "Transcription stream error: " << session->GetErrorMessage() << std::endl;
                        // Stop the audio pump too; otherwise it would keep feeding a failed stream.
                        g_running = false;
                        break;
                    }
                }
            } catch (const std::exception& ex) {
                std::cerr << "Transcription reader failed: " << ex.what() << std::endl;
                g_running = false;
            }
        });

        // Destroying a joinable std::thread calls std::terminate. If anything
        // below throws, this guard makes the reader leave its loop and joins it
        // before `session` (captured by reference above) is destroyed. On the
        // normal path the thread is already joined and the guard does nothing.
        struct ReaderJoiner {
            std::thread& thread;
            ~ReaderJoiner() {
                if (thread.joinable()) {
                    g_running = false;
                    thread.join();
                }
            }
        } readerJoiner{readThread};

        // --- Microphone capture (mirrors JS naudiodon2 section) ---
        // Uses PortAudio for cross-platform audio capture. If PortAudio is not
        // available or --synth is passed, falls back to synthetic PCM.

        bool micActive = false;

#ifdef HAS_PORTAUDIO
        if (!useSynth) {
            // Declared before the stream guard so the queue outlives the
            // stream whose callback writes into it.
            AudioQueue audioQueue;

            // Calls Pa_Terminate() only if Pa_Initialize() succeeded.
            struct PaLibraryGuard {
                bool initialized = false;
                ~PaLibraryGuard() {
                    if (initialized) Pa_Terminate();
                }
            } paLibrary;

            // Stops (if started) and closes the stream on every exit path,
            // including exceptions thrown by the pump loop.
            struct PaStreamGuard {
                PaStream* stream = nullptr;
                bool started = false;
                ~PaStreamGuard() {
                    if (stream == nullptr) return;
                    if (started) Pa_StopStream(stream);
                    Pa_CloseStream(stream);
                }
            } paStream;

            std::string micError;
            PaError err = Pa_Initialize();
            if (err != paNoError) {
                micError = Pa_GetErrorText(err);
            } else {
                paLibrary.initialized = true;

                PaStreamParameters inputParams{};
                inputParams.device = Pa_GetDefaultInputDevice();
                const PaDeviceInfo* deviceInfo =
                    (inputParams.device != paNoDevice) ? Pa_GetDeviceInfo(inputParams.device) : nullptr;

                if (deviceInfo == nullptr) {
                    // err is still paNoError here, so report a specific reason
                    // instead of PortAudio's "no error" text.
                    micError = "no default input device available";
                } else {
                    inputParams.channelCount = 1;
                    inputParams.sampleFormat = paInt16;
                    inputParams.suggestedLatency = deviceInfo->defaultLowInputLatency;
                    inputParams.hostApiSpecificStreamInfo = nullptr;

                    // framesPerBuffer=3200 matches JS framesPerBuffer setting
                    PaStream* opened = nullptr;
                    err = Pa_OpenStream(&opened, &inputParams, nullptr,
                                        16000, 3200, paClipOff,
                                        PaCallback, &audioQueue);
                    if (err == paNoError) {
                        paStream.stream = opened;  // from here on the guard closes it
                        err = Pa_StartStream(opened);
                        paStream.started = (err == paNoError);
                    }
                    if (err != paNoError) {
                        micError = Pa_GetErrorText(err);
                    }
                }
            }

            if (paStream.started) {
                micActive = true;
                std::cout << std::endl;
                std::cout << "===========================================================" << std::endl;
                std::cout << " LIVE TRANSCRIPTION ACTIVE" << std::endl;
                std::cout << " Speak into your microphone." << std::endl;
                std::cout << " Press Ctrl+C to stop." << std::endl;
                std::cout << "===========================================================" << std::endl;
                std::cout << std::endl;

                // Pump audio from the queue to the session (mirrors JS pumpAudio)
                while (g_running) {
                    std::vector<uint8_t> chunk;
                    if (audioQueue.TryPop(chunk)) {
                        session->Append(chunk.data(), chunk.size());
                    } else {
                        std::this_thread::sleep_for(std::chrono::milliseconds(10));
                    }
                }
            } else {
                std::cerr << "Could not initialize microphone: " << micError << std::endl;
                std::cerr << "Falling back to synthetic audio test..." << std::endl;
                std::cerr << std::endl;
            }
            // Guards run here: stream stopped/closed first, then Pa_Terminate().
        }
#else
        (void)useSynth;  // without PortAudio, synthetic audio is the only source
#endif

        // Fallback: push synthetic PCM (440Hz sine wave) — mirrors JS catch block
        if (!micActive) {
            std::cout << "Pushing synthetic audio (440Hz sine, 2s)..." << std::endl;
            const auto pcm = GenerateSineWavePcm(16000, 2, 440.0);
            const size_t chunkSize = static_cast<size_t>(16000 / 10 * 2);  // 100ms
            for (size_t offset = 0; offset < pcm.size() && g_running; offset += chunkSize) {
                const size_t len = std::min(chunkSize, pcm.size() - offset);
                session->Append(pcm.data() + offset, len);
                std::this_thread::sleep_for(std::chrono::milliseconds(100));
            }
            std::cout << "Synthetic audio pushed" << std::endl;

            // Wait briefly for remaining transcription results
            std::this_thread::sleep_for(std::chrono::seconds(3));
        }

        // Graceful shutdown (mirrors JS SIGINT handler)
        std::cout << "\n\nStopping..." << std::endl;
        session->Stop();
        readThread.join();
        model->Unload();
        foundry_local::Manager::Destroy();
        std::cout << "Done" << std::endl;
        return 0;
    } catch (const std::exception& ex) {
        std::cerr << "Error: " << ex.what() << std::endl;
        foundry_local::Manager::Destroy();
        return 1;
    }
}
4 changes: 2 additions & 2 deletions samples/cs/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
<PackageVersion Include="Microsoft.AI.Foundry.Local" Version="*-*" />
<PackageVersion Include="Microsoft.AI.Foundry.Local.WinML" Version="*-*" />
<PackageVersion Include="Betalgo.Ranul.OpenAI" Version="9.2.0" />
<PackageVersion Include="Microsoft.Extensions.Logging" Version="9.0.10" />
<PackageVersion Include="Microsoft.Extensions.Logging.Console" Version="9.0.10" />
<PackageVersion Include="Microsoft.Extensions.Logging" Version="9.0.15" />
<PackageVersion Include="Microsoft.Extensions.Logging.Console" Version="9.0.15" />
<PackageVersion Include="NAudio" Version="2.2.1" />
<PackageVersion Include="OpenAI" Version="2.5.0" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Common settings: builds an executable with implicit usings and nullable reference types enabled -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- Windows: target Windows SDK for WinML hardware acceleration -->
<!-- The condition is evaluated against the OS running the build; builds for ARM64 and x64 -->
<PropertyGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
<TargetFramework>net9.0-windows10.0.26100</TargetFramework>
<WindowsAppSDKSelfContained>false</WindowsAppSDKSelfContained>
<Platforms>ARM64;x64</Platforms>
<WindowsPackageType>None</WindowsPackageType>
<EnableCoreMrtTooling>false</EnableCoreMrtTooling>
</PropertyGroup>

<!-- Non-Windows: standard .NET -->
<PropertyGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
<TargetFramework>net9.0</TargetFramework>
</PropertyGroup>

<!-- When no RuntimeIdentifier was supplied, default it to the SDK's own runtime identifier -->
<PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
<RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
</PropertyGroup>

<!-- Package references in this file carry no Version attribute. -->
<!-- NOTE(review): versions are presumably managed centrally (e.g. Directory.Packages.props); confirm every package below has an entry there -->

<!-- Windows: WinML for hardware acceleration -->
<ItemGroup Condition="$([MSBuild]::IsOSPlatform('Windows'))">
<PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
</ItemGroup>

<!-- Non-Windows: standard SDK -->
<ItemGroup Condition="!$([MSBuild]::IsOSPlatform('Windows'))">
<PackageReference Include="Microsoft.AI.Foundry.Local" />
</ItemGroup>

<!-- Linux GPU support -->
<!-- Applied only when the effective RuntimeIdentifier is exactly linux-x64 -->
<ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
<PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
<PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
</ItemGroup>

<!-- Shared utilities -->
<!-- Compiles every .cs file found directly in the sibling ../Shared directory into this project -->
<ItemGroup>
<Compile Include="../Shared/*.cs" />
</ItemGroup>

<!-- Packages -->
<!-- Referenced on every platform: logging plus NAudio -->
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging" />
<PackageReference Include="Microsoft.Extensions.Logging.Console" />
<PackageReference Include="NAudio" />
</ItemGroup>

</Project>
Loading
Loading