Skip to content

Commit 40f02b7

Browse files
ruiren_microsoftCopilot
andcommitted
Add nemotron live-audio samples across languages
Adds JS/C#/Python/Rust/C++ nemotron-focused live audio transcription samples and updates the Rust samples index. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent d2ef88d commit 40f02b7

10 files changed

Lines changed: 438 additions & 5 deletions

File tree

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Live Audio Transcription Example (C++)
2+
3+
This sample demonstrates the Nemotron live-audio API surface introduced in PR #655:
4+
5+
- `OpenAIAudioClient::CreateLiveTranscriptionSession()`
6+
- `LiveAudioTranscriptionSession::Start()`
7+
- `LiveAudioTranscriptionSession::Append(...)`
8+
- `LiveAudioTranscriptionSession::TryGetNext(...)`
9+
- `LiveAudioTranscriptionSession::Stop()`
10+
11+
The sample pushes three seconds of synthetic 16 kHz, 16-bit mono PCM audio (a 440 Hz sine wave) in 100 ms chunks, then prints the streaming and final transcript text.
12+
13+
> This example assumes your branch includes the C++ SDK live-audio APIs from PR #655.
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// Live Audio Transcription — Foundry Local C++ SDK Example
2+
//
3+
// This sample is based on the live-audio C++ APIs added in PR #655:
4+
// - OpenAIAudioClient::CreateLiveTranscriptionSession()
5+
// - LiveAudioTranscriptionSession::{Start, Append, TryGetNext, Stop}
6+
7+
#include <algorithm>
#include <chrono>
#include <climits>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#include "foundry_local.h"
18+
19+
namespace {
20+
// Generates a mono, 16-bit signed PCM sine wave at half of full-scale amplitude.
//
// Parameters:
//   sampleRate       samples per second
//   durationSeconds  length of the generated signal in whole seconds
//   frequencyHz      tone frequency in Hz
//
// Returns the raw sample bytes, two bytes per sample, low byte first
// (little-endian) regardless of host byte order. Returns an empty buffer when
// sampleRate or durationSeconds is not positive.
std::vector<uint8_t> GenerateSineWavePcm(int sampleRate, int durationSeconds, double frequencyHz) {
    // A negative value would otherwise wrap to a huge size_t and trigger an
    // enormous allocation.
    if (sampleRate <= 0 || durationSeconds <= 0) {
        return {};
    }

    // Widen before multiplying so the product cannot overflow `int`.
    const size_t totalSamples = static_cast<size_t>(sampleRate) * static_cast<size_t>(durationSeconds);
    std::vector<uint8_t> pcm(totalSamples * 2, 0);  // 16-bit mono

    constexpr double kTwoPi = 2.0 * 3.14159265358979323846;
    constexpr double kAmplitude = static_cast<double>(INT16_MAX) * 0.5;

    for (size_t i = 0; i < totalSamples; ++i) {
        const double t = static_cast<double>(i) / static_cast<double>(sampleRate);
        const auto sample = static_cast<int16_t>(kAmplitude * std::sin(kTwoPi * frequencyHz * t));

        // Serialize explicitly instead of aliasing the sample's memory, so the
        // byte order does not depend on the host CPU.
        const auto bits = static_cast<uint16_t>(sample);
        pcm[i * 2] = static_cast<uint8_t>(bits & 0xFFu);
        pcm[i * 2 + 1] = static_cast<uint8_t>(bits >> 8);
    }
    return pcm;
}
34+
} // namespace
35+
36+
int main() {
37+
try {
38+
// Manager/model bootstrapping follows the same pattern as other Foundry Local SDK samples.
39+
foundry_local::Configuration config;
40+
config.appName = "foundry_local_samples";
41+
42+
auto manager = foundry_local::FoundryLocalManager::Create(config);
43+
auto catalog = manager->GetCatalog();
44+
auto model = catalog.GetModel("nemotron");
45+
if (!model) {
46+
throw std::runtime_error("Model \"nemotron\" not found in catalog");
47+
}
48+
49+
model->Download();
50+
model->Load();
51+
52+
auto audioClient = model->GetAudioClient();
53+
auto session = audioClient.CreateLiveTranscriptionSession();
54+
55+
session->Settings().sample_rate = 16000;
56+
session->Settings().channels = 1;
57+
session->Settings().bits_per_sample = 16;
58+
session->Settings().language = "en";
59+
session->Start();
60+
61+
std::cout << "Session started. Pushing synthetic audio..." << std::endl;
62+
const auto pcm = GenerateSineWavePcm(16000, 3, 440.0);
63+
const size_t chunkSize = static_cast<size_t>(16000 / 10 * 2); // 100ms
64+
for (size_t offset = 0; offset < pcm.size(); offset += chunkSize) {
65+
const size_t len = std::min(chunkSize, pcm.size() - offset);
66+
session->Append(pcm.data() + offset, len);
67+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
68+
}
69+
70+
foundry_local::LiveAudioTranscriptionResponse result;
71+
while (true) {
72+
const auto status = session->TryGetNext(result, std::chrono::milliseconds(500));
73+
if (status == foundry_local::TranscriptionStatus::Result) {
74+
if (result.is_final) {
75+
std::cout << "\n[FINAL] " << result.text << std::endl;
76+
} else {
77+
std::cout << result.text << std::flush;
78+
}
79+
} else if (status == foundry_local::TranscriptionStatus::Timeout) {
80+
break;
81+
} else if (status == foundry_local::TranscriptionStatus::Closed) {
82+
break;
83+
} else {
84+
std::cerr << "Transcription stream error: " << session->GetErrorMessage() << std::endl;
85+
break;
86+
}
87+
}
88+
89+
session->Stop();
90+
model->Unload();
91+
return 0;
92+
} catch (const std::exception& ex) {
93+
std::cerr << "Error: " << ex.what() << std::endl;
94+
return 1;
95+
}
96+
}

samples/cs/live-audio-transcription-example/Program.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
2121
var mgr = FoundryLocalManager.Instance;
2222

23-
await mgr.DownloadAndRegisterEpsAsync();
23+
await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync());
2424

2525
var catalog = await mgr.GetCatalogAsync();
2626

samples/js/live-audio-transcription-example/app.js

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,8 @@ try {
122122
};
123123

124124
audioInput.on('data', (buffer) => {
125-
const pcm = new Uint8Array(buffer);
126-
const copy = new Uint8Array(pcm.length);
127-
copy.set(pcm);
125+
// Single copy: wrap the Buffer's bytes in a zero-copy Uint8Array view, then .slice() that view to get an independent Uint8Array.
126+
const copy = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength).slice();
128127

129128
// Keep a bounded queue to avoid unbounded memory growth.
130129
if (appendQueue.length >= 100) {
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
foundry-local-sdk
2+
pyaudio
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Live Audio Transcription — Foundry Local SDK Example (Python)
2+
#
3+
# Demonstrates real-time microphone-to-text using:
4+
# SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor)
5+
#
6+
# Usage:
7+
# pip install -r requirements.txt
8+
# python src/app.py
9+
10+
import threading
11+
12+
import pyaudio
13+
from foundry_local_sdk import Configuration, FoundryLocalManager
14+
15+
print("===========================================================")
print(" Foundry Local -- Live Audio Transcription Demo (Python)")
print("===========================================================")
print()

# Audio format shared by the transcription session and the microphone stream.
rate = 16000
channels = 1
fmt = pyaudio.paInt16
chunk = rate // 10  # 100ms

# Set by the main thread to ask the capture thread to stop reading the mic.
stop_recording = threading.Event()


def read_results():
    """Print transcription results from the session's stream as they arrive.

    Non-final text is printed inline in cyan; final text is printed on its own
    line with a ``[FINAL]`` prefix. Returns when the stream iterator ends.
    """
    for result in session.get_transcription_stream():
        text = result.content[0].text if result.content else ""
        if result.is_final:
            print()
            print(f" [FINAL] {text}")
        elif text:
            print(f"\033[96m{text}\033[0m", end="", flush=True)


def capture_mic():
    """Read 100 ms microphone chunks and append them to the session until asked to stop."""
    while not stop_recording.is_set():
        # Drop overflowed input rather than raising, so capture keeps running.
        pcm_data = stream.read(chunk, exception_on_overflow=False)
        if pcm_data:
            session.append(pcm_data)


config = Configuration(app_name="foundry_local_samples")
FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

model = manager.catalog.get_model("nemotron")
if model is None:
    raise RuntimeError('Model "nemotron" not found in catalog')

model.download(
    lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True)
)
print()
# Flush so the message is visible while the model is still loading.
print(f"Loading model {model.id}...", end="", flush=True)
model.load()
print("done.")

# Everything below is wrapped in try/finally so that an exception (no
# microphone, Ctrl+C, closed stdin, ...) still releases the microphone, stops
# the session and unloads the model.
try:
    audio_client = model.get_audio_client()
    session = audio_client.create_live_transcription_session()
    session.settings.sample_rate = rate
    session.settings.channels = channels
    session.settings.language = "en"

    session.start()
    print(" Session started")

    read_thread = threading.Thread(target=read_results, daemon=True)
    read_thread.start()

    try:
        pa = pyaudio.PyAudio()
        stream = None
        capture_thread = None
        try:
            stream = pa.open(
                format=fmt,
                channels=channels,
                rate=rate,
                input=True,
                frames_per_buffer=chunk,
            )

            print()
            print("===========================================================")
            print(" LIVE TRANSCRIPTION ACTIVE")
            print(" Speak into your microphone.")
            print(" Transcription appears in real-time (cyan text).")
            print(" Press ENTER to stop recording.")
            print("===========================================================")
            print()

            capture_thread = threading.Thread(target=capture_mic, daemon=True)
            capture_thread.start()

            # Block the main thread until the user presses ENTER.
            input()
        finally:
            # Stop the capture loop before closing the stream it reads from.
            stop_recording.set()
            if capture_thread is not None:
                capture_thread.join(timeout=2)
            if stream is not None:
                stream.stop_stream()
                stream.close()
            pa.terminate()
    finally:
        session.stop()
        # NOTE(review): assumes the transcription stream ends once the session
        # is stopped, so this join returns -- confirm against the SDK.
        read_thread.join()
finally:
    model.unload()

samples/rust/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,8 @@ Demonstrates tool calling with streaming responses, multi-turn conversation, and
2222

2323
### [Audio Transcription](./audio-transcription-example)
2424

25-
Demonstrates audio transcription (non-streaming and streaming) using the `whisper` model.
25+
Demonstrates audio transcription (non-streaming and streaming) using the `whisper` model.
26+
27+
### [Live Audio Transcription](./live-audio-transcription-example)
28+
29+
Demonstrates real-time microphone transcription using the `nemotron` model.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[package]
2+
name = "live-audio-transcription-example"
3+
version = "0.1.0"
4+
edition = "2021"
5+
description = "Live audio transcription (streaming) example using the Foundry Local Rust SDK"
6+
7+
[dependencies]
8+
foundry-local-sdk = { path = "../../../sdk/rust" }
9+
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
10+
tokio-stream = "0.1"
11+
cpal = "0.15"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Sample: Live Audio Transcription
2+
3+
This sample demonstrates real-time microphone transcription using the Foundry Local Rust SDK and the `nemotron` model.
4+
5+
> This example requires a Rust SDK version that includes `create_live_transcription_session`.
6+
7+
## Run
8+
9+
```bash
10+
cargo run -p live-audio-transcription-example
11+
```
12+
13+
Use synthetic audio instead of a microphone:
14+
15+
```bash
16+
cargo run -p live-audio-transcription-example -- --synth
17+
```

0 commit comments

Comments
 (0)