Skip to content

Commit edb216d

Browse files
ruiren_microsoft and Copilot committed
Address PR review feedback on nemotron samples
- C++: Use explicit little-endian PCM encoding for portability
- C++: Continue on TryGetNext timeout with max-retry guard
- Rust: Safe-access r.content via .first() to avoid panic on empty
- Rust: Add sleep pacing in --synth mode for realistic ingestion
- Rust: Update README run instructions (crate not in workspace)
- Rust: Add SDK availability note in README
- Python: Add exception handling in capture_mic thread
- Python: Add SDK availability note in header comment

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 40f02b7 commit edb216d

4 files changed

Lines changed: 41 additions & 17 deletions

File tree

samples/cpp/live-audio-transcription-example/main.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919
namespace {
2020
std::vector<uint8_t> GenerateSineWavePcm(int sampleRate, int durationSeconds, double frequencyHz) {
2121
const auto totalSamples = static_cast<size_t>(sampleRate * durationSeconds);
22-
std::vector<uint8_t> pcm(totalSamples * 2, 0); // 16-bit mono
22+
std::vector<uint8_t> pcm(totalSamples * 2, 0); // 16-bit mono, little-endian
2323

2424
for (size_t i = 0; i < totalSamples; ++i) {
2525
const double t = static_cast<double>(i) / static_cast<double>(sampleRate);
2626
const auto sample = static_cast<int16_t>(
2727
static_cast<double>(INT16_MAX) * 0.5 * std::sin(2.0 * 3.14159265358979323846 * frequencyHz * t));
28-
const auto b = reinterpret_cast<const uint8_t*>(&sample);
29-
pcm[i * 2] = b[0];
30-
pcm[i * 2 + 1] = b[1];
28+
const auto encodedSample = static_cast<uint16_t>(sample);
29+
pcm[i * 2] = static_cast<uint8_t>(encodedSample & 0xFF);
30+
pcm[i * 2 + 1] = static_cast<uint8_t>((encodedSample >> 8) & 0xFF);
3131
}
3232
return pcm;
3333
}
@@ -68,16 +68,22 @@ int main() {
6868
}
6969

7070
foundry_local::LiveAudioTranscriptionResponse result;
71+
int consecutiveTimeouts = 0;
72+
const int maxConsecutiveTimeouts = 10; // 5 seconds of silence
7173
while (true) {
7274
const auto status = session->TryGetNext(result, std::chrono::milliseconds(500));
7375
if (status == foundry_local::TranscriptionStatus::Result) {
76+
consecutiveTimeouts = 0;
7477
if (result.is_final) {
7578
std::cout << "\n[FINAL] " << result.text << std::endl;
7679
} else {
7780
std::cout << result.text << std::flush;
7881
}
7982
} else if (status == foundry_local::TranscriptionStatus::Timeout) {
80-
break;
83+
if (++consecutiveTimeouts >= maxConsecutiveTimeouts) {
84+
break; // No more results after extended wait
85+
}
86+
continue; // Engine may still be processing buffered audio
8187
} else if (status == foundry_local::TranscriptionStatus::Closed) {
8288
break;
8389
} else {

samples/python/live-audio-transcription/src/app.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Live Audio Transcription — Foundry Local SDK Example (Python)
22
#
3+
# NOTE: The live-transcription session API (create_live_transcription_session)
4+
# is not yet available in the Python SDK. This sample is a forward-looking
5+
# reference and will not run until the API is added to the SDK.
6+
#
37
# Demonstrates real-time microphone-to-text using:
48
# SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor)
59
#
@@ -84,9 +88,14 @@ def read_results():
8488

8589
def capture_mic():
8690
while not stop_recording.is_set():
87-
pcm_data = stream.read(chunk, exception_on_overflow=False)
88-
if pcm_data:
89-
session.append(pcm_data)
91+
try:
92+
pcm_data = stream.read(chunk, exception_on_overflow=False)
93+
if pcm_data:
94+
session.append(pcm_data)
95+
except Exception as e:
96+
print(f"\n[ERROR] Microphone capture failed: {e}")
97+
stop_recording.set()
98+
break
9099

91100

92101
capture_thread = threading.Thread(target=capture_mic, daemon=True)

samples/rust/live-audio-transcription-example/README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@
33
This sample demonstrates real-time microphone transcription using the Foundry Local Rust SDK and the `nemotron` model.
44

55
> This example requires a Rust SDK version that includes `create_live_transcription_session`.
6+
> The live-transcription session API is not yet available in the Rust SDK (`sdk/rust`).
7+
> This sample is included as a forward-looking reference and will not compile until
8+
> the API is added. It is intentionally excluded from the workspace `Cargo.toml`.
69
710
## Run
811

912
```bash
10-
cargo run -p live-audio-transcription-example
13+
cd samples/rust/live-audio-transcription-example
14+
cargo run
1115
```
1216

1317
Use synthetic audio instead of a microphone:
1418

1519
```bash
16-
cargo run -p live-audio-transcription-example -- --synth
20+
cd samples/rust/live-audio-transcription-example
21+
cargo run -- --synth
1722
```

samples/rust/live-audio-transcription-example/src/main.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,15 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
5252
while let Some(result) = stream.next().await {
5353
match result {
5454
Ok(r) => {
55-
let text = &r.content[0].text;
56-
if r.is_final {
57-
println!();
58-
println!(" [FINAL] {text}");
59-
} else if !text.is_empty() {
60-
print!("{text}");
61-
io::stdout().flush().ok();
55+
if let Some(content) = r.content.first() {
56+
let text = &content.text;
57+
if r.is_final {
58+
println!();
59+
println!(" [FINAL] {text}");
60+
} else if !text.is_empty() {
61+
print!("{text}");
62+
io::stdout().flush().ok();
63+
}
6264
}
6365
}
6466
Err(e) => {
@@ -72,9 +74,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
7274
if use_synth {
7375
let pcm_data = generate_sine_wave_pcm(16000, 3, 440.0);
7476
let chunk_size = 16000 / 10 * 2;
77+
let chunk_interval = std::time::Duration::from_millis(100);
7578
for offset in (0..pcm_data.len()).step_by(chunk_size) {
7679
let end = std::cmp::min(offset + chunk_size, pcm_data.len());
7780
session.append(&pcm_data[offset..end], None).await?;
81+
tokio::time::sleep(chunk_interval).await;
7882
}
7983
} else {
8084
let host = cpal::default_host();

0 commit comments

Comments
 (0)