Merged
Changes from 11 commits
3 changes: 3 additions & 0 deletions .gitignore
@@ -38,3 +38,6 @@ logs/

# Local NuGet packages built from source
local-packages/
+
+# JS SDK downloaded native core binaries
+sdk/js/foundry-local-core/
2 changes: 1 addition & 1 deletion samples/cs/live-audio-transcription/Program.cs
@@ -52,7 +52,7 @@ await model.DownloadAsync(progress =>
{
try
{
-await foreach (var result in session.GetTranscriptionStream())
+await foreach (var result in session.GetStream())
{
var text = result.Content?[0]?.Text;
if (result.IsFinal)
4 changes: 2 additions & 2 deletions samples/cs/live-audio-transcription/README.md
@@ -36,7 +36,7 @@ dotnet run -- --synth
2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
3. Captures microphone audio via `NAudio.WaveInEvent` (or generates synthetic audio as fallback)
4. Pushes PCM chunks to the SDK via `session.AppendAsync()` through a bounded channel for backpressure
-5. Reads transcription results via `await foreach (var result in session.GetTranscriptionStream())`
+5. Reads transcription results via `await foreach (var result in session.GetStream())`
6. Access text via `result.Content[0].Text` (OpenAI Realtime ConversationItem pattern)

## API
@@ -54,7 +54,7 @@ await session.StartAsync();
await session.AppendAsync(pcmBytes);

// Read results
-await foreach (var result in session.GetTranscriptionStream())
+await foreach (var result in session.GetStream())
{
Console.WriteLine(result.Content[0].Text); // transcribed text
Console.WriteLine(result.Content[0].Transcript); // alias (OpenAI compat)
4 changes: 2 additions & 2 deletions samples/js/live-audio-transcription/README.md
@@ -30,7 +30,7 @@ Speak into your microphone. Transcription appears in real-time. Press `Ctrl+C` t
2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
3. Captures microphone audio via `naudiodon2` (or generates synthetic audio as fallback)
4. Pushes PCM chunks to the SDK via `session.append()`
-5. Reads transcription results via `for await (const result of session.getTranscriptionStream())`
+5. Reads transcription results via `for await (const result of session.getStream())`
6. Access text via `result.content[0].text` (OpenAI Realtime ConversationItem pattern)

## API
@@ -48,7 +48,7 @@ await session.start();
await session.append(pcmBytes);

// Read results
-for await (const result of session.getTranscriptionStream()) {
+for await (const result of session.getStream()) {
console.log(result.content[0].text); // transcribed text
console.log(result.content[0].transcript); // alias (OpenAI compat)
console.log(result.is_final); // true for final results
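The README snippet above only shows the body of the read loop. As a point of reference, here is a minimal sketch of how a caller might fold the renamed `getStream()` output into a full transcript. It assumes only the surface shown in this README (`getStream()`, `result.content[0].text`, `result.is_final`); the `TranscriptionResult` and `TranscriptionStreamSource` interfaces are illustrative stand-ins, not the SDK's exported types.

```ts
// Illustrative only: minimal structural types matching the fields used in the
// README snippet above. The real SDK exports richer types.
interface TranscriptionResult {
  content?: { text?: string; transcript?: string }[];
  is_final?: boolean;
}

interface TranscriptionStreamSource {
  getStream(): AsyncGenerator<TranscriptionResult>;
}

// Incremental text arrives per chunk (not accumulated), so concatenate chunks
// and treat a result with is_final === true as the end of a segment.
async function collectTranscript(source: TranscriptionStreamSource): Promise<string> {
  const segments: string[] = [];
  let current = '';
  for await (const result of source.getStream()) {
    current += result.content?.[0]?.text ?? '';
    if (result.is_final && current.trim()) {
      segments.push(current.trim());
      current = '';
    }
  }
  if (current.trim()) segments.push(current.trim());
  return segments.join(' ');
}
```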
4 changes: 2 additions & 2 deletions samples/js/live-audio-transcription/app.js
@@ -15,7 +15,7 @@ console.log();
// Initialize the Foundry Local SDK
console.log('Initializing Foundry Local SDK...');
const manager = FoundryLocalManager.create({
-appName: 'foundry',
+appName: 'foundry_local_samples',
logLevel: 'info'
});
console.log('✓ SDK initialized');
@@ -55,7 +55,7 @@ console.log('✓ Session started');
// Read transcription results in background
const readPromise = (async () => {
try {
-for await (const result of session.getTranscriptionStream()) {
+for await (const result of session.getStream()) {
const text = result.content?.[0]?.text;
if (!text) continue;

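app.js starts the reader before any audio is pushed, and the JS test later in this diff makes the same point ("must start before pushing audio"). A hedged sketch of that ordering follows; `SketchSession` is an assumed shape reduced to the methods shown in this PR, and how the session object is obtained is not spelled out here.

```ts
// Sketch of the ordering used by the sample: start the session, begin
// consuming getStream() in the background, push PCM chunks, then stop() and
// await the reader so the final results can drain.
interface SketchSession {
  start(): Promise<void>;
  append(chunk: Uint8Array): Promise<void>;
  getStream(): AsyncGenerator<{ content?: { text?: string }[]; is_final?: boolean }>;
  stop(): Promise<void>;
}

async function runTranscription(
  session: SketchSession,
  pcmChunks: AsyncIterable<Uint8Array>,
  onText: (text: string, isFinal: boolean) => void,
): Promise<void> {
  await session.start();

  // The reader must be running before audio is appended, and getStream()
  // may only be iterated once per session.
  const readPromise = (async () => {
    for await (const result of session.getStream()) {
      const text = result.content?.[0]?.text;
      if (text) onText(text, result.is_final === true);
    }
  })();

  // Push audio while the reader drains results concurrently.
  for await (const chunk of pcmChunks) {
    await session.append(chunk);
  }

  await session.stop(); // signal end-of-audio and flush buffered audio
  await readPromise;    // final results arrive before the stream completes
}
```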
4 changes: 2 additions & 2 deletions samples/python/live-audio-transcription/README.md
@@ -41,7 +41,7 @@ python src/app.py --synth
2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
3. Captures microphone audio via `pyaudio` (or generates synthetic audio as fallback)
4. Pushes PCM chunks to the SDK via `session.append()`
-5. Reads transcription results in a background thread via `for result in session.get_transcription_stream()`
+5. Reads transcription results in a background thread via `for result in session.get_stream()`
6. Access text via `result.content[0].text` (OpenAI Realtime ConversationItem pattern)

## API
@@ -59,7 +59,7 @@ session.start()
session.append(pcm_bytes)

# Read results (typically on a background thread)
-for result in session.get_transcription_stream():
+for result in session.get_stream():
print(result.content[0].text) # transcribed text
print(result.content[0].transcript) # alias (OpenAI compat)
print(result.is_final) # True for final results
2 changes: 1 addition & 1 deletion samples/python/live-audio-transcription/src/app.py
@@ -53,7 +53,7 @@
# --- Background thread reads transcription results (mirrors JS readPromise) ---

def read_results():
-for result in session.get_transcription_stream():
+for result in session.get_stream():
text = result.content[0].text if result.content else ""
if result.is_final:
print()
2 changes: 1 addition & 1 deletion samples/rust/live-audio-transcription/src/main.rs
@@ -62,7 +62,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("✓ Session started\n");

// --- Background task reads transcription results (mirrors JS readPromise) ---
-let mut stream = session.get_transcription_stream().await?;
+let mut stream = session.get_stream().await?;
let read_task = tokio::spawn(async move {
while let Some(result) = stream.next().await {
match result {
2 changes: 1 addition & 1 deletion sdk/cpp/CMakeLists.txt
@@ -55,7 +55,7 @@ add_library(CppSdk STATIC
src/openai_chat_client.cpp
src/openai_audio_client.cpp
src/openai_live_audio_types.cpp
-src/openai_live_audio_client.cpp
+src/openai_live_audio_session.cpp
src/foundry_local_manager.cpp
)

2 changes: 1 addition & 1 deletion sdk/cpp/include/foundry_local.h
@@ -17,4 +17,4 @@
#include "openai/openai_chat_client.h"
#include "openai/openai_audio_client.h"
#include "openai/openai_live_audio_types.h"
#include "openai/openai_live_audio_client.h"
#include "openai/openai_live_audio_session.h"
2 changes: 1 addition & 1 deletion sdk/cpp/src/openai_audio_client.cpp
@@ -16,7 +16,7 @@
#include "core_helpers.h"
#include "logger.h"

#include "openai/openai_live_audio_client.h"
#include "openai/openai_live_audio_session.h"

namespace foundry_local {

@@ -10,7 +10,7 @@

#include <nlohmann/json.hpp>

#include "openai/openai_live_audio_client.h"
#include "openai/openai_live_audio_session.h"
#include "openai/openai_live_audio_types.h"
#include "foundry_local_internal_core.h"
#include "foundry_local_exception.h"
2 changes: 1 addition & 1 deletion sdk/cpp/test/live_audio_test.cpp
@@ -8,7 +8,7 @@
#include "foundry_local_exception.h"

#include "openai/openai_live_audio_types.h"
#include "openai/openai_live_audio_client.h"
#include "openai/openai_live_audio_session.h"

#include <nlohmann/json.hpp>

4 changes: 2 additions & 2 deletions sdk/cs/README.md
@@ -312,7 +312,7 @@ waveIn.DataAvailable += (sender, e) =>
};

// Read transcription results as they arrive
-await foreach (var result in session.GetTranscriptionStream())
+await foreach (var result in session.GetStream())
{
// result follows the OpenAI Realtime ConversationItem pattern:
// - result.Content[0].Text — incremental transcribed text (per chunk, not accumulated)
@@ -341,7 +341,7 @@ await session.StopAsync();
|--------|-------------|
| `StartAsync()` | Initialize the streaming session. Settings are frozen after this call. |
| `AppendAsync(pcmData)` | Push a chunk of raw PCM audio. Thread-safe (bounded internal queue). |
-| `GetTranscriptionStream()` | Async enumerable of transcription results. |
+| `GetStream()` | Async enumerable of transcription results. |
| `StopAsync()` | Signal end-of-audio, flush remaining audio, and clean up. |
| `DisposeAsync()` | Calls `StopAsync` if needed. Use `await using` for automatic cleanup. |

@@ -37,7 +37,7 @@ public sealed class LiveAudioTranscriptionSession : IAsyncDisposable
private bool _started;
private bool _stopped;

-// Output channel: native callback writes, user reads via GetTranscriptionStream
+// Output channel: native callback writes, user reads via GetStream
private Channel<LiveAudioTranscriptionResponse>? _outputChannel;

// Internal push queue: user writes audio chunks, background loop drains to native core.
@@ -90,7 +90,7 @@ internal LiveAudioTranscriptionSession(string modelId)

/// <summary>
/// Start a real-time audio streaming session.
-/// Must be called before <see cref="AppendAsync"/> or <see cref="GetTranscriptionStream"/>.
+/// Must be called before <see cref="AppendAsync"/> or <see cref="GetStream"/>.
/// Settings are frozen after this call.
/// </summary>
/// <param name="ct">Cancellation token.</param>
@@ -249,7 +249,7 @@ private async Task PushLoopAsync(CancellationToken ct)
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>Async enumerable of transcription results.</returns>
-public async IAsyncEnumerable<LiveAudioTranscriptionResponse> GetTranscriptionStream(
+public async IAsyncEnumerable<LiveAudioTranscriptionResponse> GetStream(
[EnumeratorCancellation] CancellationToken ct = default)
{
if (_outputChannel == null)
@@ -266,7 +266,7 @@ public async IAsyncEnumerable<LiveAudioTranscriptionResponse> GetTranscriptionSt
/// <summary>
/// Signal end-of-audio and stop the streaming session.
/// Any remaining buffered audio in the push queue will be drained to native core first.
-/// Final results are delivered through <see cref="GetTranscriptionStream"/> before it completes.
+/// Final results are delivered through <see cref="GetStream"/> before it completes.
/// </summary>
/// <param name="ct">Cancellation token.</param>
public async Task StopAsync(CancellationToken ct = default)
6 changes: 3 additions & 3 deletions sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs
@@ -154,14 +154,14 @@ public async Task AppendAsync_BeforeStart_Throws()

[Test]
[SkipUnlessIntegration]
-public async Task GetTranscriptionStream_BeforeStart_Throws()
+public async Task GetStream_BeforeStart_Throws()
{
await using var session = new LiveAudioTranscriptionSession("test-model");

FoundryLocalException? caught = null;
try
{
-await foreach (var _ in session.GetTranscriptionStream())
+await foreach (var _ in session.GetStream())
{
// should not reach here
}
@@ -212,7 +212,7 @@ public async Task LiveStreaming_E2E_WithSyntheticPCM_ReturnsValidResponse()
var results = new List<LiveAudioTranscriptionResponse>();
var readTask = Task.Run(async () =>
{
-await foreach (var result in session.GetTranscriptionStream())
+await foreach (var result in session.GetStream())
{
results.Add(result);
}
2 changes: 1 addition & 1 deletion sdk/js/src/detail/model.ts
@@ -3,7 +3,7 @@ import { ChatClient } from '../openai/chatClient.js';
import { AudioClient } from '../openai/audioClient.js';
import { EmbeddingClient } from '../openai/embeddingClient.js';
import { ResponsesClient } from '../openai/responsesClient.js';
-import { LiveAudioTranscriptionSession } from '../openai/liveAudioTranscriptionClient.js';
+import { LiveAudioTranscriptionSession } from '../openai/liveAudioSession.js';
import { IModel } from '../imodel.js';
import { ModelInfo } from '../types.js';

2 changes: 1 addition & 1 deletion sdk/js/src/detail/modelVariant.ts
@@ -4,7 +4,7 @@ import { ModelInfo } from '../types.js';
import { ChatClient } from '../openai/chatClient.js';
import { AudioClient } from '../openai/audioClient.js';
import { EmbeddingClient } from '../openai/embeddingClient.js';
-import { LiveAudioTranscriptionSession } from '../openai/liveAudioTranscriptionClient.js';
+import { LiveAudioTranscriptionSession } from '../openai/liveAudioSession.js';
import { ResponsesClient } from '../openai/responsesClient.js';
import { IModel } from '../imodel.js';

7 changes: 0 additions & 7 deletions sdk/js/src/imodel.ts
@@ -1,7 +1,6 @@
import { ChatClient } from './openai/chatClient.js';
import { AudioClient } from './openai/audioClient.js';
import { EmbeddingClient } from './openai/embeddingClient.js';
-import { LiveAudioTranscriptionSession } from './openai/liveAudioTranscriptionClient.js';
import { ResponsesClient } from './openai/responsesClient.js';
import { ModelInfo } from './types.js';

@@ -28,12 +27,6 @@ export interface IModel {
createAudioClient(): AudioClient;
createEmbeddingClient(): EmbeddingClient;

-/**
-* Creates a LiveAudioTranscriptionSession for real-time audio streaming ASR.
-* The model must be loaded before calling this method.
-* @returns A LiveAudioTranscriptionSession instance.
-*/
-createLiveTranscriptionSession(): LiveAudioTranscriptionSession;
/**
* Creates a ResponsesClient for interacting with the model via the Responses API.
* Unlike createChatClient/createAudioClient (which use FFI), the Responses API
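With `createLiveTranscriptionSession()` removed from `IModel` above, the session is presumably obtained elsewhere (the Python `AudioClient` later in this diff exposes `create_live_transcription_session()`, and `audioClient.ts` now imports `LiveAudioTranscriptionSession`); the exact JS creation path is not shown in this hunk. A hypothetical adaptation sketch for callers that previously reached through `IModel`, with the factory taken as a dependency instead:

```ts
// Hypothetical compatibility sketch: IModel no longer declares
// createLiveTranscriptionSession(), so code that needs a session can take the
// factory as a dependency rather than depending on IModel. The shapes below
// are assumptions for illustration, not the SDK's exported types.
interface SketchTranscriptionSession {
  start(): Promise<void>;
  stop(): Promise<void>;
}

type SessionFactory = () => SketchTranscriptionSession;

async function withSession<T>(
  createSession: SessionFactory,
  body: (session: SketchTranscriptionSession) => Promise<T>,
): Promise<T> {
  const session = createSession();
  await session.start();
  try {
    return await body(session);
  } finally {
    await session.stop(); // always flush and clean up, even on error
  }
}
```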
4 changes: 2 additions & 2 deletions sdk/js/src/index.ts
@@ -9,8 +9,8 @@ export type { IModel } from './imodel.js';
export { ChatClient, ChatClientSettings } from './openai/chatClient.js';
export { AudioClient, AudioClientSettings } from './openai/audioClient.js';
export { EmbeddingClient } from './openai/embeddingClient.js';
-export { LiveAudioTranscriptionSession, LiveAudioTranscriptionOptions } from './openai/liveAudioTranscriptionClient.js';
-export type { LiveAudioTranscriptionResponse, TranscriptionContentPart } from './openai/liveAudioTranscriptionTypes.js';
+export { LiveAudioTranscriptionSession, LiveAudioTranscriptionOptions } from './openai/liveAudioSession.js';
+export type { LiveAudioTranscriptionResponse, TranscriptionContentPart } from './openai/liveAudioTypes.js';
export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js';
export { ModelLoadManager } from './detail/modelLoadManager.js';
/** @internal */
2 changes: 1 addition & 1 deletion sdk/js/src/openai/audioClient.ts
@@ -1,5 +1,5 @@
import { CoreInterop } from '../detail/coreInterop.js';
-import { LiveAudioTranscriptionSession } from './liveAudioTranscriptionClient.js';
+import { LiveAudioTranscriptionSession } from './liveAudioSession.js';

export class AudioClientSettings {
language?: string;
@@ -1,5 +1,5 @@
import { CoreInterop } from '../detail/coreInterop.js';
-import { LiveAudioTranscriptionResponse, parseTranscriptionResult, tryParseCoreError } from './liveAudioTranscriptionTypes.js';
+import { LiveAudioTranscriptionResponse, parseTranscriptionResult, tryParseCoreError } from './liveAudioTypes.js';

/**
* Audio format settings for a streaming session.
@@ -191,7 +191,7 @@ export class LiveAudioTranscriptionSession {

/**
* Start a real-time audio streaming session.
-* Must be called before append() or getTranscriptionStream().
+* Must be called before append() or getStream().
* Settings are frozen after this call.
*/
public async start(): Promise<void> {
@@ -319,17 +319,17 @@
*
* Usage:
* ```ts
-* for await (const result of client.getTranscriptionStream()) {
+* for await (const result of client.getStream()) {
* console.log(result.content[0].text);
* }
* ```
*/
-public async *getTranscriptionStream(): AsyncGenerator<LiveAudioTranscriptionResponse> {
+public async *getStream(): AsyncGenerator<LiveAudioTranscriptionResponse> {
if (!this.outputQueue) {
throw new Error('No active streaming session. Call start() first.');
}
if (this.streamConsumed) {
-throw new Error('getTranscriptionStream() can only be called once per session. The output stream has already been consumed.');
+throw new Error('getStream() can only be called once per session. The output stream has already been consumed.');
}
this.streamConsumed = true;

@@ -341,7 +341,7 @@
/**
* Signal end-of-audio and stop the streaming session.
* Any remaining buffered audio in the push queue will be drained to native core first.
-* Final results are delivered through getTranscriptionStream() before it completes.
+* Final results are delivered through getStream() before it completes.
*/
public async stop(): Promise<void> {
if (!this.started || this.stopped) {
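The hunk above keeps the single-consumption guard (only the error message is renamed): `getStream()` may be iterated once per session. If more than one consumer needs the results, read the stream once and fan the items out yourself. The sketch below is an illustrative pattern, not SDK code, and its result type is reduced to the fields used here.

```ts
// Illustrative fan-out: iterate the session's getStream() exactly once and
// hand each result to every registered consumer.
type SketchResult = { content?: { text?: string }[]; is_final?: boolean };

async function fanOut(
  stream: AsyncGenerator<SketchResult>,
  consumers: Array<(result: SketchResult) => void>,
): Promise<void> {
  for await (const result of stream) {
    for (const consume of consumers) {
      consume(result);
    }
  }
}

// Usage sketch: one pass over session.getStream() feeds a live printer and a
// counter of final-segment markers at the same time.
async function consumeOnce(session: { getStream(): AsyncGenerator<SketchResult> }): Promise<number> {
  let finalSegments = 0;
  await fanOut(session.getStream(), [
    (r) => { const t = r.content?.[0]?.text; if (t) console.log(t); },
    (r) => { if (r.is_final) finalSegments += 1; },
  ]);
  return finalSegments;
}
```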
6 changes: 3 additions & 3 deletions sdk/js/test/openai/liveAudioTranscription.test.ts
@@ -1,7 +1,7 @@
import { describe, it } from 'mocha';
import { expect } from 'chai';
-import { parseTranscriptionResult, tryParseCoreError } from '../../src/openai/liveAudioTranscriptionTypes.js';
-import { LiveAudioTranscriptionOptions } from '../../src/openai/liveAudioTranscriptionClient.js';
+import { parseTranscriptionResult, tryParseCoreError } from '../../src/openai/liveAudioTypes.js';
+import { LiveAudioTranscriptionOptions } from '../../src/openai/liveAudioSession.js';
import { getTestManager } from '../testUtils.js';

describe('Live Audio Transcription Types', () => {
@@ -160,7 +160,7 @@ describe('Live Audio Transcription Types', () => {
// Collect results in background (must start before pushing audio)
const results: any[] = [];
const readPromise = (async () => {
-for await (const result of session.getTranscriptionStream()) {
+for await (const result of session.getStream()) {
results.push(result);
}
})();
4 changes: 2 additions & 2 deletions sdk/python/src/openai/__init__.py
@@ -7,8 +7,8 @@
from .chat_client import ChatClient, ChatClientSettings
from .audio_client import AudioClient
from .embedding_client import EmbeddingClient
-from .live_audio_transcription_client import LiveAudioTranscriptionSession
-from .live_audio_transcription_types import (
+from .live_audio_session import LiveAudioTranscriptionSession
+from .live_audio_types import (
CoreErrorResponse,
LiveAudioTranscriptionOptions,
LiveAudioTranscriptionResponse,
4 changes: 2 additions & 2 deletions sdk/python/src/openai/audio_client.py
@@ -14,7 +14,7 @@

from ..detail.core_interop import CoreInterop, InteropRequest
from ..exception import FoundryLocalException
-from .live_audio_transcription_client import LiveAudioTranscriptionSession
+from .live_audio_session import LiveAudioTranscriptionSession

logger = logging.getLogger(__name__)

@@ -76,7 +76,7 @@ def create_live_transcription_session(self) -> LiveAudioTranscriptionSession:
session.settings.sample_rate = 16000
session.start()
session.append(pcm_bytes)
-for result in session.get_transcription_stream():
+for result in session.get_stream():
print(result.content[0].text)
"""
return LiveAudioTranscriptionSession(self.model_id, self._core_interop)