Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ logs/

# Local NuGet packages built from source
local-packages/

# JS SDK downloaded native core binaries
sdk/js/foundry-local-core/
2 changes: 1 addition & 1 deletion samples/cs/live-audio-transcription/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ await model.DownloadAsync(progress =>
{
try
{
await foreach (var result in session.GetTranscriptionStream())
await foreach (var result in session.GetStream())
{
var text = result.Content?[0]?.Text;
if (result.IsFinal)
Expand Down
4 changes: 2 additions & 2 deletions samples/cs/live-audio-transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ dotnet run -- --synth
2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
3. Captures microphone audio via `NAudio.WaveInEvent` (or generates synthetic audio as fallback)
4. Pushes PCM chunks to the SDK via `session.AppendAsync()` through a bounded channel for backpressure
5. Reads transcription results via `await foreach (var result in session.GetTranscriptionStream())`
5. Reads transcription results via `await foreach (var result in session.GetStream())`
6. Access text via `result.Content[0].Text` (OpenAI Realtime ConversationItem pattern)

## API
Expand All @@ -54,7 +54,7 @@ await session.StartAsync();
await session.AppendAsync(pcmBytes);

// Read results
await foreach (var result in session.GetTranscriptionStream())
await foreach (var result in session.GetStream())
{
Console.WriteLine(result.Content[0].Text); // transcribed text
Console.WriteLine(result.Content[0].Transcript); // alias (OpenAI compat)
Expand Down
4 changes: 2 additions & 2 deletions samples/js/live-audio-transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Speak into your microphone. Transcription appears in real-time. Press `Ctrl+C` t
2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
3. Captures microphone audio via `naudiodon2` (or generates synthetic audio as fallback)
4. Pushes PCM chunks to the SDK via `session.append()`
5. Reads transcription results via `for await (const result of session.getTranscriptionStream())`
5. Reads transcription results via `for await (const result of session.getStream())`
6. Access text via `result.content[0].text` (OpenAI Realtime ConversationItem pattern)

## API
Expand All @@ -48,7 +48,7 @@ await session.start();
await session.append(pcmBytes);

// Read results
for await (const result of session.getTranscriptionStream()) {
for await (const result of session.getStream()) {
console.log(result.content[0].text); // transcribed text
console.log(result.content[0].transcript); // alias (OpenAI compat)
console.log(result.is_final); // true for final results
Expand Down
4 changes: 2 additions & 2 deletions samples/js/live-audio-transcription/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ console.log();
// Initialize the Foundry Local SDK
console.log('Initializing Foundry Local SDK...');
const manager = FoundryLocalManager.create({
appName: 'foundry',
appName: 'foundry_local_samples',
logLevel: 'info'
});
console.log('✓ SDK initialized');
Expand Down Expand Up @@ -55,7 +55,7 @@ console.log('✓ Session started');
// Read transcription results in background
const readPromise = (async () => {
try {
for await (const result of session.getTranscriptionStream()) {
for await (const result of session.getStream()) {
const text = result.content?.[0]?.text;
if (!text) continue;

Expand Down
4 changes: 2 additions & 2 deletions samples/python/live-audio-transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ python src/app.py --synth
2. Creates a `LiveAudioTranscriptionSession` with 16kHz/16-bit/mono PCM settings
3. Captures microphone audio via `pyaudio` (or generates synthetic audio as fallback)
4. Pushes PCM chunks to the SDK via `session.append()`
5. Reads transcription results in a background thread via `for result in session.get_transcription_stream()`
5. Reads transcription results in a background thread via `for result in session.get_stream()`
6. Access text via `result.content[0].text` (OpenAI Realtime ConversationItem pattern)

## API
Expand All @@ -59,7 +59,7 @@ session.start()
session.append(pcm_bytes)

# Read results (typically on a background thread)
for result in session.get_transcription_stream():
for result in session.get_stream():
print(result.content[0].text) # transcribed text
print(result.content[0].transcript) # alias (OpenAI compat)
print(result.is_final) # True for final results
Expand Down
2 changes: 1 addition & 1 deletion samples/python/live-audio-transcription/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
# --- Background thread reads transcription results (mirrors JS readPromise) ---

def read_results():
for result in session.get_transcription_stream():
for result in session.get_stream():
text = result.content[0].text if result.content else ""
if result.is_final:
print()
Expand Down
2 changes: 1 addition & 1 deletion samples/rust/live-audio-transcription/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("✓ Session started\n");

// --- Background task reads transcription results (mirrors JS readPromise) ---
let mut stream = session.get_transcription_stream().await?;
let mut stream = session.get_stream().await?;
let read_task = tokio::spawn(async move {
while let Some(result) = stream.next().await {
match result {
Expand Down
8 changes: 4 additions & 4 deletions sdk/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ endif()
add_library(CppSdk STATIC
src/model.cpp
src/catalog.cpp
src/openai_chat_client.cpp
src/openai_audio_client.cpp
src/openai_live_audio_types.cpp
src/openai_live_audio_client.cpp
src/chat_client.cpp
src/audio_client.cpp
src/live_audio_types.cpp
src/live_audio_session.cpp
src/foundry_local_manager.cpp
)

Expand Down
10 changes: 5 additions & 5 deletions sdk/cpp/include/foundry_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#include "model.h"
#include "catalog.h"
#include "foundry_local_manager.h"
#include "openai/openai_tool_types.h"
#include "openai/openai_chat_client.h"
#include "openai/openai_audio_client.h"
#include "openai/openai_live_audio_types.h"
#include "openai/openai_live_audio_client.h"
#include "openai/tool_types.h"
#include "openai/chat_client.h"
#include "openai/audio_client.h"
#include "openai/live_audio_types.h"
#include "openai/live_audio_session.h"
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include <gsl/pointers>
#include <gsl/span>

#include "openai_tool_types.h"
#include "tool_types.h"

namespace foundry_local::Internal {
struct IFoundryLocalCore;
Expand Down Expand Up @@ -60,7 +60,7 @@ namespace foundry_local {

std::vector<ChatChoice> choices;

/// Returns the object type string. Derived from is_delta — no allocation.
/// Returns the object type string. Derived from is_delta - no allocation.
const char* GetObject() const noexcept { return is_delta ? "chat.completion.chunk" : "chat.completion"; }

/// Returns the created timestamp as an ISO 8601 string.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#include <gsl/pointers>

#include "openai_live_audio_types.h"
#include "live_audio_types.h"

namespace foundry_local::Internal {
struct IFoundryLocalCore;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "core_helpers.h"
#include "logger.h"

#include "openai/openai_live_audio_client.h"
#include "openai/live_audio_session.h"

namespace foundry_local {

Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion sdk/cpp/src/core.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
// Core DLL interop — loads Microsoft.AI.Foundry.Local.Core.dll at runtime.
// Core DLL interop - loads Microsoft.AI.Foundry.Local.Core.dll at runtime.
// Internal header, not part of the public API.

#pragma once
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

#include <nlohmann/json.hpp>

#include "openai/openai_live_audio_client.h"
#include "openai/openai_live_audio_types.h"
#include "openai/live_audio_session.h"
#include "openai/live_audio_types.h"
#include "foundry_local_internal_core.h"
#include "foundry_local_exception.h"
#include "core_interop_request.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include <nlohmann/json.hpp>

#include "openai/openai_live_audio_types.h"
#include "openai/live_audio_types.h"

namespace foundry_local {

Expand Down
4 changes: 2 additions & 2 deletions sdk/cpp/test/live_audio_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#include "mock_object_factory.h"
#include "foundry_local_exception.h"

#include "openai/openai_live_audio_types.h"
#include "openai/openai_live_audio_client.h"
#include "openai/live_audio_types.h"
#include "openai/live_audio_session.h"

#include <nlohmann/json.hpp>

Expand Down
4 changes: 2 additions & 2 deletions sdk/cs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ waveIn.DataAvailable += (sender, e) =>
};

// Read transcription results as they arrive
await foreach (var result in session.GetTranscriptionStream())
await foreach (var result in session.GetStream())
{
// result follows the OpenAI Realtime ConversationItem pattern:
// - result.Content[0].Text — incremental transcribed text (per chunk, not accumulated)
Expand Down Expand Up @@ -341,7 +341,7 @@ await session.StopAsync();
|--------|-------------|
| `StartAsync()` | Initialize the streaming session. Settings are frozen after this call. |
| `AppendAsync(pcmData)` | Push a chunk of raw PCM audio. Thread-safe (bounded internal queue). |
| `GetTranscriptionStream()` | Async enumerable of transcription results. |
| `GetStream()` | Async enumerable of transcription results. |
| `StopAsync()` | Signal end-of-audio, flush remaining audio, and clean up. |
| `DisposeAsync()` | Calls `StopAsync` if needed. Use `await using` for automatic cleanup. |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public sealed class LiveAudioTranscriptionSession : IAsyncDisposable
private bool _started;
private bool _stopped;

// Output channel: native callback writes, user reads via GetTranscriptionStream
// Output channel: native callback writes, user reads via GetStream
private Channel<LiveAudioTranscriptionResponse>? _outputChannel;

// Internal push queue: user writes audio chunks, background loop drains to native core.
Expand Down Expand Up @@ -90,7 +90,7 @@ internal LiveAudioTranscriptionSession(string modelId)

/// <summary>
/// Start a real-time audio streaming session.
/// Must be called before <see cref="AppendAsync"/> or <see cref="GetTranscriptionStream"/>.
/// Must be called before <see cref="AppendAsync"/> or <see cref="GetStream"/>.
/// Settings are frozen after this call.
/// </summary>
/// <param name="ct">Cancellation token.</param>
Expand Down Expand Up @@ -249,7 +249,7 @@ private async Task PushLoopAsync(CancellationToken ct)
/// </summary>
/// <param name="ct">Cancellation token.</param>
/// <returns>Async enumerable of transcription results.</returns>
public async IAsyncEnumerable<LiveAudioTranscriptionResponse> GetTranscriptionStream(
public async IAsyncEnumerable<LiveAudioTranscriptionResponse> GetStream(
[EnumeratorCancellation] CancellationToken ct = default)
{
if (_outputChannel == null)
Expand All @@ -266,7 +266,7 @@ public async IAsyncEnumerable<LiveAudioTranscriptionResponse> GetTranscriptionSt
/// <summary>
/// Signal end-of-audio and stop the streaming session.
/// Any remaining buffered audio in the push queue will be drained to native core first.
/// Final results are delivered through <see cref="GetTranscriptionStream"/> before it completes.
/// Final results are delivered through <see cref="GetStream"/> before it completes.
/// </summary>
/// <param name="ct">Cancellation token.</param>
public async Task StopAsync(CancellationToken ct = default)
Expand Down
6 changes: 3 additions & 3 deletions sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,14 @@ public async Task AppendAsync_BeforeStart_Throws()

[Test]
[SkipUnlessIntegration]
public async Task GetTranscriptionStream_BeforeStart_Throws()
public async Task GetStream_BeforeStart_Throws()
{
await using var session = new LiveAudioTranscriptionSession("test-model");

FoundryLocalException? caught = null;
try
{
await foreach (var _ in session.GetTranscriptionStream())
await foreach (var _ in session.GetStream())
{
// should not reach here
}
Expand Down Expand Up @@ -212,7 +212,7 @@ public async Task LiveStreaming_E2E_WithSyntheticPCM_ReturnsValidResponse()
var results = new List<LiveAudioTranscriptionResponse>();
var readTask = Task.Run(async () =>
{
await foreach (var result in session.GetTranscriptionStream())
await foreach (var result in session.GetStream())
{
results.Add(result);
}
Expand Down
2 changes: 1 addition & 1 deletion sdk/js/src/detail/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { ChatClient } from '../openai/chatClient.js';
import { AudioClient } from '../openai/audioClient.js';
import { EmbeddingClient } from '../openai/embeddingClient.js';
import { ResponsesClient } from '../openai/responsesClient.js';
import { LiveAudioTranscriptionSession } from '../openai/liveAudioTranscriptionClient.js';
import { LiveAudioTranscriptionSession } from '../openai/liveAudioSession.js';
import { IModel } from '../imodel.js';
import { ModelInfo } from '../types.js';

Expand Down
2 changes: 1 addition & 1 deletion sdk/js/src/detail/modelVariant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { ModelInfo } from '../types.js';
import { ChatClient } from '../openai/chatClient.js';
import { AudioClient } from '../openai/audioClient.js';
import { EmbeddingClient } from '../openai/embeddingClient.js';
import { LiveAudioTranscriptionSession } from '../openai/liveAudioTranscriptionClient.js';
import { LiveAudioTranscriptionSession } from '../openai/liveAudioSession.js';
import { ResponsesClient } from '../openai/responsesClient.js';
import { IModel } from '../imodel.js';

Expand Down
7 changes: 0 additions & 7 deletions sdk/js/src/imodel.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { ChatClient } from './openai/chatClient.js';
import { AudioClient } from './openai/audioClient.js';
import { EmbeddingClient } from './openai/embeddingClient.js';
import { LiveAudioTranscriptionSession } from './openai/liveAudioTranscriptionClient.js';
import { ResponsesClient } from './openai/responsesClient.js';
import { ModelInfo } from './types.js';

Expand All @@ -28,12 +27,6 @@ export interface IModel {
createAudioClient(): AudioClient;
createEmbeddingClient(): EmbeddingClient;

/**
* Creates a LiveAudioTranscriptionSession for real-time audio streaming ASR.
* The model must be loaded before calling this method.
* @returns A LiveAudioTranscriptionSession instance.
*/
createLiveTranscriptionSession(): LiveAudioTranscriptionSession;
/**
* Creates a ResponsesClient for interacting with the model via the Responses API.
* Unlike createChatClient/createAudioClient (which use FFI), the Responses API
Expand Down
4 changes: 2 additions & 2 deletions sdk/js/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ export type { IModel } from './imodel.js';
export { ChatClient, ChatClientSettings } from './openai/chatClient.js';
export { AudioClient, AudioClientSettings } from './openai/audioClient.js';
export { EmbeddingClient } from './openai/embeddingClient.js';
export { LiveAudioTranscriptionSession, LiveAudioTranscriptionOptions } from './openai/liveAudioTranscriptionClient.js';
export type { LiveAudioTranscriptionResponse, TranscriptionContentPart } from './openai/liveAudioTranscriptionTypes.js';
export { LiveAudioTranscriptionSession, LiveAudioTranscriptionOptions } from './openai/liveAudioSession.js';
export type { LiveAudioTranscriptionResponse, TranscriptionContentPart } from './openai/liveAudioTypes.js';
export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js';
export { ModelLoadManager } from './detail/modelLoadManager.js';
/** @internal */
Expand Down
2 changes: 1 addition & 1 deletion sdk/js/src/openai/audioClient.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { CoreInterop } from '../detail/coreInterop.js';
import { LiveAudioTranscriptionSession } from './liveAudioTranscriptionClient.js';
import { LiveAudioTranscriptionSession } from './liveAudioSession.js';

export class AudioClientSettings {
language?: string;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { CoreInterop } from '../detail/coreInterop.js';
import { LiveAudioTranscriptionResponse, parseTranscriptionResult, tryParseCoreError } from './liveAudioTranscriptionTypes.js';
import { LiveAudioTranscriptionResponse, parseTranscriptionResult, tryParseCoreError } from './liveAudioTypes.js';

/**
* Audio format settings for a streaming session.
Expand Down Expand Up @@ -191,7 +191,7 @@ export class LiveAudioTranscriptionSession {

/**
* Start a real-time audio streaming session.
* Must be called before append() or getTranscriptionStream().
* Must be called before append() or getStream().
* Settings are frozen after this call.
*/
public async start(): Promise<void> {
Expand Down Expand Up @@ -319,17 +319,17 @@ export class LiveAudioTranscriptionSession {
*
* Usage:
* ```ts
* for await (const result of client.getTranscriptionStream()) {
* for await (const result of session.getStream()) {
* console.log(result.content[0].text);
* }
* ```
*/
public async *getTranscriptionStream(): AsyncGenerator<LiveAudioTranscriptionResponse> {
public async *getStream(): AsyncGenerator<LiveAudioTranscriptionResponse> {
if (!this.outputQueue) {
throw new Error('No active streaming session. Call start() first.');
}
if (this.streamConsumed) {
throw new Error('getTranscriptionStream() can only be called once per session. The output stream has already been consumed.');
throw new Error('getStream() can only be called once per session. The output stream has already been consumed.');
}
this.streamConsumed = true;

Expand All @@ -341,7 +341,7 @@ export class LiveAudioTranscriptionSession {
/**
* Signal end-of-audio and stop the streaming session.
* Any remaining buffered audio in the push queue will be drained to native core first.
* Final results are delivered through getTranscriptionStream() before it completes.
* Final results are delivered through getStream() before it completes.
*/
public async stop(): Promise<void> {
if (!this.started || this.stopped) {
Expand Down
Loading
Loading