Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 39 additions & 2 deletions samples/cs/embeddings/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,52 @@
// Initialize the singleton instance.
await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
var mgr = FoundryLocalManager.Instance;

// Discover available execution providers and their registration status.
var eps = mgr.DiscoverEps();
int maxNameLen = 30;
Console.WriteLine("Available execution providers:");
Console.WriteLine($" {"Name".PadRight(maxNameLen)} Registered");
Console.WriteLine($" {new string('─', maxNameLen)} {"──────────"}");
foreach (var ep in eps)
{
Console.WriteLine($" {ep.Name.PadRight(maxNameLen)} {ep.IsRegistered}");
}

// Download and register all execution providers with per-EP progress.
// EP packages include dependencies and may be large.
// Download is only required again if a new version of the EP is released.
// For cross-platform builds there is no dynamic EP download, so this call returns immediately.
Console.WriteLine("\nDownloading execution providers:");
if (eps.Length > 0)
{
string currentEp = "";
await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
{
if (epName != currentEp)
{
if (currentEp != "")
{
Console.WriteLine();
}
currentEp = epName;
}
Console.Write($"\r {epName.PadRight(maxNameLen)} {percent,6:F1}%");
});
Console.WriteLine();
}
else
{
Console.WriteLine("No execution providers to download.");
}
// </init>

// <model_setup>
// Get the model catalog
var catalog = await mgr.GetCatalogAsync();

// Get an embedding model
var model = await catalog.GetModelAsync("qwen3-0.6b-embedding") ?? throw new Exception("Embedding model not found");
var model = await catalog.GetModelAsync("qwen3-embedding-0.6b") ?? throw new Exception("Embedding model not found");

// Download the model (the method skips download if already cached)
await model.DownloadAsync(progress =>
Expand All @@ -40,7 +78,7 @@

// <single_embedding>
// Get an embedding client
var embeddingClient = await model.GetEmbeddingClientAsync();

Check failure on line 81 in samples/cs/embeddings/Program.cs

View workflow job for this annotation

GitHub Actions / cs-samples (macos)

'IModel' does not contain a definition for 'GetEmbeddingClientAsync' and no accessible extension method 'GetEmbeddingClientAsync' accepting a first argument of type 'IModel' could be found (are you missing a using directive or an assembly reference?)

Check failure on line 81 in samples/cs/embeddings/Program.cs

View workflow job for this annotation

GitHub Actions / cs-samples (macos)

'IModel' does not contain a definition for 'GetEmbeddingClientAsync' and no accessible extension method 'GetEmbeddingClientAsync' accepting a first argument of type 'IModel' could be found (are you missing a using directive or an assembly reference?)

Check failure on line 81 in samples/cs/embeddings/Program.cs

View workflow job for this annotation

GitHub Actions / cs-samples (windows)

'IModel' does not contain a definition for 'GetEmbeddingClientAsync' and no accessible extension method 'GetEmbeddingClientAsync' accepting a first argument of type 'IModel' could be found (are you missing a using directive or an assembly reference?)

Check failure on line 81 in samples/cs/embeddings/Program.cs

View workflow job for this annotation

GitHub Actions / cs-samples (windows)

'IModel' does not contain a definition for 'GetEmbeddingClientAsync' and no accessible extension method 'GetEmbeddingClientAsync' accepting a first argument of type 'IModel' could be found (are you missing a using directive or an assembly reference?)

// Generate a single embedding
Console.WriteLine("\n--- Single Embedding ---");
Expand Down Expand Up @@ -69,6 +107,5 @@
// <cleanup>
// Tidy up - unload the model
await model.UnloadAsync();
Console.WriteLine("\nModel unloaded.");
// </cleanup>
// </complete_code>
32 changes: 31 additions & 1 deletion samples/js/embeddings/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,39 @@ const manager = FoundryLocalManager.create({
// </init>
console.log('✓ SDK initialized successfully');

// Discover available execution providers and their registration status.
const eps = manager.discoverEps();
const maxNameLen = 30;
console.log('\nAvailable execution providers:');
console.log(` ${'Name'.padEnd(maxNameLen)} Registered`);
console.log(` ${'─'.repeat(maxNameLen)} ──────────`);
for (const ep of eps) {
console.log(` ${ep.name.padEnd(maxNameLen)} ${ep.isRegistered}`);
}

// Download and register all execution providers with per-EP progress.
// EP packages include dependencies and may be large.
// Download is only required again if a new version of the EP is released.
console.log('\nDownloading execution providers:');
if (eps.length > 0) {
let currentEp = '';
await manager.downloadAndRegisterEps((epName, percent) => {
if (epName !== currentEp) {
if (currentEp !== '') {
process.stdout.write('\n');
}
currentEp = epName;
}
process.stdout.write(`\r ${epName.padEnd(maxNameLen)} ${percent.toFixed(1).padStart(5)}%`);
});
process.stdout.write('\n');
} else {
console.log('No execution providers to download.');
}

// <model_setup>
// Get an embedding model
const modelAlias = 'qwen3-0.6b-embedding';
const modelAlias = 'qwen3-embedding-0.6b';
const model = await manager.catalog.getModel(modelAlias);

// Download the model
Expand Down
29 changes: 28 additions & 1 deletion samples/python/embeddings/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,35 @@ def main():
FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

# Discover available execution providers and their registration status.
eps = manager.discover_eps()
max_name_len = 30
print("Available execution providers:")
print(f" {'Name':<{max_name_len}} Registered")
print(f" {'─' * max_name_len} ──────────")
for ep in eps:
print(f" {ep.name:<{max_name_len}} {ep.is_registered}")

# Download and register all execution providers.
print("\nDownloading execution providers:")
current_ep = ""
def ep_progress(ep_name: str, percent: float):
nonlocal current_ep
if ep_name != current_ep:
if current_ep:
print()
current_ep = ep_name
print(f"\r {ep_name:<{max_name_len}} {percent:5.1f}%", end="", flush=True)

if eps:
manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()
else:
print("No execution providers to download.")

# Select and load an embedding model from the catalog
model = manager.catalog.get_model("qwen3-0.6b-embedding")
model = manager.catalog.get_model("qwen3-embedding-0.6b")
model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
Expand Down
37 changes: 36 additions & 1 deletion samples/rust/embeddings/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
// Licensed under the MIT License.

// <imports>
use std::io::{self, Write};

use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
// </imports>

const ALIAS: &str = "qwen3-0.6b-embedding";
const ALIAS: &str = "qwen3-embedding-0.6b";

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
Expand All @@ -18,6 +20,39 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
// </init>

// Discover available execution providers and their registration status.
let eps = manager.discover_eps()?;
let max_name_len = 30;
println!("Available execution providers:");
println!(" {:<width$} Registered", "Name", width = max_name_len);
println!(" {:─<width$} ──────────", "", width = max_name_len);
for ep in &eps {
println!(" {:<width$} {}", ep.name, ep.is_registered, width = max_name_len);
}

// Download and register all execution providers.
println!("\nDownloading execution providers:");
if !eps.is_empty() {
manager
.download_and_register_eps_with_progress(None, {
let mut current_ep = String::new();
move |ep_name: &str, percent: f64| {
if ep_name != current_ep {
if !current_ep.is_empty() {
println!();
}
current_ep = ep_name.to_string();
}
print!("\r {:<width$} {:5.1}%", ep_name, percent, width = max_name_len);
io::stdout().flush().ok();
}
})
.await?;
println!();
} else {
println!("No execution providers to download.");
}

// ── 2. Pick a model and ensure it is downloaded ─────────────────────
// <model_setup>
let model = manager.catalog().get_model(ALIAS).await?;
Expand Down
9 changes: 9 additions & 0 deletions sdk/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Enable MSVC exception handling so wil's exception-based APIs (e.g. the
# one-arg GetModuleFileNameW template and THROW_IF_FAILED) are defined.
# Without /EHsc, WIL_ENABLE_EXCEPTIONS is not set and those declarations
# are omitted from <wil/win32_helpers.h>.
if (MSVC)
add_compile_options(/EHsc)
endif()

# Optional: target Windows 10+ APIs (adjust if you need older)
add_compile_definitions(_WIN32_WINNT=0x0A00 WINVER=0x0A00)

Expand All @@ -56,6 +64,7 @@ add_library(CppSdk STATIC
src/openai_audio_client.cpp
src/openai_live_audio_types.cpp
src/openai_live_audio_client.cpp
src/openai_embedding_client.cpp
src/foundry_local_manager.cpp
)

Expand Down
1 change: 1 addition & 0 deletions sdk/cpp/include/foundry_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@
#include "openai/openai_audio_client.h"
#include "openai/openai_live_audio_types.h"
#include "openai/openai_live_audio_client.h"
#include "openai/openai_embedding_client.h"
2 changes: 2 additions & 0 deletions sdk/cpp/include/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
namespace foundry_local {
class OpenAIChatClient;
class OpenAIAudioClient;
class OpenAIEmbeddingClient;
}

namespace foundry_local::Internal {
Expand Down Expand Up @@ -59,6 +60,7 @@ namespace foundry_local {

friend class OpenAIChatClient;
friend class OpenAIAudioClient;
friend class OpenAIEmbeddingClient;
};

enum class DeviceType {
Expand Down
64 changes: 64 additions & 0 deletions sdk/cpp/include/openai/openai_embedding_client.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <string>
#include <string_view>
#include <vector>
#include <optional>

#include <gsl/pointers>
#include <gsl/span>

namespace foundry_local::Internal {
struct IFoundryLocalCore;
}

namespace foundry_local {
class ILogger;
class IModel;

/// A single embedding entry inside an EmbeddingCreateResponse.
struct EmbeddingObject {
    /// Index of this entry; defaults to 0.
    /// NOTE(review): presumably the position of the matching input string - confirm.
    int index{0};

    /// The embedding vector itself; empty until filled in.
    std::vector<float> embedding{};
};

/// Optional token accounting attached to an embeddings response.
/// Both counters start out empty (no value).
struct EmbeddingUsage {
    /// Prompt token count, when one is available.
    std::optional<int> prompt_tokens = std::nullopt;

    /// Total token count, when one is available.
    std::optional<int> total_tokens = std::nullopt;
};

/// Result returned by OpenAIEmbeddingClient::GenerateEmbedding(s).
struct EmbeddingCreateResponse {
    /// Model identifier string.
    /// NOTE(review): presumably the model name echoed back by the service - confirm.
    std::string model{};

    /// Object type tag; always "list".
    std::string object{};

    /// The returned embedding entries.
    std::vector<EmbeddingObject> data{};

    /// Token usage information; empty when none is available.
    std::optional<EmbeddingUsage> usage{};
};

/// Client that generates embeddings for one model.
///
/// The class is `final` and all request methods are `const`. It stores the
/// model ID by value and keeps `gsl::not_null` raw pointers to the core and
/// the logger.
/// NOTE(review): those pointers are presumably non-owning, so the pointees
/// would need to outlive the client - confirm against the implementation.
class OpenAIEmbeddingClient final {
public:
/// Creates a client for the given model.
/// @param model The model this client will generate embeddings with.
explicit OpenAIEmbeddingClient(const IModel& model);

/// Returns the model ID this client was created for.
/// The reference points at the client's own `modelId_` member.
const std::string& GetModelId() const noexcept { return modelId_; }

/// Generate embedding for a single input string.
/// @param input The text to embed.
/// @return The embeddings response for this input.
EmbeddingCreateResponse GenerateEmbedding(std::string_view input) const;

/// Generate embeddings for multiple input strings in a single request.
/// @param inputs The texts to embed.
/// @return The embeddings response for the whole batch.
EmbeddingCreateResponse GenerateEmbeddings(gsl::span<const std::string> inputs) const;

private:
/// Private constructor taking the core, model ID and logger directly.
/// NOTE(review): presumably the target the public constructor forwards to - confirm.
OpenAIEmbeddingClient(gsl::not_null<foundry_local::Internal::IFoundryLocalCore*> core, std::string_view modelId,
gsl::not_null<ILogger*> logger);

/// Helpers that return a request as a string for the single-input and
/// multi-input cases (JSON, per their names; defined outside this header).
std::string BuildSingleRequestJson(std::string_view input) const;
std::string BuildBatchRequestJson(gsl::span<const std::string> inputs) const;

// Declaration order below is also the member initialization order.
std::string modelId_;
gsl::not_null<foundry_local::Internal::IFoundryLocalCore*> core_;
gsl::not_null<ILogger*> logger_;
};

} // namespace foundry_local
43 changes: 43 additions & 0 deletions sdk/cpp/sample/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,46 @@ void ChatWithToolCalling(Manager& manager, const std::string& alias) {
std::cout << "Model unloaded.\n";
}

// ---------------------------------------------------------------------------
// Example 6 – Embeddings (single and batch)
// ---------------------------------------------------------------------------
/// Example 6: looks up `alias` in the catalog, downloads and loads the model,
/// requests one single-input embedding and one two-input batch, prints the
/// resulting counts and dimensions, then unloads the model.
///
/// @param manager Manager whose catalog is searched for the model.
/// @param alias   Catalog alias of the embedding model to use.
void GenerateEmbeddings(Manager& manager, const std::string& alias) {
    std::cout << "\n=== Example 6: Embeddings ===\n";

    // Resolve the alias; a null result means the catalog has no such model.
    auto* embeddingModel = manager.GetCatalog().GetModel(alias);
    if (embeddingModel == nullptr) {
        std::cerr << "Model '" << alias << "' not found in catalog.\n";
        return;
    }

    // The progress line is rewritten in place via '\r'; finish it with a newline.
    embeddingModel->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; });
    std::cout << "\n";

    embeddingModel->Load();

    OpenAIEmbeddingClient client(*embeddingModel);

    // Single input: report the vector dimension if anything came back.
    const auto singleResult = client.GenerateEmbedding("The quick brown fox jumps over the lazy dog");
    if (const auto& entries = singleResult.data; !entries.empty()) {
        std::cout << "Single embedding: dim=" << entries.front().embedding.size() << "\n";
    }

    // Batch input: always report the count, and the dimension when non-empty.
    std::vector<std::string> texts = {"The capital of France is Paris", "Machine learning is a subset of AI"};
    const auto batchResult = client.GenerateEmbeddings(texts);
    const auto& batchEntries = batchResult.data;
    std::cout << "Batch embeddings: count=" << batchEntries.size();
    if (!batchEntries.empty()) {
        std::cout << " dim=" << batchEntries.front().embedding.size();
    }
    std::cout << "\n";

    embeddingModel->Unload();
    std::cout << "Model unloaded.\n";
}

// ---------------------------------------------------------------------------
// main
// ---------------------------------------------------------------------------
Expand All @@ -346,6 +386,9 @@ int main() {
// 5. Tool calling (define tools, let the model call them, feed results back)
ChatWithToolCalling(manager, "phi-3.5-mini");

// 6. Embeddings — generate single and batch embeddings
GenerateEmbeddings(manager, "qwen3-embedding-0.6b");

Manager::Destroy();
return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion sdk/cpp/src/core.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
// Core DLL interop — loads Microsoft.AI.Foundry.Local.Core.dll at runtime.
// Core DLL interop: loads Microsoft.AI.Foundry.Local.Core.dll at runtime.
// Internal header, not part of the public API.

#pragma once
Expand Down
Loading
Loading