diff --git a/samples/cs/audio-transcription-example/Program.cs b/samples/cs/audio-transcription-example/Program.cs index b78e13d2c..ac5689c1f 100644 --- a/samples/cs/audio-transcription-example/Program.cs +++ b/samples/cs/audio-transcription-example/Program.cs @@ -20,7 +20,7 @@ // EP packages include dependencies and may be large. // Download is only required again if a new version of the EP is released. // For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); // @@ -56,6 +56,7 @@ await model.DownloadAsync(progress => // // Get an audio client var audioClient = await model.GetAudioClientAsync(); +audioClient.Settings.Language = "en"; // Get a transcription with streaming outputs var audioFile = args.Length > 0 ? args[0] : Path.Combine(AppContext.BaseDirectory, "Recording.mp3"); diff --git a/samples/cs/foundry-local-web-server/Program.cs b/samples/cs/foundry-local-web-server/Program.cs index 3ca68854a..9225ad7d9 100644 --- a/samples/cs/foundry-local-web-server/Program.cs +++ b/samples/cs/foundry-local-web-server/Program.cs @@ -26,7 +26,7 @@ // EP packages include dependencies and may be large. // Download is only required again if a new version of the EP is released. // For cross platform builds there is no dynamic EP download and this will return immediately. 
-await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); // diff --git a/samples/cs/live-audio-transcription-example/Program.cs b/samples/cs/live-audio-transcription-example/Program.cs index 68bba83fc..9b4e5921f 100644 --- a/samples/cs/live-audio-transcription-example/Program.cs +++ b/samples/cs/live-audio-transcription-example/Program.cs @@ -20,7 +20,7 @@ await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger()); var mgr = FoundryLocalManager.Instance; -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +await mgr.DownloadAndRegisterEpsAsync(); var catalog = await mgr.GetCatalogAsync(); diff --git a/samples/cs/model-management-example/Program.cs b/samples/cs/model-management-example/Program.cs index 38dec588f..a34d2737a 100644 --- a/samples/cs/model-management-example/Program.cs +++ b/samples/cs/model-management-example/Program.cs @@ -16,11 +16,8 @@ var mgr = FoundryLocalManager.Instance; -// Ensure that any Execution Provider (EP) downloads run and are completed. -// EP packages include dependencies and may be large. -// Download is only required again if a new version of the EP is released. -// For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +// Download and register all execution providers. 
+await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); // Model catalog operations diff --git a/samples/cs/native-chat-completions/Program.cs b/samples/cs/native-chat-completions/Program.cs index 082a19f5e..d15275038 100644 --- a/samples/cs/native-chat-completions/Program.cs +++ b/samples/cs/native-chat-completions/Program.cs @@ -19,11 +19,43 @@ var mgr = FoundryLocalManager.Instance; -// Ensure that any Execution Provider (EP) downloads run and are completed. +// Discover available execution providers and their registration status. +var eps = mgr.DiscoverEps(); +Console.WriteLine("Available execution providers:"); +foreach (var ep in eps) +{ + Console.WriteLine($" {ep.Name} (registered: {ep.IsRegistered})"); +} + +// Download and register all execution providers with per-EP progress. // EP packages include dependencies and may be large. // Download is only required again if a new version of the EP is released. // For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +if (eps.Length > 0) +{ + int maxNameLen = eps.Max(e => e.Name.Length); + string currentEp = ""; + await mgr.DownloadAndRegisterEpsAsync((epName, percent) => + { + if (epName != currentEp) + { + if (currentEp != "") + { + Console.WriteLine(); + } + currentEp = epName; + } + Console.Write($"\r {epName.PadRight(maxNameLen)} {percent,6:F1}%"); + if (percent >= 100) + { + Console.WriteLine(); + } + }); +} +else +{ + Console.WriteLine("No execution providers to download."); +} // diff --git a/samples/cs/tool-calling-foundry-local-sdk/Program.cs b/samples/cs/tool-calling-foundry-local-sdk/Program.cs index bbb050c03..8ac96369f 100644 --- a/samples/cs/tool-calling-foundry-local-sdk/Program.cs +++ b/samples/cs/tool-calling-foundry-local-sdk/Program.cs @@ -26,7 +26,7 @@ // EP packages include dependencies and may be large. 
// Download is only required again if a new version of the EP is released. // For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); // diff --git a/samples/cs/tool-calling-foundry-local-web-server/Program.cs b/samples/cs/tool-calling-foundry-local-web-server/Program.cs index 4c283cd47..48ee6c6f5 100644 --- a/samples/cs/tool-calling-foundry-local-web-server/Program.cs +++ b/samples/cs/tool-calling-foundry-local-web-server/Program.cs @@ -21,11 +21,8 @@ var mgr = FoundryLocalManager.Instance; -// Ensure that any Execution Provider (EP) downloads run and are completed. -// EP packages include dependencies and may be large. -// Download is only required again if a new version of the EP is released. -// For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +// Download and register all execution providers. +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); // Get the model catalog diff --git a/samples/js/native-chat-completions/app.js b/samples/js/native-chat-completions/app.js index 399fd6345..4246f64fb 100644 --- a/samples/js/native-chat-completions/app.js +++ b/samples/js/native-chat-completions/app.js @@ -14,6 +14,35 @@ const manager = FoundryLocalManager.create({ // console.log('✓ SDK initialized successfully'); +// Discover available execution providers and their registration status. +const eps = manager.discoverEps(); +console.log('\nAvailable execution providers:'); +for (const ep of eps) { + console.log(` ${ep.name} (registered: ${ep.isRegistered})`); +} + +// Download and register all execution providers with per-EP progress. 
+// EP packages include dependencies and may be large. +// Download is only required again if a new version of the EP is released. +if (eps.length > 0) { + const maxNameLen = Math.max(...eps.map(e => e.name.length)); + let currentEp = ''; + await manager.downloadAndRegisterEps((epName, percent) => { + if (epName !== currentEp) { + if (currentEp !== '') { + process.stdout.write('\n'); + } + currentEp = epName; + } + process.stdout.write(`\r ${epName.padEnd(maxNameLen)} ${percent.toFixed(1).padStart(5)}%`); + if (percent >= 100) { + process.stdout.write('\n'); + } + }); +} else { + console.log('No execution providers to download.'); +} + // // Get the model object const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above diff --git a/sdk/cs/README.md b/sdk/cs/README.md index 7037814b6..ad6f477a7 100644 --- a/sdk/cs/README.md +++ b/sdk/cs/README.md @@ -48,7 +48,10 @@ dotnet build src/Microsoft.AI.Foundry.Local.csproj /p:UseWinML=true ### Triggering EP download -EP download can be time-consuming. Call `DownloadAndRegisterEpsAsync` early (after initialization) to separate the download step from catalog access: +EP management is explicit via two methods: + +- **`DiscoverEps()`** — returns an array of `EpInfo` describing each available EP and whether it is already registered. +- **`DownloadAndRegisterEpsAsync(names?, progressCallback?, ct?)`** — downloads and registers the specified EPs (or all available EPs if no names are given). Returns an `EpDownloadResult`. Overloads are provided so you can pass just a callback without specifying names. 
```csharp // Initialize the manager first (see Quick Start) @@ -56,13 +59,49 @@ await FoundryLocalManager.CreateAsync( new Configuration { AppName = "my-app" }, NullLogger.Instance); -await FoundryLocalManager.Instance.DownloadAndRegisterEpsAsync(); +var mgr = FoundryLocalManager.Instance; -// Now catalog access won't trigger an EP download -var catalog = await FoundryLocalManager.Instance.GetCatalogAsync(); +// Discover what EPs are available +var eps = mgr.DiscoverEps(); +foreach (var ep in eps) +{ + Console.WriteLine($"{ep.Name} — registered: {ep.IsRegistered}"); +} + +// Download and register all EPs +var result = await mgr.DownloadAndRegisterEpsAsync(); +Console.WriteLine($"Success: {result.Success}, Status: {result.Status}"); + +// Or download only specific EPs +var result2 = await mgr.DownloadAndRegisterEpsAsync(new[] { eps[0].Name }); +``` + +#### Per-EP download progress + +Pass an optional `Action<string, double>` callback to receive `(epName, percent)` updates +as each EP downloads (`percent` is 0–100): + +```csharp +string currentEp = ""; +await mgr.DownloadAndRegisterEpsAsync((epName, percent) => +{ + if (epName != currentEp) + { + if (currentEp != "") + { + Console.WriteLine(); + } + currentEp = epName; + } + Console.Write($"\r {epName} {percent,6:F1}%"); + if (percent >= 100) + { + Console.WriteLine(); + } +}); ``` -If you skip this step, EPs are downloaded automatically the first time you access the catalog. Once cached, subsequent calls are fast. +Catalog access no longer blocks on EP downloads. Call `DownloadAndRegisterEpsAsync` explicitly when you need hardware-accelerated execution providers. 
## Quick Start diff --git a/sdk/cs/docs/api/index.md b/sdk/cs/docs/api/index.md index 1dcc4e4c3..4d084f875 100644 --- a/sdk/cs/docs/api/index.md +++ b/sdk/cs/docs/api/index.md @@ -6,6 +6,10 @@ [DeviceType](./microsoft.ai.foundry.local.devicetype.md) +[EpDownloadResult](./microsoft.ai.foundry.local.epdownloadresult.md) + +[EpInfo](./microsoft.ai.foundry.local.epinfo.md) + [FoundryLocalException](./microsoft.ai.foundry.local.foundrylocalexception.md) [FoundryLocalManager](./microsoft.ai.foundry.local.foundrylocalmanager.md) @@ -22,8 +26,6 @@ [ModelSettings](./microsoft.ai.foundry.local.modelsettings.md) -[ModelVariant](./microsoft.ai.foundry.local.modelvariant.md) - [OpenAIAudioClient](./microsoft.ai.foundry.local.openaiaudioclient.md) [OpenAIChatClient](./microsoft.ai.foundry.local.openaichatclient.md) @@ -39,3 +41,11 @@ [AsyncLock](./microsoft.ai.foundry.local.detail.asynclock.md) [CoreInteropRequest](./microsoft.ai.foundry.local.detail.coreinteroprequest.md) + +## Microsoft.AI.Foundry.Local.OpenAI + +[LiveAudioTranscriptionResponse](./microsoft.ai.foundry.local.openai.liveaudiotranscriptionresponse.md) + +[LiveAudioTranscriptionSession](./microsoft.ai.foundry.local.openai.liveaudiotranscriptionsession.md) + +[ResponseFormatExtended](./microsoft.ai.foundry.local.openai.responseformatextended.md) diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.epdownloadresult.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.epdownloadresult.md new file mode 100644 index 000000000..c9ebeb82a --- /dev/null +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.epdownloadresult.md @@ -0,0 +1,59 @@ +# EpDownloadResult + +Namespace: Microsoft.AI.Foundry.Local + +Result of an explicit EP download and registration operation. + +```csharp +public record EpDownloadResult +``` + +## Properties + +### **Success** + +True if all requested EPs were successfully downloaded and registered. 
+ +```csharp +public bool Success { get; set; } +``` + +#### Property Value + +[Boolean](https://docs.microsoft.com/en-us/dotnet/api/system.boolean)
+ +### **Status** + +Human-readable status message. + +```csharp +public string Status { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **RegisteredEps** + +Names of EPs that were successfully registered. + +```csharp +public String[] RegisteredEps { get; set; } +``` + +#### Property Value + +[String[]](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **FailedEps** + +Names of EPs that failed to register. + +```csharp +public String[] FailedEps { get; set; } +``` + +#### Property Value + +[String[]](https://docs.microsoft.com/en-us/dotnet/api/system.string)
diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md new file mode 100644 index 000000000..d2df44d3b --- /dev/null +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md @@ -0,0 +1,35 @@ +# EpInfo + +Namespace: Microsoft.AI.Foundry.Local + +Describes a discoverable execution provider bootstrapper. + +```csharp +public record EpInfo +``` + +## Properties + +### **Name** + +The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). + +```csharp +public string Name { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **IsRegistered** + +True if this EP has already been successfully downloaded and registered. + +```csharp +public bool IsRegistered { get; set; } +``` + +#### Property Value + +[Boolean](https://docs.microsoft.com/en-us/dotnet/api/system.boolean)
diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md index 9e5be8aa2..5f1ba50e9 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md @@ -96,9 +96,9 @@ The model catalog. **Remarks:** -The catalog is populated on first use. - If you are using a WinML build this will trigger a one-off execution provider download if not already done. - It is recommended to call [FoundryLocalManager.DownloadAndRegisterEpsAsync(Nullable<CancellationToken>)](./microsoft.ai.foundry.local.foundrylocalmanager.md#downloadandregisterepsasyncnullablecancellationtoken) first to separate out the two steps. +The catalog is populated on first use and returns models based on currently available execution providers. + To ensure all hardware-accelerated models are listed, call [FoundryLocalManager.DownloadAndRegisterEpsAsync(Nullable<CancellationToken>)](./microsoft.ai.foundry.local.foundrylocalmanager.md#downloadandregisterepsasyncnullablecancellationtoken) first to + register execution providers, then access the catalog. ### **StartWebServiceAsync(Nullable<CancellationToken>)** @@ -141,17 +141,26 @@ Optional cancellation token. [Task](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task)
Task stopping the web service. +### **DiscoverEps()** + +Discovers all available execution provider bootstrappers. + Returns metadata about each EP including whether it is already registered. + +```csharp +public EpInfo[] DiscoverEps() +``` + +#### Returns + +[EpInfo[]](./microsoft.ai.foundry.local.epinfo.md)
+Array of EP bootstrapper info describing available EPs. + ### **DownloadAndRegisterEpsAsync(Nullable<CancellationToken>)** -Download and register execution providers. - Only relevant when using WinML. - - Execution provider download can be time consuming due to the size of the packages. - Once downloaded, EPs are not re-downloaded unless a new version is available, so this method will be fast - on subsequent calls. +Downloads and registers all available execution providers. ```csharp -public Task DownloadAndRegisterEpsAsync(Nullable<CancellationToken> ct) +public Task<EpDownloadResult> DownloadAndRegisterEpsAsync(Nullable<CancellationToken> ct) ``` #### Parameters @@ -161,7 +170,104 @@ Optional cancellation token. #### Returns -[Task](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task)
+[Task<EpDownloadResult>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Result describing which EPs succeeded and which failed. + +**Remarks:** + +Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + +### **DownloadAndRegisterEpsAsync(IEnumerable<String>, Nullable<CancellationToken>)** + +Downloads and registers the specified execution providers. + +```csharp +public Task<EpDownloadResult> DownloadAndRegisterEpsAsync(IEnumerable<string> names, Nullable<CancellationToken> ct) +``` + +#### Parameters + +`names` [IEnumerable<String>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Subset of EP bootstrapper names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<EpDownloadResult>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Result describing which EPs succeeded and which failed. + +**Remarks:** + +Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + +### **DownloadAndRegisterEpsAsync(Action<String, Double>, Nullable<CancellationToken>)** + +Downloads and registers all available execution providers, reporting progress. + +```csharp +public Task<EpDownloadResult> DownloadAndRegisterEpsAsync(Action<string, double> progressCallback, Nullable<CancellationToken> ct) +``` + +#### Parameters + +`progressCallback` [Action<String, Double>](https://docs.microsoft.com/en-us/dotnet/api/system.action-2)
+Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<EpDownloadResult>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Result describing which EPs succeeded and which failed. + +**Remarks:** + +Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + +### **DownloadAndRegisterEpsAsync(IEnumerable<String>, Action<String, Double>, Nullable<CancellationToken>)** + +Downloads and registers the specified execution providers, reporting progress. + +```csharp +public Task<EpDownloadResult> DownloadAndRegisterEpsAsync(IEnumerable<string> names, Action<string, double> progressCallback, Nullable<CancellationToken> ct) +``` + +#### Parameters + +`names` [IEnumerable<String>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Subset of EP bootstrapper names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). + +`progressCallback` [Action<String, Double>](https://docs.microsoft.com/en-us/dotnet/api/system.action-2)
+Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<EpDownloadResult>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Result describing which EPs succeeded and which failed. + +**Remarks:** + +Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + +### **Dispose(Boolean)** + +```csharp +protected void Dispose(bool disposing) +``` + +#### Parameters + +`disposing` [Boolean](https://docs.microsoft.com/en-us/dotnet/api/system.boolean)
### **Dispose()** diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.icatalog.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.icatalog.md index dc68c173f..6a3858b2c 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.icatalog.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.icatalog.md @@ -29,7 +29,7 @@ public abstract string Name { get; } List the available models in the catalog. ```csharp -Task<List<Model>> ListModelsAsync(Nullable<CancellationToken> ct) +Task<List<IModel>> ListModelsAsync(Nullable<CancellationToken> ct) ``` #### Parameters @@ -39,15 +39,15 @@ Optional CancellationToken. #### Returns -[Task<List<Model>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-List of Model instances. +[Task<List<IModel>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+List of IModel instances. ### **GetModelAsync(String, Nullable<CancellationToken>)** Lookup a model by its alias. ```csharp -Task<Model> GetModelAsync(string modelAlias, Nullable<CancellationToken> ct) +Task<IModel> GetModelAsync(string modelAlias, Nullable<CancellationToken> ct) ``` #### Parameters @@ -60,15 +60,17 @@ Optional CancellationToken. #### Returns -[Task<Model>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-The matching Model, or null if no model with the given alias exists. +[Task<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+The matching IModel, or null if no model with the given alias exists. ### **GetModelVariantAsync(String, Nullable<CancellationToken>)** Lookup a model variant by its unique model id. + NOTE: This will return an IModel with a single variant. Use GetModelAsync to get an IModel with all available + variants. ```csharp -Task<ModelVariant> GetModelVariantAsync(string modelId, Nullable<CancellationToken> ct) +Task<IModel> GetModelVariantAsync(string modelId, Nullable<CancellationToken> ct) ``` #### Parameters @@ -81,15 +83,15 @@ Optional CancellationToken. #### Returns -[Task<ModelVariant>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-The matching ModelVariant, or null if no variant with the given id exists. +[Task<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+The matching IModel, or null if no variant with the given id exists. ### **GetCachedModelsAsync(Nullable<CancellationToken>)** Get a list of currently downloaded models from the model cache. ```csharp -Task<List<ModelVariant>> GetCachedModelsAsync(Nullable<CancellationToken> ct) +Task<List<IModel>> GetCachedModelsAsync(Nullable<CancellationToken> ct) ``` #### Parameters @@ -99,15 +101,15 @@ Optional CancellationToken. #### Returns -[Task<List<ModelVariant>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-List of ModelVariant instances. +[Task<List<IModel>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+List of IModel instances. ### **GetLoadedModelsAsync(Nullable<CancellationToken>)** Get a list of the currently loaded models. ```csharp -Task<List<ModelVariant>> GetLoadedModelsAsync(Nullable<CancellationToken> ct) +Task<List<IModel>> GetLoadedModelsAsync(Nullable<CancellationToken> ct) ``` #### Parameters @@ -117,5 +119,27 @@ Optional CancellationToken. #### Returns -[Task<List<ModelVariant>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-List of ModelVariant instances. +[Task<List<IModel>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+List of IModel instances. + +### **GetLatestVersionAsync(IModel, Nullable<CancellationToken>)** + +Get the latest version of a model. + This is used to check if a newer version of a model is available in the catalog for download. + +```csharp +Task<IModel> GetLatestVersionAsync(IModel model, Nullable<CancellationToken> ct) +``` + +#### Parameters + +`model` [IModel](./microsoft.ai.foundry.local.imodel.md)
+The model to check for the latest version. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional CancellationToken. + +#### Returns + +[Task<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+The latest version of the model. Will match the input if it is the latest version. diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md index d5d2b4370..861386a87 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md @@ -30,6 +30,28 @@ public abstract string Alias { get; } [String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+### **Info** + +```csharp +public abstract ModelInfo Info { get; } +``` + +#### Property Value + +[ModelInfo](./microsoft.ai.foundry.local.modelinfo.md)
+ +### **Variants** + +Variants of the model that are available. Variants of the model are optimized for different devices. + +```csharp +public abstract IReadOnlyList<IModel> Variants { get; } +``` + +#### Property Value + +[IReadOnlyList<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ireadonlylist-1)
+ ## Methods ### **IsCachedAsync(Nullable<CancellationToken>)** @@ -185,3 +207,22 @@ Optional cancellation token. [Task<OpenAIAudioClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
OpenAI.AudioClient + +### **SelectVariant(IModel)** + +Select a model variant from [IModel.Variants](./microsoft.ai.foundry.local.imodel.md#variants) to use for [IModel](./microsoft.ai.foundry.local.imodel.md) operations. + An IModel from `Variants` can also be used directly. + +```csharp +void SelectVariant(IModel variant) +``` + +#### Parameters + +`variant` [IModel](./microsoft.ai.foundry.local.imodel.md)
+Model variant to select. Must be one of the variants in [IModel.Variants](./microsoft.ai.foundry.local.imodel.md#variants). + +#### Exceptions + +[FoundryLocalException](./microsoft.ai.foundry.local.foundrylocalexception.md)
+If variant is not valid for this model. diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md index c63b78a44..23cd67a38 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md @@ -15,42 +15,42 @@ Attributes [NullableContextAttribute](https://docs.microsoft.com/en-us/dotnet/ap ### **Variants** ```csharp -public List<ModelVariant> Variants { get; internal set; } +public IReadOnlyList<IModel> Variants { get; } ``` #### Property Value -[List<ModelVariant>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.list-1)
+[IReadOnlyList<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ireadonlylist-1)
-### **SelectedVariant** +### **Alias** ```csharp -public ModelVariant SelectedVariant { get; internal set; } +public string Alias { get; set; } ``` #### Property Value -[ModelVariant](./microsoft.ai.foundry.local.modelvariant.md)
+[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
-### **Alias** +### **Id** ```csharp -public string Alias { get; set; } +public string Id { get; } ``` #### Property Value [String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
-### **Id** +### **Info** ```csharp -public string Id { get; } +public ModelInfo Info { get; } ``` #### Property Value -[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+[ModelInfo](./microsoft.ai.foundry.local.modelinfo.md)
## Methods @@ -86,17 +86,17 @@ public Task IsLoadedAsync(Nullable ct) [Task<Boolean>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-### **SelectVariant(ModelVariant)** +### **SelectVariant(IModel)** Select a specific model variant from [Model.Variants](./microsoft.ai.foundry.local.model.md#variants) to use for [IModel](./microsoft.ai.foundry.local.imodel.md) operations. ```csharp -public void SelectVariant(ModelVariant variant) +public void SelectVariant(IModel variant) ``` #### Parameters -`variant` [ModelVariant](./microsoft.ai.foundry.local.modelvariant.md)
+`variant` [IModel](./microsoft.ai.foundry.local.imodel.md)
Model variant to select. Must be one of the variants in [Model.Variants](./microsoft.ai.foundry.local.model.md#variants). #### Exceptions @@ -104,29 +104,6 @@ Model variant to select. Must be one of the variants in [Model.Variants](./micro [FoundryLocalException](./microsoft.ai.foundry.local.foundrylocalexception.md)
If variant is not valid for this model. -### **GetLatestVersion(ModelVariant)** - -Get the latest version of the specified model variant. - -```csharp -public ModelVariant GetLatestVersion(ModelVariant variant) -``` - -#### Parameters - -`variant` [ModelVariant](./microsoft.ai.foundry.local.modelvariant.md)
-Model variant. - -#### Returns - -[ModelVariant](./microsoft.ai.foundry.local.modelvariant.md)
-ModelVariant for latest version. Same as `variant` if that is the latest version. - -#### Exceptions - -[FoundryLocalException](./microsoft.ai.foundry.local.foundrylocalexception.md)
-If variant is not valid for this model. - ### **GetPathAsync(Nullable<CancellationToken>)** ```csharp diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.modelinfo.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.modelinfo.md index 750253c1b..1716e3b2f 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.modelinfo.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.modelinfo.md @@ -222,6 +222,46 @@ public long CreatedAtUnix { get; set; } [Int64](https://docs.microsoft.com/en-us/dotnet/api/system.int64)
+### **ContextLength** + +```csharp +public Nullable<long> ContextLength { get; set; } +``` + +#### Property Value + +[Nullable<Int64>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+ +### **InputModalities** + +```csharp +public string InputModalities { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **OutputModalities** + +```csharp +public string OutputModalities { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **Capabilities** + +```csharp +public string Capabilities { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ ## Constructors ### **ModelInfo()** diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiaudioclient.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiaudioclient.md index bcaefc04e..b1b60bd86 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiaudioclient.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiaudioclient.md @@ -71,3 +71,17 @@ Cancellation token. [IAsyncEnumerable<AudioCreateTranscriptionResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.iasyncenumerable-1)
An asynchronous enumerable of transcription responses. + +### **CreateLiveTranscriptionSession()** + +Create a real-time streaming transcription session. + Audio data is pushed in as PCM chunks and transcription results are returned as an async stream. + +```csharp +public LiveAudioTranscriptionSession CreateLiveTranscriptionSession() +``` + +#### Returns + +[LiveAudioTranscriptionSession](./microsoft.ai.foundry.local.openai.liveaudiotranscriptionsession.md)
+A streaming session that must be disposed when done. diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaichatclient.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaichatclient.md index 251e474ce..43e00f6de 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaichatclient.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaichatclient.md @@ -51,6 +51,32 @@ Optional cancellation token. [Task<ChatCompletionCreateResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
Chat completion response. +### **CompleteChatAsync(IEnumerable<ChatMessage>, IEnumerable<ToolDefinition>, Nullable<CancellationToken>)** + +Execute a chat completion request. + + To continue a conversation, add the ChatMessage from the previous response and new prompt to the messages. + +```csharp +public Task<ChatCompletionCreateResponse> CompleteChatAsync(IEnumerable<ChatMessage> messages, IEnumerable<ToolDefinition> tools, Nullable<CancellationToken> ct) +``` + +#### Parameters + +`messages` [IEnumerable<ChatMessage>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Chat messages. The system message is automatically added. + +`tools` [IEnumerable<ToolDefinition>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Optional tool definitions to include in the request. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<ChatCompletionCreateResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Chat completion response. + ### **CompleteChatStreamingAsync(IEnumerable<ChatMessage>, CancellationToken)** Execute a chat completion request with streamed output. @@ -67,7 +93,33 @@ public IAsyncEnumerable CompleteChatStreamingAsync Chat messages. The system message is automatically added. `ct` [CancellationToken](https://docs.microsoft.com/en-us/dotnet/api/system.threading.cancellationtoken)
-Optional cancellation token. +Cancellation token. + +#### Returns + +[IAsyncEnumerable<ChatCompletionCreateResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.iasyncenumerable-1)
+Async enumerable of chat completion responses. + +### **CompleteChatStreamingAsync(IEnumerable<ChatMessage>, IEnumerable<ToolDefinition>, CancellationToken)** + +Execute a chat completion request with streamed output. + + To continue a conversation, add the ChatMessage from the previous response and new prompt to the messages. + +```csharp +public IAsyncEnumerable<ChatCompletionCreateResponse> CompleteChatStreamingAsync(IEnumerable<ChatMessage> messages, IEnumerable<ToolDefinition> tools, CancellationToken ct) +``` + +#### Parameters + +`messages` [IEnumerable<ChatMessage>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Chat messages. The system message is automatically added. + +`tools` [IEnumerable<ToolDefinition>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Optional tool definitions to include in the request. + +`ct` [CancellationToken](https://docs.microsoft.com/en-us/dotnet/api/system.threading.cancellationtoken)
+Cancellation token. #### Returns diff --git a/sdk/cs/src/Catalog.cs b/sdk/cs/src/Catalog.cs index 5cdb050fb..f33dcaff5 100644 --- a/sdk/cs/src/Catalog.cs +++ b/sdk/cs/src/Catalog.cs @@ -240,6 +240,11 @@ private async Task UpdateModels(CancellationToken? ct) _lastFetch = DateTime.Now; } + internal void InvalidateCache() + { + _lastFetch = DateTime.MinValue; + } + public void Dispose() { _lock.Dispose(); diff --git a/sdk/cs/src/Detail/JsonSerializationContext.cs b/sdk/cs/src/Detail/JsonSerializationContext.cs index 3fefd305b..37cc81ac8 100644 --- a/sdk/cs/src/Detail/JsonSerializationContext.cs +++ b/sdk/cs/src/Detail/JsonSerializationContext.cs @@ -24,6 +24,8 @@ namespace Microsoft.AI.Foundry.Local.Detail; [JsonSerializable(typeof(AudioCreateTranscriptionRequest))] [JsonSerializable(typeof(AudioCreateTranscriptionResponse))] [JsonSerializable(typeof(string[]))] // list loaded or cached models +[JsonSerializable(typeof(EpInfo[]))] +[JsonSerializable(typeof(EpDownloadResult))] [JsonSerializable(typeof(JsonElement))] [JsonSerializable(typeof(ResponseFormatExtended))] [JsonSerializable(typeof(ToolChoice))] diff --git a/sdk/cs/src/EpInfo.cs b/sdk/cs/src/EpInfo.cs new file mode 100644 index 000000000..d170ac0ed --- /dev/null +++ b/sdk/cs/src/EpInfo.cs @@ -0,0 +1,45 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local; + +using System.Text.Json.Serialization; + +/// +/// Describes a discoverable execution provider bootstrapper. +/// +public record EpInfo +{ + /// The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). 
+ [JsonPropertyName("Name")] + public required string Name { get; init; } + + /// True if this EP has already been successfully downloaded and registered. + [JsonPropertyName("IsRegistered")] + public required bool IsRegistered { get; init; } +} + +/// +/// Result of an explicit EP download and registration operation. +/// +public record EpDownloadResult +{ + /// True if all requested EPs were successfully downloaded and registered. + [JsonPropertyName("Success")] + public required bool Success { get; init; } + + /// Human-readable status message. + [JsonPropertyName("Status")] + public required string Status { get; init; } + + /// Names of EPs that were successfully registered. + [JsonPropertyName("RegisteredEps")] + public required string[] RegisteredEps { get; init; } + + /// Names of EPs that failed to register. + [JsonPropertyName("FailedEps")] + public required string[] FailedEps { get; init; } +} diff --git a/sdk/cs/src/FoundryLocalManager.cs b/sdk/cs/src/FoundryLocalManager.cs index d3e4fb79d..10b51285f 100644 --- a/sdk/cs/src/FoundryLocalManager.cs +++ b/sdk/cs/src/FoundryLocalManager.cs @@ -97,9 +97,9 @@ public static async Task CreateAsync(Configuration configuration, ILogger logger /// Optional cancellation token. /// The model catalog. /// - /// The catalog is populated on first use. - /// If you are using a WinML build this will trigger a one-off execution provider download if not already done. - /// It is recommended to call first to separate out the two steps. + /// The catalog is populated on first use and returns models based on currently available execution providers. + /// To ensure all hardware-accelerated models are listed, call <see cref="DownloadAndRegisterEpsAsync(CancellationToken?)"/> first to + /// register execution providers, then access the catalog. /// public async Task GetCatalogAsync(CancellationToken? ct = null) { @@ -135,19 +135,94 @@ await Utils.CallWithExceptionHandling(() => StopWebServiceImplAsync(ct), } /// - /// Download and register execution providers.
- /// Only relevant when using WinML. - /// - /// Execution provider download can be time consuming due to the size of the packages. - /// Once downloaded, EPs are not re-downloaded unless a new version is available, so this method will be fast - /// on subsequent calls. + /// Discovers all available execution provider bootstrappers. + /// Returns metadata about each EP including whether it is already registered. + /// + /// Array of EP bootstrapper info describing available EPs. + public EpInfo[] DiscoverEps() + { + return Utils.CallWithExceptionHandling(DiscoverEpsImpl, + "Error discovering execution providers.", _logger); + } + + /// + /// Downloads and registers all available execution providers. + /// + /// Optional cancellation token. + /// Result describing which EPs succeeded and which failed. + /// + /// Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + /// After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + /// + public async Task DownloadAndRegisterEpsAsync(CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(null, null, ct), + "Error downloading execution providers.", _logger) + .ConfigureAwait(false); + } + + /// + /// Downloads and registers the specified execution providers. + /// + /// + /// Subset of EP bootstrapper names to download (as returned by <see cref="DiscoverEps"/>). + /// + /// Optional cancellation token. + /// Result describing which EPs succeeded and which failed. + /// + /// Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + /// After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + /// + public async Task DownloadAndRegisterEpsAsync(IEnumerable names, + CancellationToken? 
ct = null) + { + return await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(names, null, ct), + "Error downloading execution providers.", _logger) + .ConfigureAwait(false); + } + + /// + /// Downloads and registers all available execution providers, reporting progress. /// + /// + /// Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. + /// /// Optional cancellation token. - public async Task DownloadAndRegisterEpsAsync(CancellationToken? ct = null) + /// Result describing which EPs succeeded and which failed. + /// + /// Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + /// After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + /// + public async Task DownloadAndRegisterEpsAsync(Action progressCallback, + CancellationToken? ct = null) { - await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(ct), - "Error downloading and registering execution providers.", _logger) - .ConfigureAwait(false); + return await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(null, progressCallback, ct), + "Error downloading execution providers.", _logger) + .ConfigureAwait(false); + } + + /// + /// Downloads and registers the specified execution providers, reporting progress. + /// + /// + /// Subset of EP bootstrapper names to download (as returned by <see cref="DiscoverEps"/>). + /// + /// + /// Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. + /// + /// Optional cancellation token. + /// Result describing which EPs succeeded and which failed. + /// + /// Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + /// After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. 
+ /// + public async Task DownloadAndRegisterEpsAsync(IEnumerable names, + Action progressCallback, + CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(names, progressCallback, ct), + "Error downloading execution providers.", _logger) + .ConfigureAwait(false); } private FoundryLocalManager(Configuration configuration, ILogger logger) @@ -197,6 +272,24 @@ private async Task InitializeAsync(CancellationToken? ct = null) return; } + private EpInfo[] DiscoverEpsImpl() + { + var result = _coreInterop!.ExecuteCommand("discover_eps"); + if (result.Error != null) + { + throw new FoundryLocalException($"Error discovering execution providers: {result.Error}", _logger); + } + + var data = result.Data; + if (string.IsNullOrWhiteSpace(data)) + { + return Array.Empty(); + } + + return JsonSerializer.Deserialize(data, JsonSerializationContext.Default.EpInfoArray) + ?? Array.Empty(); + } + private async Task GetCatalogImplAsync(CancellationToken? ct = null) { // create on first use @@ -259,17 +352,78 @@ private async Task StopWebServiceImplAsync(CancellationToken? ct = null) Urls = null; } - private async Task DownloadAndRegisterEpsImplAsync(CancellationToken? ct = null) + private async Task DownloadAndRegisterEpsImplAsync(IEnumerable? names = null, + Action? progressCallback = null, + CancellationToken? ct = null) { - using var disposable = await asyncLock.LockAsync().ConfigureAwait(false); CoreInteropRequest? 
input = null; - var result = await _coreInterop!.ExecuteCommandAsync("download_and_register_eps", input, ct).ConfigureAwait(false); + if (names != null) + { + var namesList = string.Join(",", names); + if (!string.IsNullOrEmpty(namesList)) + { + input = new CoreInteropRequest + { + Params = new Dictionary { { "Names", namesList } } + }; + } + } + + ICoreInterop.Response result; + + if (progressCallback != null) + { + var callback = new ICoreInterop.CallbackFn(progressString => + { + var sepIndex = progressString.IndexOf('|'); + if (sepIndex >= 0) + { + var name = progressString[..sepIndex]; + if (double.TryParse(progressString[(sepIndex + 1)..], + System.Globalization.NumberStyles.Float, + System.Globalization.CultureInfo.InvariantCulture, + out var percent)) + { + progressCallback(string.IsNullOrEmpty(name) ? "" : name, percent); + } + } + }); + + result = await _coreInterop!.ExecuteCommandWithCallbackAsync("download_and_register_eps", input, + callback, ct).ConfigureAwait(false); + } + else + { + result = await _coreInterop!.ExecuteCommandAsync("download_and_register_eps", input, ct).ConfigureAwait(false); + } + if (result.Error != null) { - throw new FoundryLocalException($"Error downloading and registering execution providers: {result.Error}", _logger); + throw new FoundryLocalException($"Error downloading execution providers: {result.Error}", _logger); } + + EpDownloadResult epResult; + + if (!string.IsNullOrEmpty(result.Data)) + { + epResult = JsonSerializer.Deserialize(result.Data!, JsonSerializationContext.Default.EpDownloadResult) + ?? throw new FoundryLocalException("Failed to deserialize EP download result.", _logger); + } + else + { + epResult = new EpDownloadResult { Success = true, Status = "Completed", RegisteredEps = [], FailedEps = [] }; + } + + // Invalidate the catalog cache if any EP was newly registered so the next access + // re-fetches models with the updated set of available EPs. 
+ if ((epResult.Success || epResult.RegisteredEps.Length > 0) && _catalog != null) + { + _catalog.InvalidateCache(); + } + + return epResult; } protected virtual void Dispose(bool disposing) diff --git a/sdk/cs/src/ICatalog.cs b/sdk/cs/src/ICatalog.cs index b50f8c401..4dca8e7d9 100644 --- a/sdk/cs/src/ICatalog.cs +++ b/sdk/cs/src/ICatalog.cs @@ -31,7 +31,7 @@ public interface ICatalog /// /// Lookup a model variant by its unique model id. - /// NOTE: This will return an IModel with a single variant. Use GetModelAsync to get an IModel with all avaialable + /// NOTE: This will return an IModel with a single variant. Use GetModelAsync to get an IModel with all available /// variants. /// /// Model id. diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj index bec1cc22c..e8a7b7551 100644 --- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj +++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj @@ -100,8 +100,8 @@ $(FoundryLocalCoreVersion) - 0.9.0-dev-20260325T055840-33ebe7c - 0.9.0-dev-20260325T055742-33ebe7c + 0.9.0-dev-202603310538-f6efa8d3 + 0.9.0-dev-202603310538-f6efa8d3 True diff --git a/sdk/js/README.md b/sdk/js/README.md index 9b08f9ac5..9e56ec523 100644 --- a/sdk/js/README.md +++ b/sdk/js/README.md @@ -34,6 +34,47 @@ When WinML is enabled: > **Note:** The `--winml` flag is only relevant on Windows. On macOS and Linux, the standard installation is used regardless of this flag. 
+### Explicit EP Management + +You can explicitly discover and download execution providers using the `discoverEps()` and `downloadAndRegisterEps()` methods: + +```typescript +// Discover available EPs and their status +const eps = manager.discoverEps(); +for (const ep of eps) { + console.log(`${ep.name} — registered: ${ep.isRegistered}`); +} + +// Download and register all available EPs +const result = await manager.downloadAndRegisterEps(); +console.log(`Success: ${result.success}, Status: ${result.status}`); + +// Download only specific EPs +const result2 = await manager.downloadAndRegisterEps([eps[0].name]); +``` + +#### Per-EP download progress + +Pass an optional `progressCallback` to receive `(epName, percent)` updates as each EP downloads (`percent` is 0–100): + +```typescript +let currentEp = ''; +await manager.downloadAndRegisterEps((epName, percent) => { + if (epName !== currentEp) { + if (currentEp !== '') { + process.stdout.write('\n'); + } + currentEp = epName; + } + process.stdout.write(`\r ${epName} ${percent.toFixed(1)}%`); + if (percent >= 100) { + process.stdout.write('\n'); + } +}); +``` + +Catalog access does not block on EP downloads. Call `downloadAndRegisterEps()` when you need hardware-accelerated execution providers. + ## Quick Start ```typescript diff --git a/sdk/js/docs/README.md b/sdk/js/docs/README.md index 5e50e636f..0cb39e1bb 100644 --- a/sdk/js/docs/README.md +++ b/sdk/js/docs/README.md @@ -153,6 +153,70 @@ object: string; *** +### EpDownloadResult + +Result of an explicit EP download and registration operation. + +#### Properties + +##### failedEps + +```ts +failedEps: string[]; +``` + +Names of EPs that failed to register. + +##### registeredEps + +```ts +registeredEps: string[]; +``` + +Names of EPs that were successfully registered. + +##### status + +```ts +status: string; +``` + +Human-readable status message. 
+ +##### success + +```ts +success: boolean; +``` + +True if all requested EPs were successfully downloaded and registered. + +*** + +### EpInfo + +Describes a discoverable execution provider bootstrapper. + +#### Properties + +##### isRegistered + +```ts +isRegistered: boolean; +``` + +True if this EP has already been successfully downloaded and registered. + +##### name + +```ts +name: string; +``` + +The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). + +*** + ### FoundryLocalConfig Configuration options for the Foundry Local SDK. diff --git a/sdk/js/docs/classes/FoundryLocalManager.md b/sdk/js/docs/classes/FoundryLocalManager.md index dc4908a60..6ca963f76 100644 --- a/sdk/js/docs/classes/FoundryLocalManager.md +++ b/sdk/js/docs/classes/FoundryLocalManager.md @@ -87,26 +87,98 @@ Error - If the web service is not running. *** +### discoverEps() + +```ts +discoverEps(): EpInfo[]; +``` + +Discovers available execution providers (EPs) and their registration status. + +#### Returns + +[`EpInfo`](../README.md#epinfo)[] + +An array of EpInfo describing each available EP. + +*** + ### downloadAndRegisterEps() +#### Call Signature + ```ts -downloadAndRegisterEps(): void; +downloadAndRegisterEps(): Promise<EpDownloadResult>; ``` -Download and register execution providers. -Only relevant when using the WinML variant. On non-WinML builds this is a no-op. +Downloads and registers execution providers. -Call this after initialization to trigger EP download before accessing the catalog, -so that hardware-accelerated execution providers (e.g. QNN for NPU) are available -when listing and loading models. +##### Returns -#### Returns +`Promise`\<[`EpDownloadResult`](../README.md#epdownloadresult)\> -`void` +A promise that resolves with an EpDownloadResult describing the outcome. -#### Throws +#### Call Signature + +```ts +downloadAndRegisterEps(names): Promise<EpDownloadResult>; +``` + +Downloads and registers execution providers. 
+ +##### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `names` | `string`[] | Array of EP names to download. | + +##### Returns + +`Promise`\<[`EpDownloadResult`](../README.md#epdownloadresult)\> + +A promise that resolves with an EpDownloadResult describing the outcome. + +#### Call Signature + +```ts +downloadAndRegisterEps(progressCallback): Promise<EpDownloadResult>; +``` + +Downloads and registers execution providers, reporting progress. + +##### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `progressCallback` | (`epName`, `percent`) => `void` | Callback invoked with (epName, percent) as each EP downloads. Percent is 0-100. | + +##### Returns + +`Promise`\<[`EpDownloadResult`](../README.md#epdownloadresult)\> + +A promise that resolves with an EpDownloadResult describing the outcome. + +#### Call Signature + +```ts +downloadAndRegisterEps(names, progressCallback): Promise<EpDownloadResult>; +``` + +Downloads and registers execution providers, reporting progress. + +##### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `names` | `string`[] | Array of EP names to download. | +| `progressCallback` | (`epName`, `percent`) => `void` | Callback invoked with (epName, percent) as each EP downloads. Percent is 0-100. | + +##### Returns + +`Promise`\<[`EpDownloadResult`](../README.md#epdownloadresult)\> -Error - If execution provider download or registration fails. +A promise that resolves with an EpDownloadResult describing the outcome. 
*** diff --git a/sdk/js/examples/chat-completion.ts b/sdk/js/examples/chat-completion.ts index a9e2d59a4..f18b989cd 100644 --- a/sdk/js/examples/chat-completion.ts +++ b/sdk/js/examples/chat-completion.ts @@ -18,6 +18,17 @@ async function main() { }); console.log('✓ SDK initialized successfully'); + const availableEps = manager.discoverEps(); + console.log(`\nAvailable execution providers: ${availableEps.map((ep) => ep.name).join(', ')}`); + + console.log('\nDownloading and registering execution providers...'); + const downloadResult = await manager.downloadAndRegisterEps(); + if (downloadResult.success) { + console.log('✓ All execution providers registered successfully'); + } else { + console.log(`⚠️ Some execution providers failed to download and/or register: ${downloadResult.failedEps.join(', ')}`); + } + // Explore available models console.log('\nFetching available models...'); const catalog = manager.catalog; @@ -37,7 +48,7 @@ async function main() { console.log(` - ${cachedModel.alias}`); } - const modelAlias = 'MODEL_ALIAS'; // Replace with a valid model alias from the list above + const modelAlias = 'qwen2.5-0.5b'; // Load the model first console.log(`\nLoading model ${modelAlias}...`); diff --git a/sdk/js/src/catalog.ts b/sdk/js/src/catalog.ts index bf2ae5c99..2efba66ab 100644 --- a/sdk/js/src/catalog.ts +++ b/sdk/js/src/catalog.ts @@ -31,6 +31,11 @@ export class Catalog { return this._name; } + /** @internal */ + invalidateCache(): void { + this.lastFetch = 0; + } + private async updateModels(): Promise { // TODO: make this configurable if ((Date.now() - this.lastFetch) < 6 * 60 * 60 * 1000) { // 6 hours diff --git a/sdk/js/src/detail/coreInterop.ts b/sdk/js/src/detail/coreInterop.ts index 3116faa97..9b723e847 100644 --- a/sdk/js/src/detail/coreInterop.ts +++ b/sdk/js/src/detail/coreInterop.ts @@ -188,7 +188,7 @@ export class CoreInterop { } } - public executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise { + 
public executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise { const cmdBuf = koffi.alloc('char', command.length + 1); koffi.encode(cmdBuf, 'char', command, command.length + 1); @@ -202,7 +202,7 @@ export class CoreInterop { callback(chunk); }, koffi.pointer(CallbackType)); - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { const req = { Command: koffi.address(cmdBuf), CommandLength: command.length, @@ -226,7 +226,8 @@ export class CoreInterop { const errorMsg = koffi.decode(res.Error, 'char', res.ErrorLength); reject(new Error(`Command '${command}' failed: ${errorMsg}`)); } else { - resolve(); + const responseData = res.Data ? koffi.decode(res.Data, 'char', res.DataLength) : ''; + resolve(responseData); } } finally { // Free the heap-allocated response strings using koffi.free() diff --git a/sdk/js/src/foundryLocalManager.ts b/sdk/js/src/foundryLocalManager.ts index 6da0bcc79..f22acdc0d 100644 --- a/sdk/js/src/foundryLocalManager.ts +++ b/sdk/js/src/foundryLocalManager.ts @@ -3,6 +3,7 @@ import { CoreInterop } from './detail/coreInterop.js'; import { ModelLoadManager } from './detail/modelLoadManager.js'; import { Catalog } from './catalog.js'; import { ResponsesClient } from './openai/responsesClient.js'; +import { EpInfo, EpDownloadResult } from './types.js'; /** * The main entry point for the Foundry Local SDK. @@ -61,23 +62,6 @@ export class FoundryLocalManager { return this._urls; } - /** - * Download and register execution providers. - * Only relevant when using the WinML variant. On non-WinML builds this is a no-op. - * - * Call this after initialization to trigger EP download before accessing the catalog, - * so that hardware-accelerated execution providers (e.g. QNN for NPU) are available - * when listing and loading models. - * - * @throws Error - If execution provider download or registration fails. 
- */ - public downloadAndRegisterEps(): void { - try { - this.coreInterop.executeCommand("download_and_register_eps"); - } catch (error) { - throw new Error(`Error downloading and registering execution providers: ${error}`); - } - } /** * Starts the local web service. @@ -112,6 +96,122 @@ export class FoundryLocalManager { return this._urls.length > 0; } + /** + * Discovers available execution providers (EPs) and their registration status. + * @returns An array of EpInfo describing each available EP. + */ + public discoverEps(): EpInfo[] { + const response = this.coreInterop.executeCommand("discover_eps"); + type RawEpInfo = { + Name: string; + IsRegistered: boolean; + }; + + try { + const raw = JSON.parse(response) as RawEpInfo[]; + return raw.map((ep) => ({ + name: ep.Name, + isRegistered: ep.IsRegistered + })); + } catch (error) { + throw new Error(`Failed to decode JSON response from discover_eps: ${error}. Response was: ${response}`); + } + } + + /** + * Downloads and registers execution providers. + * @returns A promise that resolves with an EpDownloadResult describing the outcome. + */ + public downloadAndRegisterEps(): Promise; + /** + * Downloads and registers execution providers. + * @param names - Array of EP names to download. + * @returns A promise that resolves with an EpDownloadResult describing the outcome. + */ + public downloadAndRegisterEps(names: string[]): Promise; + /** + * Downloads and registers execution providers, reporting progress. + * @param progressCallback - Callback invoked with (epName, percent) as each EP downloads. Percent is 0-100. + * @returns A promise that resolves with an EpDownloadResult describing the outcome. + */ + public downloadAndRegisterEps(progressCallback: (epName: string, percent: number) => void): Promise; + /** + * Downloads and registers execution providers, reporting progress. + * @param names - Array of EP names to download. 
+ * @param progressCallback - Callback invoked with (epName, percent) as each EP downloads. Percent is 0-100. + * @returns A promise that resolves with an EpDownloadResult describing the outcome. + */ + public downloadAndRegisterEps(names: string[], progressCallback: (epName: string, percent: number) => void): Promise; + public async downloadAndRegisterEps( + namesOrCallback?: string[] | ((epName: string, percent: number) => void), + progressCallback?: (epName: string, percent: number) => void + ): Promise { + let names: string[] | undefined; + if (typeof namesOrCallback === 'function') { + progressCallback = namesOrCallback; + } else { + names = namesOrCallback; + } + + const params: { Params?: { Names: string } } = {}; + if (names && names.length > 0) { + params.Params = { Names: names.join(",") }; + } + + type RawEpDownloadResult = { + Success: boolean; + Status: string; + RegisteredEps: string[]; + FailedEps: string[]; + }; + + let response: string; + + if (progressCallback) { + response = await this.coreInterop.executeCommandStreaming( + "download_and_register_eps", + Object.keys(params).length > 0 ? params : undefined, + (chunk: string) => { + const sepIndex = chunk.indexOf('|'); + if (sepIndex >= 0) { + const epName = chunk.substring(0, sepIndex); + const percent = parseFloat(chunk.substring(sepIndex + 1)); + if (!isNaN(percent)) { + progressCallback(epName || '', percent); + } + } + } + ); + } else { + response = await this.coreInterop.executeCommandStreaming( + "download_and_register_eps", + Object.keys(params).length > 0 ? params : undefined, + () => {} // no-op callback + ); + } + + let epResult: EpDownloadResult; + try { + const raw = JSON.parse(response) as RawEpDownloadResult; + epResult = { + success: raw.Success, + status: raw.Status, + registeredEps: raw.RegisteredEps, + failedEps: raw.FailedEps + }; + } catch (error) { + throw new Error(`Failed to decode JSON response from download_and_register_eps: ${error}. 
Response was: ${response}`); + } + + // Invalidate the catalog cache if any EP was newly registered so the next access + // re-fetches models with the updated set of available EPs. + if (epResult.success || epResult.registeredEps.length > 0) { + this._catalog.invalidateCache(); + } + + return epResult; + } + /** * Creates a ResponsesClient for interacting with the Responses API. * The web service must be started first via `startWebService()`. diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index 40a9110bc..521ae34b4 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -67,6 +67,30 @@ export interface ToolChoice { name?: string; } +// ============================================================================ +// Execution Provider Types +// ============================================================================ + +/** Describes a discoverable execution provider bootstrapper. */ +export interface EpInfo { + /** The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). */ + name: string; + /** True if this EP has already been successfully downloaded and registered. */ + isRegistered: boolean; +} + +/** Result of an explicit EP download and registration operation. */ +export interface EpDownloadResult { + /** True if all requested EPs were successfully downloaded and registered. */ + success: boolean; + /** Human-readable status message. */ + status: string; + /** Names of EPs that were successfully registered. */ + registeredEps: string[]; + /** Names of EPs that failed to register. 
*/ + failedEps: string[]; +} + // ============================================================================ // Responses API Types // Aligned with OpenAI Responses API / OpenResponses spec and diff --git a/sdk/js/test/foundryLocalManager.test.ts b/sdk/js/test/foundryLocalManager.test.ts index 5ab400439..48adcff40 100644 --- a/sdk/js/test/foundryLocalManager.test.ts +++ b/sdk/js/test/foundryLocalManager.test.ts @@ -16,4 +16,66 @@ describe('Foundry Local Manager Tests', () => { // We don't assert the exact name as it might change, but we ensure it exists expect(catalog.name).to.be.a('string'); }); + + it('downloadAndRegisterEps should call command without params when names are omitted', async function() { + const manager = getTestManager() as any; + const calls: unknown[][] = []; + const originalExecuteCommandStreaming = manager.coreInterop.executeCommandStreaming; + + manager.coreInterop.executeCommandStreaming = (...args: unknown[]) => { + calls.push(args); + return Promise.resolve(JSON.stringify({ + Success: true, + Status: 'All providers registered', + RegisteredEps: ['CUDAExecutionProvider'], + FailedEps: [] + })); + }; + + try { + const result = await manager.downloadAndRegisterEps(); + expect(calls.length).to.equal(1); + expect(calls[0][0]).to.equal('download_and_register_eps'); + expect(calls[0][1]).to.be.undefined; + expect(result).to.deep.equal({ + success: true, + status: 'All providers registered', + registeredEps: ['CUDAExecutionProvider'], + failedEps: [] + }); + } finally { + manager.coreInterop.executeCommandStreaming = originalExecuteCommandStreaming; + } + }); + + it('downloadAndRegisterEps should send Names param when subset is provided', async function() { + const manager = getTestManager() as any; + const calls: unknown[][] = []; + const originalExecuteCommandStreaming = manager.coreInterop.executeCommandStreaming; + + manager.coreInterop.executeCommandStreaming = (...args: unknown[]) => { + calls.push(args); + return 
Promise.resolve(JSON.stringify({ + Success: false, + Status: 'Some providers failed', + RegisteredEps: ['CUDAExecutionProvider'], + FailedEps: ['OpenVINOExecutionProvider'] + })); + }; + + try { + const result = await manager.downloadAndRegisterEps(['CUDAExecutionProvider', 'OpenVINOExecutionProvider']); + expect(calls.length).to.equal(1); + expect(calls[0][0]).to.equal('download_and_register_eps'); + expect(calls[0][1]).to.deep.equal({ Params: { Names: 'CUDAExecutionProvider,OpenVINOExecutionProvider' } }); + expect(result).to.deep.equal({ + success: false, + status: 'Some providers failed', + registeredEps: ['CUDAExecutionProvider'], + failedEps: ['OpenVINOExecutionProvider'] + }); + } finally { + manager.coreInterop.executeCommandStreaming = originalExecuteCommandStreaming; + } + }); }); diff --git a/sdk/python/README.md b/sdk/python/README.md index ace19bac5..4c1fb84a1 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -18,7 +18,7 @@ Two package variants are published — choose the one that matches your target h | Variant | Package | Native backends | |---|---|---| -| Standard (cross-platform) | `foundry-local-sdk` | CPU / DirectML / CUDA | +| Standard (cross-platform) | `foundry-local-sdk` | CPU / WebGPU / CUDA | | WinML (Windows only) | `foundry-local-sdk-winml` | Windows ML + all standard backends | ```bash @@ -70,6 +70,46 @@ foundry-local-install --winml --verbose > **Note:** The standard and WinML native packages use different PyPI package names (`foundry-local-core` vs `foundry-local-core-winml`) so they can coexist in the same pip index, but they should not be installed in the same Python environment simultaneously. 
+## Explicit EP Management + +You can explicitly discover and download execution providers (EPs): + +```python +# Discover available EPs and registration status +eps = manager.discover_eps() +for ep in eps: + print(f"{ep.name} - registered: {ep.is_registered}") + +# Download and register all available EPs +result = manager.download_and_register_eps() +print(f"Success: {result.success}, Status: {result.status}") + +# Download only specific EPs +result2 = manager.download_and_register_eps([eps[0].name]) +``` + +### Per-EP download progress + +Pass a `progress_callback` to receive `(ep_name, percent)` updates as each EP downloads (`percent` is 0–100): + +```python +current_ep = "" + +def on_progress(ep_name: str, percent: float) -> None: + global current_ep + if ep_name != current_ep: + if current_ep: + print() + current_ep = ep_name + print(f"\r {ep_name} {percent:5.1f}%", end="", flush=True) + if percent >= 100: + print() + +manager.download_and_register_eps(progress_callback=on_progress) +``` + +Catalog access does not block on EP downloads. Call `download_and_register_eps()` when you need hardware-accelerated execution providers. 
+ ## Quick Start ```python @@ -225,6 +265,8 @@ manager.stop_web_service() |---|---| | `Configuration` | SDK configuration (app name, cache dir, log level, web service settings) | | `FoundryLocalManager` | Singleton entry point – initialization, catalog access, web service | +| `EpInfo` | Discoverable execution provider info (`name`, `is_registered`) | +| `EpDownloadResult` | Result of EP download/registration (`success`, `status`, `registered_eps`, `failed_eps`) | | `Catalog` | Model discovery – listing, lookup by alias/ID, cached/loaded queries | | `Model` | Groups variants under one alias – select, load, unload, create clients | | `ModelVariant` | Specific model variant – download, cache, load/unload, create clients | diff --git a/sdk/python/examples/chat_completion.py b/sdk/python/examples/chat_completion.py index 60eefd5ee..c0c580480 100644 --- a/sdk/python/examples/chat_completion.py +++ b/sdk/python/examples/chat_completion.py @@ -19,6 +19,15 @@ def main(): FoundryLocalManager.initialize(config) manager = FoundryLocalManager.instance + # Discover available EPs and register them explicitly when needed. + eps = manager.discover_eps() + print("Available execution providers:") + for ep in eps: + print(f" - {ep.name} (registered: {ep.is_registered})") + + ep_result = manager.download_and_register_eps() + print(f"EP registration success: {ep_result.success} ({ep_result.status})") + # 2. Print available models in the catalog and cache models = manager.catalog.list_models() print("Available models in catalog:") diff --git a/sdk/python/requirements-winml.txt b/sdk/python/requirements-winml.txt index 0fb9f9c2d..9a3990b70 100644 --- a/sdk/python/requirements-winml.txt +++ b/sdk/python/requirements-winml.txt @@ -2,6 +2,6 @@ pydantic>=2.0.0 requests>=2.32.4 openai>=2.24.0 # WinML native binary packages from the ORT-Nightly PyPI feed. 
-foundry-local-core-winml +foundry-local-core-winml==0.9.0.dev20260331004032 onnxruntime-core==1.24.3 onnxruntime-genai-core==0.12.1 \ No newline at end of file diff --git a/sdk/python/src/catalog.py b/sdk/python/src/catalog.py index 767a9f087..afccd85b3 100644 --- a/sdk/python/src/catalog.py +++ b/sdk/python/src/catalog.py @@ -80,8 +80,12 @@ def _update_models(self): self._model_id_to_model_variant[variant.id] = variant - self._last_fetch = datetime.datetime.now() self._models = models + self._last_fetch = datetime.datetime.now() + + def _invalidate_cache(self): + with self._lock: + self._last_fetch = datetime.datetime.min def list_models(self) -> List[Model]: """ diff --git a/sdk/python/src/ep_types.py b/sdk/python/src/ep_types.py new file mode 100644 index 000000000..42d84acfb --- /dev/null +++ b/sdk/python/src/ep_types.py @@ -0,0 +1,24 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- + +from typing import List + +from pydantic import BaseModel, Field + + +class EpInfo(BaseModel): + """Metadata describing a discoverable execution provider (EP).""" + + name: str = Field(alias="Name") + is_registered: bool = Field(alias="IsRegistered") + + +class EpDownloadResult(BaseModel): + """Result of an explicit EP download and registration operation.""" + + success: bool = Field(alias="Success") + status: str = Field(alias="Status") + registered_eps: List[str] = Field(alias="RegisteredEps") + failed_eps: List[str] = Field(alias="FailedEps") diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index 4c02a127b..a649f8e56 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -9,10 +9,15 @@ import logging import threading +from typing import Callable, List, Optional + +from pydantic import TypeAdapter + from .catalog import Catalog from .configuration import Configuration +from .ep_types import EpDownloadResult, EpInfo from .logging_helper import set_default_logger_severity -from .detail.core_interop import CoreInterop +from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException @@ -71,17 +76,90 @@ def _initialize(self): self._model_load_manager = ModelLoadManager(self._core_interop, external_service_url) self.catalog = Catalog(self._model_load_manager, self._core_interop) - def download_and_register_eps(self) -> None: - """Download and register execution providers. - Only relevant when using WinML. + def discover_eps(self) -> list[EpInfo]: + """Discover available execution providers and their registration status. + + Returns: + List of ``EpInfo`` entries for all discoverable EPs. Raises: - FoundryLocalException: If execution provider download or registration fails. 
+ FoundryLocalException: If EP discovery fails or response JSON is invalid. """ - result = self._core_interop.execute_command("download_and_register_eps") + response = self._core_interop.execute_command("discover_eps") + if response.error is not None: + raise FoundryLocalException(f"Error discovering execution providers: {response.error}") + + try: + adapter = TypeAdapter(List[EpInfo]) + return adapter.validate_json(response.data or "[]") + except Exception as e: + raise FoundryLocalException( + f"Failed to decode JSON response from discover_eps: {e}. Response was: {response.data}" + ) from e + + def download_and_register_eps( + self, + names: Optional[list[str]] = None, + progress_callback: Optional[Callable[[str, float], None]] = None, + ) -> EpDownloadResult: + """Download and register execution providers. - if result.error is not None: - raise FoundryLocalException(f"Error downloading and registering execution providers: {result.error}") + Args: + names: Optional subset of EP names to download. If omitted or empty, + all discoverable EPs are downloaded. + progress_callback: Optional callback ``(ep_name: str, percent: float) -> None`` + invoked as each EP downloads. ``percent`` is 0-100. + + Returns: + ``EpDownloadResult`` describing operation status and per-EP outcomes. + + Raises: + FoundryLocalException: If the operation fails or response JSON is invalid. 
+ """ + request = None + if names is not None and len(names) > 0: + request = InteropRequest(params={"Names": ",".join(names)}) + + if progress_callback is not None: + def _on_chunk(chunk: str) -> None: + sep = chunk.find("|") + if sep >= 0: + ep_name = chunk[:sep] or "" + try: + percent = float(chunk[sep + 1:]) + progress_callback(ep_name, percent) + except ValueError: + pass + + response = self._core_interop.execute_command_with_callback( + "download_and_register_eps", request, _on_chunk + ) + else: + response = self._core_interop.execute_command("download_and_register_eps", request) + + if response.error is not None: + raise FoundryLocalException(f"Error downloading execution providers: {response.error}") + + if response.data: + try: + adapter = TypeAdapter(EpDownloadResult) + ep_result = adapter.validate_json(response.data) + except Exception as e: + raise FoundryLocalException( + "Failed to decode JSON response from download_and_register_eps: " + f"{e}. Response was: {response.data}" + ) from e + else: + ep_result = EpDownloadResult( + Success=True, Status="Completed", RegisteredEps=[], FailedEps=[] + ) + + # Invalidate the catalog cache if any EP was newly registered so the next access + # re-fetches models with the updated set of available EPs. + if ep_result.success or len(ep_result.registered_eps) > 0: + self.catalog._invalidate_cache() + + return ep_result def start_web_service(self): """Start the optional web service. 
diff --git a/sdk/python/test/test_foundry_local_manager.py b/sdk/python/test/test_foundry_local_manager.py index b0a9c4e2a..315288912 100644 --- a/sdk/python/test/test_foundry_local_manager.py +++ b/sdk/python/test/test_foundry_local_manager.py @@ -7,6 +7,22 @@ from __future__ import annotations +class _Response: + def __init__(self, data=None, error=None): + self.data = data + self.error = error + + +class _FakeCoreInterop: + def __init__(self, responses): + self._responses = responses + self.calls = [] + + def execute_command(self, command_name, command_input=None): + self.calls.append((command_name, command_input)) + return self._responses[command_name] + + class TestFoundryLocalManager: """Foundry Local Manager Tests.""" @@ -20,3 +36,48 @@ def test_should_return_catalog(self, manager): assert catalog is not None assert isinstance(catalog.name, str) assert len(catalog.name) > 0 + + def test_discover_eps_returns_ep_info(self, manager): + original_core = manager._core_interop + manager._core_interop = _FakeCoreInterop( + { + "discover_eps": _Response( + data='[{"Name":"CUDAExecutionProvider","IsRegistered":true}]', + error=None, + ) + } + ) + + try: + eps = manager.discover_eps() + finally: + manager._core_interop = original_core + + assert isinstance(eps, list) + assert len(eps) == 1 + assert eps[0].name == "CUDAExecutionProvider" + assert eps[0].is_registered is True + + def test_download_and_register_eps_returns_result(self, manager): + original_core = manager._core_interop + manager._core_interop = _FakeCoreInterop( + { + "download_and_register_eps": _Response( + data=( + '{"Success":true,"Status":"ok",' + '"RegisteredEps":["CUDAExecutionProvider"],"FailedEps":[]}' + ), + error=None, + ) + } + ) + + try: + result = manager.download_and_register_eps(["CUDAExecutionProvider"]) + finally: + manager._core_interop = original_core + + assert result.success is True + assert result.status == "ok" + assert result.registered_eps == ["CUDAExecutionProvider"] + assert 
result.failed_eps == [] diff --git a/sdk/rust/README.md b/sdk/rust/README.md index d76a75898..aa848b035 100644 --- a/sdk/rust/README.md +++ b/sdk/rust/README.md @@ -60,6 +60,56 @@ foundry-local-sdk = { version = "0.1", features = ["winml"] } > **Note:** The `winml` feature is only relevant on Windows. On macOS and Linux, the standard build is used regardless. No code changes are needed — your application code stays the same. +### Explicit EP Management + +You can explicitly discover and download execution providers: + +```rust +use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; + +let manager = FoundryLocalManager::create(FoundryLocalConfig::new("my_app"))?; + +// Discover available EPs and their status +let eps = manager.discover_eps()?; +for ep in &eps { + println!("{} — registered: {}", ep.name, ep.is_registered); +} + +// Download and register all available EPs +let result = manager.download_and_register_eps(None).await?; +println!("Success: {}, Status: {}", result.success, result.status); + +// Download only specific EPs +let result = manager.download_and_register_eps(Some(&[eps[0].name.as_str()])).await?; +``` + +#### Per-EP download progress + +Use `download_and_register_eps_with_progress` to receive typed `(ep_name, percent)` updates +as each EP downloads (`percent` is 0.0–100.0): + +```rust +use std::sync::{Arc, Mutex}; + +let current_ep = Arc::new(Mutex::new(String::new())); +let ep = Arc::clone(&current_ep); +manager.download_and_register_eps_with_progress(None, move |ep_name: &str, percent: f64| { + let mut current = ep.lock().unwrap(); + if ep_name != current.as_str() { + if !current.is_empty() { + println!(); + } + *current = ep_name.to_string(); + } + print!("\r {} {:5.1}%", ep_name, percent); + if percent >= 100.0 { + println!(); + } +}).await?; +``` + +Catalog access does not block on EP downloads. Call `download_and_register_eps` when you need hardware-accelerated execution providers. 
+ ## Quick Start ```rust diff --git a/sdk/rust/src/catalog.rs b/sdk/rust/src/catalog.rs index 9e04c9433..d9d5bb51a 100644 --- a/sdk/rust/src/catalog.rs +++ b/sdk/rust/src/catalog.rs @@ -87,6 +87,11 @@ impl Catalog { &self.name } + /// Invalidate the catalog cache so the next access re-fetches models. + pub(crate) fn invalidate_cache(&self) { + self.invalidator.invalidate(); + } + /// Refresh the catalog from the native core if the cache has expired or /// has been explicitly invalidated (e.g. after a download or removal). pub async fn update_models(&self) -> Result<()> { diff --git a/sdk/rust/src/foundry_local_manager.rs b/sdk/rust/src/foundry_local_manager.rs index 9cf2477f7..0c22ef154 100644 --- a/sdk/rust/src/foundry_local_manager.rs +++ b/sdk/rust/src/foundry_local_manager.rs @@ -13,6 +13,7 @@ use crate::configuration::{Configuration, FoundryLocalConfig, Logger}; use crate::detail::core_interop::CoreInterop; use crate::detail::ModelLoadManager; use crate::error::{FoundryLocalError, Result}; +use crate::types::{EpDownloadResult, EpInfo}; /// Global singleton holder — only stores a successfully initialised manager. static INSTANCE: OnceLock = OnceLock::new(); @@ -134,17 +135,92 @@ impl FoundryLocalManager { Ok(()) } + /// Discover available execution providers and their registration status. + pub fn discover_eps(&self) -> Result> { + let raw = self.core.execute_command("discover_eps", None)?; + let eps: Vec = serde_json::from_str(&raw)?; + Ok(eps) + } + /// Download and register execution providers. /// - /// Only relevant when using the WinML variant. On non-WinML builds this - /// is a no-op. Call this after initialisation to trigger EP download - /// before accessing the catalog, so that hardware-accelerated execution - /// providers (e.g. QNN for NPU) are available when listing and loading - /// models. 
- pub async fn download_and_register_eps(&self) -> Result<()> { - self.core - .execute_command_async("download_and_register_eps".into(), None) - .await?; - Ok(()) + /// If `names` is `None` or empty, all available EPs are downloaded. + /// Otherwise only the named EPs are downloaded and registered. + pub async fn download_and_register_eps( + &self, + names: Option<&[&str]>, + ) -> Result { + self.download_and_register_eps_impl(names, None::) + .await + } + + /// Download and register execution providers, reporting per-EP progress. + /// + /// If `names` is `None` or empty, all available EPs are downloaded. + /// Otherwise only the named EPs are downloaded and registered. + /// + /// `progress_callback` receives `(ep_name, percent)` where `percent` + /// ranges from 0.0 to 100.0 as each EP downloads. + pub async fn download_and_register_eps_with_progress( + &self, + names: Option<&[&str]>, + progress_callback: F, + ) -> Result + where + F: FnMut(&str, f64) + Send + 'static, + { + self.download_and_register_eps_impl(names, Some(progress_callback)) + .await + } + + async fn download_and_register_eps_impl( + &self, + names: Option<&[&str]>, + progress_callback: Option, + ) -> Result + where + F: FnMut(&str, f64) + Send + 'static, + { + let params = match names { + Some(n) if !n.is_empty() => Some(json!({ "Params": { "Names": n.join(",") } })), + _ => None, + }; + + let raw = match progress_callback { + Some(cb) => { + let mut callback = cb; + let wrapper = move |chunk: &str| { + if let Some(sep) = chunk.find('|') { + let name = &chunk[..sep]; + if let Ok(percent) = chunk[sep + 1..].parse::() { + callback(if name.is_empty() { "" } else { name }, percent); + } + } + }; + + self.core + .execute_command_streaming_async( + "download_and_register_eps".into(), + params, + wrapper, + ) + .await? + } + None => { + self.core + .execute_command_async("download_and_register_eps".into(), params) + .await? 
+ } + }; + + let result: EpDownloadResult = serde_json::from_str(&raw)?; + + // Invalidate the catalog cache if any EP was newly registered so the next + // access re-fetches models with the updated set of available EPs. + if result.success || !result.registered_eps.is_empty() { + self.catalog.invalidate_cache(); + } + + Ok(result) } } diff --git a/sdk/rust/src/lib.rs b/sdk/rust/src/lib.rs index c6d6e6c43..c12feef15 100644 --- a/sdk/rust/src/lib.rs +++ b/sdk/rust/src/lib.rs @@ -20,8 +20,8 @@ pub use self::foundry_local_manager::FoundryLocalManager; pub use self::model::Model; pub use self::model_variant::ModelVariant; pub use self::types::{ - ChatResponseFormat, ChatToolChoice, DeviceType, ModelInfo, ModelSettings, Parameter, - PromptTemplate, Runtime, + ChatResponseFormat, ChatToolChoice, DeviceType, EpDownloadResult, EpInfo, ModelInfo, + ModelSettings, Parameter, PromptTemplate, Runtime, }; // Re-export OpenAI request types so callers can construct typed messages. diff --git a/sdk/rust/src/types.rs b/sdk/rust/src/types.rs index bab2f9c83..28b37ed24 100644 --- a/sdk/rust/src/types.rs +++ b/sdk/rust/src/types.rs @@ -125,3 +125,27 @@ pub enum ChatToolChoice { /// Model must call the named function. Function(String), } + +/// Information about an available execution provider bootstrapper. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "PascalCase")] +pub struct EpInfo { + /// The name of the execution provider. + pub name: String, + /// Whether this EP is currently registered and ready for use. + pub is_registered: bool, +} + +/// Result of a download-and-register execution-provider operation. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "PascalCase")] +pub struct EpDownloadResult { + /// Whether all requested EPs were successfully registered. + pub success: bool, + /// Human-readable status message. + pub status: String, + /// Names of EPs that were successfully registered. 
+ pub registered_eps: Vec, + /// Names of EPs that failed to register. + pub failed_eps: Vec, +} diff --git a/www/src/routes/models/service.ts b/www/src/routes/models/service.ts index de49a539b..75e2901c1 100644 --- a/www/src/routes/models/service.ts +++ b/www/src/routes/models/service.ts @@ -188,7 +188,6 @@ export class FoundryModelService { device: 'GPU', executionProviders: [ 'CUDAExecutionProvider', // NVIDIA CUDA - 'DmlExecutionProvider', // DirectML (Windows) 'TensorrtExecutionProvider', // NVIDIA TensorRT 'NvTensorRTRTXExecutionProvider', // NVIDIA TensorRT RTX (TRTRTX) 'WebGpuExecutionProvider', // WebGPU