|
16 | 16 | - Support all OpenAI API endpoints including completions, chat, embeddings, images, assistants and more. |
17 | 17 | - Regularly tested for compatibility with popular custom providers like OpenRouter/DeepSeek/Ollama/LM Studio and many others |
18 | 18 | - Microsoft.Extensions.AI `IChatClient` and `IEmbeddingGenerator` support for OpenAI and all CustomProviders |
| 19 | +- Alias-aware routed `IChatClient` builder with provider fallback and automatic 429 cooldown tracking |
| 20 | +- `FreeLLM` package for free-first chat routing across OpenAI-compatible providers and Gemini, exposing both OpenAI-compatible and MEAI surfaces |
19 | 21 |
|
20 | 22 | ## Documentation |
21 | 23 | Examples and documentation can be found here: https://tryagi.github.io/OpenAI/ |
@@ -228,6 +230,111 @@ var embeddings = await generator.GenerateAsync( |
228 | 230 | new EmbeddingGenerationOptions { ModelId = "text-embedding-3-small" }); |
229 | 231 | ``` |
230 | 232 |
|
| 233 | +### Routed chat client |
| 234 | + |
| 235 | +For OpenAI-compatible providers, you can build one routed `IChatClient` with aliases like `smart`, `smart-any`, `fast`, and `cheap`. |
| 236 | + |
| 237 | +```csharp |
| 238 | +using Microsoft.Extensions.AI; |
| 239 | +using tryAGI.OpenAI; |
| 240 | + |
| 241 | +using var routed = new OpenAiRoutedChatClientBuilder() |
| 242 | + .AddProvider("cerebras", CustomProviders.Cerebras("CEREBRAS_API_KEY"), provider => provider |
| 243 | + .AddModel("qwen-3-235b-a22b-thinking-2507", model => model |
| 244 | + .AsSmart(priority: 100) |
| 245 | + .AsSmartAny(priority: 100) |
| 246 | + .SupportsToolCalls() |
| 247 | + .SupportsStructuredOutputs() |
| 248 | + .IsRecurringFree())) |
| 249 | + .AddProvider("groq", CustomProviders.Groq("GROQ_API_KEY"), provider => provider |
| 250 | + .AddModel("llama-3.3-70b-versatile", model => model |
| 251 | + .AsFast(priority: 100) |
| 252 | + .AsCheap(priority: 70) |
| 253 | + .AsSmartAny(priority: 60) |
| 254 | + .SupportsToolCalls())) |
| 255 | + .AddProvider("openrouter", CustomProviders.OpenRouter("OPENROUTER_API_KEY"), provider => provider |
| 256 | + .WithRateLimitCooldown(TimeSpan.FromMinutes(2)) |
| 257 | + .AddModel("openrouter/free", model => model |
| 258 | + .AsCheap(priority: 100) |
| 259 | + .AsSmartAny(priority: 40))) |
| 260 | + .Build(); |
| 261 | + |
| 262 | +IChatClient chatClient = routed; |
| 263 | + |
| 264 | +var response = await chatClient.GetResponseAsync( |
| 265 | + "Explain the tradeoffs of vector search vs keyword search.", |
| 266 | + new ChatOptions { ModelId = OpenAiModelAliases.Smart }); |
| 267 | + |
| 268 | +Console.WriteLine(response.Messages[0].Text); |
| 269 | +``` |
| 270 | + |
| 271 | +Notes: |
| 272 | +- This router is for providers exposed through `CustomProviders`, i.e. OpenAI-compatible endpoints. |
| 273 | +- Provider cooldowns are tracked automatically from `429` responses and common rate-limit headers. |
| 274 | +- If `smart` is exhausted, the router also considers models tagged with `smart-any`. |
| 275 | + |
| 276 | +### FreeLLM |
| 277 | + |
| 278 | +`FreeLLM` is a separate package in this repo. It depends on `tryAGI.OpenAI` and `Google_Gemini`, and gives you one routed chat client across OpenAI-compatible providers and Gemini. |
| 279 | + |
| 280 | +```xml |
| 281 | +<PackageReference Include="FreeLLM" Version="x.y.z" /> |
| 282 | +``` |
| 283 | + |
| 284 | +```csharp |
| 285 | +using Microsoft.Extensions.AI; |
| 286 | +using FreeLLM; |
| 287 | +using tryAGI.OpenAI; |
| 288 | + |
| 289 | +using var client = new FreeLlmClientBuilder() |
| 290 | + // Curated defaults are applied automatically for popular providers. |
| 291 | + .AddCerebras("CEREBRAS_API_KEY") |
| 292 | + .AddGemini("GEMINI_API_KEY", provider => provider |
| 293 | + .WithPriority(320) |
| 294 | + .AddModel("gemini-2.5-flash", model => model |
| 295 | + .AsSmart(priority: 190) |
| 296 | + .AsSmartAny(priority: 190) |
| 297 | + .AsFast(priority: 140)) |
| 298 | + .AddModel("gemini-2.5-flash-lite", model => model |
| 299 | + .AsCheap(priority: 220))) |
| 300 | + .AddOpenRouter("OPENROUTER_API_KEY", provider => provider |
| 301 | + .WithPriority(90) |
| 302 | + .RemoveModel("openrouter/free") |
| 303 | + .AddModel("openrouter/free", model => model |
| 304 | + .AsCheap(priority: 250))) |
| 305 | + .Build(); |
| 306 | + |
| 307 | +// OpenAI-compatible chat completions API |
| 308 | +var raw = await client.Chat.CreateChatCompletionAsync(new CreateChatCompletionRequest |
| 309 | +{ |
| 310 | + Value2 = new CreateChatCompletionRequestVariant2 |
| 311 | + { |
| 312 | + Model = FreeLlmModelAliases.Smart, |
| 313 | + Messages = ["Explain vector search vs keyword search."], |
| 314 | + }, |
| 315 | +}); |
| 316 | + |
| 317 | +// Microsoft.Extensions.AI API |
| 318 | +IChatClient chatClient = client; |
| 319 | +var meai = await chatClient.GetResponseAsync( |
| 320 | + "Explain vector search vs keyword search.", |
| 321 | + new ChatOptions { ModelId = FreeLlmModelAliases.SmartAny }); |
| 322 | + |
| 323 | +Console.WriteLine(raw.Choices[0].Message.Content); |
| 324 | +Console.WriteLine(meai.Messages[0].Text); |
| 325 | +``` |
| 326 | + |
| 327 | +Notes: |
| 328 | +- `FreeLlmModelAliases` includes `smart`, `smart-any`, `fast`, and `cheap`. |
| 329 | +- Convenience methods for Gemini, Cerebras, SambaNova, OpenRouter, GitHub Models, Groq, and NVIDIA register curated default models and priorities. |
| 330 | +- Use `provider.WithPriority(...)` to bias whole providers, and `model.AsSmart(...)`, `model.AsCheap(...)`, `model.AsFast(...)`, and `model.AsSmartAny(...)` to tune alias-specific model priority. |
| 331 | +- Use `provider.ClearModels()` or pass `useDefaultModels: false` to a convenience method if you want a fully manual model list. |
| 332 | +- `provider.AddModel("existing-model", ...)` updates preset models in place, so you can override defaults without duplicating registrations. |
| 333 | +- `client.Chat` preserves raw OpenAI-compatible requests for OpenAI-compatible providers and translates supported chat-completions requests to Gemini when a Gemini model wins routing. |
| 334 | +- Gemini translation currently supports `CreateChatCompletionRequestVariant2`, single-choice text chat, JSON response formats, and data-URI images. |
| 335 | +- Raw OpenAI tool schemas/functions, audio/modalities, logprobs, web search, prediction, and remote image URLs are not translated to Gemini; use the MEAI surface for Gemini tool calling. |
| 336 | +- Provider cooldowns and last-seen rate-limit data are available through `client.GetProviderStatuses()`. |
| 337 | + |
231 | 338 | ### Constants |
232 | 339 | All `tryGetXXX` methods return `null` if the value is not found. |
233 | 340 | There are also non-try methods that throw an exception if the value is not found. |
|
0 commit comments