Skip to content

Commit aba6c2e

Browse files
committed
Add new cloud models and XAI reasoning support
Introduce multiple new cloud model definitions and update Groq model constants, plus implement streaming parsing in XaiService. Changes include: - Added Gemini NanoBanana and several xAI models (Grok 4.20/4.1 variants, GrokImagine image/pro) and new Groq models (Llama3_3_70b, GptOss120b). Renamed constant Llama3_1_8bInstant -> Llama3_1_8b and added DeepSeek Chat; increased DeepSeek Reasoner token limit. - Updated example and integration tests to use the renamed Groq model constant. - Added ProcessChatCompletionChunk in XaiService to handle streaming content, incremental reasoning deltas, and encrypted reasoning blobs; added JSON helper types and System.Text.Json imports to support parsing. These changes add support for new backends/models and improve handling of xAI streaming/reasoning responses.
1 parent fdbcb84 commit aba6c2e

File tree

5 files changed

+161
-6
lines changed

5 files changed

+161
-6
lines changed

Examples/Examples/Chat/ChatExampleGroqCloud.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ public async Task Start()
1212
Console.WriteLine("(GroqCloud) ChatExample is running!");
1313

1414
await AIHub.Chat()
15-
.WithModel(Models.Groq.Llama3_1_8bInstant)
15+
.WithModel(Models.Groq.Llama3_1_8b)
1616
.WithMessage("Which color do people like the most?")
1717
.CompleteAsync(interactive: true);
1818
}

MaIN.Core.IntegrationTests/BackendParamsTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ public async Task GroqCloud_Should_RespondWithParams()
106106
SkipIfMissingKey(LLMApiRegistry.GetEntry(BackendType.GroqCloud)?.ApiKeyEnvName!);
107107

108108
var result = await AIHub.Chat()
109-
.WithModel(Models.Groq.Llama3_1_8bInstant)
109+
.WithModel(Models.Groq.Llama3_1_8b)
110110
.WithMessage(TestQuestion)
111111
.WithInferenceParams(new GroqCloudInferenceParams
112112
{
@@ -278,7 +278,7 @@ public async Task GroqCloud_Should_ThrowWhenGivenWrongParams()
278278
{
279279
await Assert.ThrowsAsync<InvalidBackendParamsException>(() =>
280280
AIHub.Chat()
281-
.WithModel(Models.Groq.Llama3_1_8bInstant)
281+
.WithModel(Models.Groq.Llama3_1_8b)
282282
.WithMessage(TestQuestion)
283283
.WithInferenceParams(new OpenAiInferenceParams())
284284
.CompleteAsync());

src/MaIN.Domain/Models/Concrete/CloudModels.cs

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,13 @@ public sealed record GeminiImagen4_0FastGenerate() : CloudModel(
110110
4000,
111111
"Google's fast image generation model via Gemini API"), IImageGenerationModel;
112112

113+
public sealed record GeminiNanoBanana() : CloudModel(
114+
Models.Gemini.NanoBanana,
115+
BackendType.Gemini,
116+
"Gemini 2.5 Flash Image (NanoBanana)",
117+
130000,
118+
"Google’s high-speed, high-fidelity image generation via Gemini API."), IImageGenerationModel;
119+
113120
// ===== Vertex AI Models =====
114121

115122
public sealed record VertexGemini2_5Pro() : CloudModel(
@@ -148,6 +155,50 @@ public sealed record VertexImagen4_0Generate() : CloudModel(
148155

149156
// ===== xAI Models =====
150157

158+
public sealed record Grok4_20Reasoning() : CloudModel(
159+
Models.Xai.Grok4_20Reasoning,
160+
BackendType.Xai,
161+
"Grok 4.20 reasoning",
162+
2_000_000,
163+
"A xai flagship model, offering fast, agentic tool use with low hallucination and strong prompt adherence for precise, reliable responses."), IVisionModel, IReasoningModel
164+
{
165+
public string? MMProjectName => null;
166+
public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
167+
public string? AdditionalPrompt => null;
168+
}
169+
170+
public sealed record Grok4_20NonReasoning() : CloudModel(
171+
Models.Xai.Grok4_20NonReasoning,
172+
BackendType.Xai,
173+
"Grok 4.20 non reasoning",
174+
2_000_000,
175+
"A xai flagship model, offering fast, agentic tool use with low hallucination and strong prompt adherence for precise, reliable responses."), IVisionModel
176+
{
177+
public string? MMProjectName => null;
178+
}
179+
180+
public sealed record Grok4_1FastReasoning() : CloudModel(
181+
Models.Xai.Grok4_1FastReasoning,
182+
BackendType.Xai,
183+
"Grok 4.1 fast reasoning",
184+
2_000_000,
185+
"A xai multimodal model optimized specifically for high-performance agentic tool calling"), IVisionModel, IReasoningModel
186+
{
187+
public string? MMProjectName => null;
188+
public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
189+
public string? AdditionalPrompt => null;
190+
}
191+
192+
public sealed record Grok4_1Fast() : CloudModel(
193+
Models.Xai.Grok4_1FastNonReasoning,
194+
BackendType.Xai,
195+
"Grok 4.1 fast",
196+
2_000_000,
197+
"A xai multimodal model optimized specifically for high-performance agentic tool calling"), IVisionModel
198+
{
199+
public string? MMProjectName => null;
200+
}
201+
151202
public sealed record Grok3Beta() : CloudModel(
152203
Models.Xai.Grok3Beta,
153204
BackendType.Xai,
@@ -165,35 +216,76 @@ public sealed record GrokImage() : CloudModel(
165216
4000,
166217
"xAI image generation model"), IImageGenerationModel;
167218

219+
public sealed record GrokImagineImage() : CloudModel(
220+
Models.Xai.GrokImagineImage,
221+
BackendType.Xai,
222+
"Grok Imagine Image",
223+
4000,
224+
"xAI image generation model"), IImageGenerationModel, IVisionModel
225+
{
226+
public string? MMProjectName => null;
227+
}
228+
229+
public sealed record GrokImagineImagePro() : CloudModel(
230+
Models.Xai.GrokImagineImagePro,
231+
BackendType.Xai,
232+
"Grok Imagine Image Pro",
233+
4000,
234+
"xAI image generation model"), IImageGenerationModel, IVisionModel
235+
{
236+
public string? MMProjectName => null;
237+
}
238+
168239
// ===== GroqCloud Models =====
169240

170241
public sealed record Llama3_1_8bInstant() : CloudModel(
171-
Models.Groq.Llama3_1_8bInstant,
242+
Models.Groq.Llama3_1_8b,
172243
BackendType.GroqCloud,
173244
"Llama 3.1 8B Instant",
174245
8192,
175246
"Meta Llama 3.1 8B model optimized for fast inference on Groq hardware");
176247

248+
public sealed record Llama3_3_70bVersatile() : CloudModel(
249+
Models.Groq.Llama3_3_70b,
250+
BackendType.GroqCloud,
251+
"Llama 3.3 70B Versatile",
252+
130_000,
253+
"Meta's efficient, high-performance multilingual language model");
254+
177255
public sealed record GptOss20b() : CloudModel(
178256
Models.Groq.GptOss20b,
179257
BackendType.GroqCloud,
180258
"GPT OSS 20B",
181259
8192,
182260
"Open-source 20B parameter GPT model running on Groq infrastructure");
183261

262+
public sealed record GptOss120b() : CloudModel(
263+
Models.Groq.GptOss120b,
264+
BackendType.GroqCloud,
265+
"GPT OSS 120B",
266+
130_000,
267+
"Open-source 120B parameter GPT model running on Groq infrastructure");
268+
184269
// ===== DeepSeek Models =====
185270

186271
public sealed record DeepSeekReasoner() : CloudModel(
187272
Models.DeepSeek.Reasoner,
188273
BackendType.DeepSeek,
189274
"DeepSeek Reasoner",
190-
64000,
275+
128_000,
191276
"DeepSeek reasoning-focused model for complex problem solving"), IReasoningModel
192277
{
193278
public Func<string, ThinkingState, LLMTokenValue>? ReasonFunction => null;
194279
public string? AdditionalPrompt => null;
195280
}
196281

282+
public sealed record DeepSeekChat() : CloudModel(
283+
Models.DeepSeek.Chat,
284+
BackendType.DeepSeek,
285+
"DeepSeek Chat",
286+
128_000,
287+
"DeepSeek model for complex problem solving");
288+
197289
// ===== Ollama Models =====
198290

199291
public sealed record OllamaGemma3_4b() : CloudModel(

src/MaIN.Domain/Models/Models.cs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,32 @@ public static class Gemini
2727
public const string Gemini2_5Flash = "gemini-2.5-flash";
2828
public const string Gemini2_0Flash = "gemini-2.0-flash";
2929
public const string Imagen4_0_FastGenerate = "imagen-4.0-fast-generate-001";
30+
public const string NanoBanana = "gemini-2.5-flash-image";
3031
}
3132

3233
public static class Xai
3334
{
35+
public const string Grok4_20Reasoning = "grok-4.20-reasoning";
36+
public const string Grok4_20NonReasoning = "grok-4.20-non-reasoning";
37+
public const string Grok4_1FastReasoning = "grok-4-1-fast-reasoning";
38+
public const string Grok4_1FastNonReasoning = "grok-4-1-fast-non-reasoning";
39+
public const string GrokImagineImage = "grok-imagine-image";
40+
public const string GrokImagineImagePro = "grok-imagine-image-pro";
3441
public const string Grok3Beta = "grok-3-beta";
3542
public const string GrokImage = "grok-2-image";
3643
}
3744

3845
public static class Groq
3946
{
40-
public const string Llama3_1_8bInstant = "llama-3.1-8b-instant";
47+
public const string Llama3_1_8b = "llama-3.1-8b-instant";
48+
public const string Llama3_3_70b = "llama-3.3-70b-versatile";
4149
public const string GptOss20b = "openai/gpt-oss-20b";
50+
public const string GptOss120b = "openai/gpt-oss-120b";
4251
}
4352

4453
public static class DeepSeek
4554
{
55+
public const string Chat = "deepseek-chat";
4656
public const string Reasoner = "deepseek-reasoner";
4757
}
4858

src/MaIN.Services/Services/LLMService/XaiService.cs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
using MaIN.Services.Services.LLMService.Memory;
77
using Microsoft.Extensions.Logging;
88
using System.Text;
9+
using System.Text.Json;
10+
using System.Text.Json.Serialization;
911
using MaIN.Domain.Exceptions;
12+
using MaIN.Domain.Models;
13+
using MaIN.Domain.Models.Abstract;
1014
using MaIN.Domain.Models.Concrete;
1115
using MaIN.Domain.Configuration.BackendInferenceParams;
1216

@@ -54,6 +58,30 @@ protected override void ApplyBackendParams(Dictionary<string, object> requestBod
5458
if (p.PresencePenalty.HasValue) requestBody["presence_penalty"] = p.PresencePenalty.Value;
5559
}
5660

61+
protected override LLMTokenValue? ProcessChatCompletionChunk(string data)
62+
{
63+
var chunk = JsonSerializer.Deserialize<XaiCompletionChunk>(data,
64+
new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
65+
66+
// Streaming delta — regular content
67+
var content = chunk?.Choices?.FirstOrDefault()?.Delta?.Content;
68+
if (!string.IsNullOrEmpty(content))
69+
return new LLMTokenValue { Text = content, Type = TokenType.Message };
70+
71+
// Streaming delta — incremental reasoning (grok-4.20-reasoning style)
72+
var deltaReasoning = chunk?.Choices?.FirstOrDefault()?.Delta?.ReasoningContent;
73+
if (!string.IsNullOrEmpty(deltaReasoning))
74+
return new LLMTokenValue { Text = deltaReasoning, Type = TokenType.Reason };
75+
76+
// Final completion event — encrypted reasoning blob (grok-4-1-fast-reasoning style)
77+
// message.content is intentionally ignored (already assembled from streaming chunks above)
78+
var encryptedReasoning = chunk?.Reasoning?.EncryptedContent;
79+
if (!string.IsNullOrEmpty(encryptedReasoning))
80+
return new LLMTokenValue { Text = encryptedReasoning, Type = TokenType.Reason };
81+
82+
return null;
83+
}
84+
5785
public override async Task<ChatResult?> AskMemory(
5886
Chat chat,
5987
ChatMemoryOptions memoryOptions,
@@ -91,4 +119,29 @@ private string ComposeMessage(Message lastMsg, string[] filePaths)
91119
stringBuilder.Append(lastMsg.Content);
92120
return stringBuilder.ToString();
93121
}
122+
}
123+
124+
file class XaiCompletionChunk
125+
{
126+
public List<XaiChoiceChunk>? Choices { get; set; }
127+
public XaiReasoning? Reasoning { get; set; }
128+
}
129+
130+
file class XaiChoiceChunk
131+
{
132+
public XaiDelta? Delta { get; set; }
133+
}
134+
135+
file class XaiDelta
136+
{
137+
public string? Content { get; set; }
138+
139+
[JsonPropertyName("reasoning_content")]
140+
public string? ReasoningContent { get; set; }
141+
}
142+
143+
file class XaiReasoning
144+
{
145+
[JsonPropertyName("encrypted_content")]
146+
public string? EncryptedContent { get; set; }
94147
}

0 commit comments

Comments (0)