Skip to content

Commit 7e192e6

Browse files
committed
Support MMProj files & extract image uploads
Add support for multimodal projector (.mmproj) names across the stack and ensure uploaded image files are treated as images. Implemented MMProjectName on several local vision models and added MmProjName property to InferPage settings, backend profiles, Utils, and ServiceConstants. Settings UI now exposes an MMProj File input for local unregistered vision models and saves/loads it per backend. LLMService now extracts image files from message.Files early (ChatHelper.ExtractImageFromFiles), resolves mmproj name from the model or chat properties to load LLava weights, and requires loaded weights before processing image messages. ChatHelper.ExtractImageFromFiles moves image files into message.Images and cleans up Files so they aren't misrouted to RAG/memory.
1 parent 91894b2 commit 7e192e6

File tree

9 files changed

+100
-11
lines changed

9 files changed

+100
-11
lines changed

src/MaIN.Domain/Models/Concrete/LocalModels.cs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,32 @@ public sealed record Gemma3_4b() : LocalModel(
1818
new Uri("https://huggingface.co/Inza124/Gemma3-4b/resolve/main/gemma3-4b.gguf?download=true"),
1919
"Gemma3 4B",
2020
8192,
21-
"Balanced 4B model for writing, analysis, and mathematical reasoning");
21+
"Balanced 4B model for writing, analysis, and mathematical reasoning"), IVisionModel
22+
{
23+
public string MMProjectName => "mmproj-model-gemma3-4b.gguf";
24+
}
2225

2326
public sealed record Gemma3_12b() : LocalModel(
2427
"gemma3-12b",
2528
"Gemma3-12b.gguf",
2629
new Uri("https://huggingface.co/Inza124/Gemma3-12b/resolve/main/gemma3-12b.gguf?download=true"),
2730
"Gemma3 12B",
2831
8192,
29-
"Large 12B model for complex analysis, research, and creative writing");
32+
"Large 12B model for complex analysis, research, and creative writing"), IVisionModel
33+
{
34+
public string MMProjectName => "mmproj-model-gemma3-12b.gguf";
35+
}
3036

3137
public sealed record Gemma3n_e4b() : LocalModel(
3238
"gemma3n-e4b",
3339
"Gemma3n-e4b.gguf",
3440
new Uri("https://huggingface.co/Inza124/Gemma-3n-e4b/resolve/main/gemma-3n-e4b.gguf?download=true"),
3541
"Gemma3n E4B",
3642
8192,
37-
"Compact 4B model optimized for efficient reasoning and general-purpose tasks");
43+
"Compact 4B model optimized for efficient reasoning and general-purpose tasks"), IVisionModel
44+
{
45+
public string MMProjectName => "mmproj-model-gemma3n-e4b.gguf";
46+
}
3847

3948
// ===== Llama Family =====
4049

src/MaIN.InferPage/Components/Pages/Home.razor

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
@using MaIN.Domain.Exceptions
1313
@using MaIN.Domain.Models
1414
@using MaIN.Domain.Models.Abstract
15+
@using MaIN.Services
16+
@using MaIN.Services.Constants
1517
@using Markdig
1618
@using Message = MaIN.Domain.Entities.Message
1719
@using MessageType = MaIN.Domain.Entities.MessageType
@@ -396,6 +398,7 @@
396398
settings.HasVision,
397399
settings.HasReasoning,
398400
settings.HasImageGen,
401+
settings.MmProjName,
399402
MaINSettings,
400403
apiKey);
401404
}
@@ -592,6 +595,11 @@
592595
Chat.ModelId = Utils.Model!;
593596
Chat.ImageGen = Utils.ImageGen;
594597

598+
// Sync mmproj setting for local unregistered vision models
599+
Chat.Properties.Remove(ServiceConstants.Properties.MmProjNameProperty);
600+
if (Utils.BackendType == BackendType.Self && !string.IsNullOrEmpty(Utils.MmProjName))
601+
Chat.Properties.AddProperty(ServiceConstants.Properties.MmProjNameProperty, Utils.MmProjName);
602+
595603
bool wasAtBottom = await JS.InvokeAsync<bool>("scrollManager.isAtBottom", "messages-container");
596604

597605
StateHasChanged();

src/MaIN.InferPage/Components/Pages/Settings.razor

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@
115115
}
116116
</div>
117117
}
118+
119+
@if (_selectedBackend?.BackendType == BackendType.Self && !_isRegisteredModel && _manualVision)
120+
{
121+
<div class="settings-field">
122+
<label class="settings-label">MMProj File</label>
123+
<FluentTextField @bind-Value="_mmProjName"
124+
Placeholder="e.g., mmproj-model-f16.gguf"
125+
Style="width: 100%;" />
126+
<span class="api-key-hint">Multimodal projector file (must be in the models folder)</span>
127+
</div>
128+
}
118129
</div>
119130

120131
<div class="settings-footer">
@@ -194,6 +205,7 @@
194205
private bool _manualVision;
195206
private bool _manualReasoning;
196207
private bool _manualImageGen;
208+
private string? _mmProjName;
197209

198210
private bool RequiresApiKey => _selectedBackend?.RequiresApiKey == true;
199211
private bool CanSave => !string.IsNullOrWhiteSpace(_modelName)
@@ -251,6 +263,7 @@
251263
_manualVision = settings.HasVision;
252264
_manualReasoning = settings.HasReasoning;
253265
_manualImageGen = settings.HasImageGen;
266+
_mmProjName = settings.MmProjName;
254267

255268
OnModelNameChanged();
256269
}
@@ -288,6 +301,7 @@
288301
_manualVision = profile.Vision;
289302
_manualReasoning = profile.Reasoning;
290303
_manualImageGen = profile.ImageGen;
304+
_mmProjName = profile.MmProjName;
291305
}
292306
}
293307
else
@@ -382,12 +396,13 @@
382396
HasVision = hasVision,
383397
HasReasoning = hasReasoning,
384398
HasImageGen = hasImageGen,
385-
ModelPath = _modelPath
399+
ModelPath = _modelPath,
400+
MmProjName = _mmProjName
386401
};
387402
await SettingsStorage.SaveSettingsAsync(settings);
388403

389404
await SettingsStorage.SaveProfileForBackendAsync(
390-
backendKey, _modelName, hasVision, hasReasoning, hasImageGen);
405+
backendKey, _modelName, hasVision, hasReasoning, hasImageGen, _mmProjName);
391406

392407
// Resolve API key: use new input, or fall back to saved key
393408
string? apiKey = null;
@@ -411,6 +426,7 @@
411426
hasVision,
412427
hasReasoning,
413428
hasImageGen,
429+
_mmProjName,
414430
MaINSettings,
415431
apiKey);
416432

src/MaIN.InferPage/Services/InferPageSettings.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ public class InferPageSettings
99
public bool HasReasoning { get; set; }
1010
public bool HasImageGen { get; set; }
1111
public string? ModelPath { get; set; }
12+
public string? MmProjName { get; set; }
1213
}

src/MaIN.InferPage/Services/SettingsService.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ public async Task<bool> HasSettingsAsync()
2626
private const string BackendProfilesKey = "inferpage-backend-profiles";
2727

2828
public async Task SaveProfileForBackendAsync(string backend, string model,
29-
bool vision, bool reasoning, bool imageGen)
29+
bool vision, bool reasoning, bool imageGen, string? mmProjName = null)
3030
{
3131
var profiles = await js.InvokeAsync<Dictionary<string, BackendProfile>?>(
3232
"settingsManager.load", BackendProfilesKey) ?? new();
33-
profiles[backend] = new BackendProfile(model, vision, reasoning, imageGen);
33+
profiles[backend] = new BackendProfile(model, vision, reasoning, imageGen, mmProjName);
3434
await js.InvokeVoidAsync("settingsManager.save", BackendProfilesKey, profiles);
3535
}
3636

@@ -55,4 +55,4 @@ private async Task<Dictionary<string, string>> LoadDictAsync(string storageKey)
5555
=> await js.InvokeAsync<Dictionary<string, string>?>("settingsManager.load", storageKey) ?? new();
5656
}
5757

58-
public record BackendProfile(string Model, bool Vision, bool Reasoning, bool ImageGen);
58+
public record BackendProfile(string Model, bool Vision, bool Reasoning, bool ImageGen, string? MmProjName = null);

src/MaIN.InferPage/Utils.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ public static class Utils
1919
public static bool? ManualVision { get; set; }
2020
public static bool? ManualReasoning { get; set; }
2121
public static bool? ManualImageGen { get; set; }
22+
public static string? MmProjName { get; set; }
2223

2324
// registry → manual override → fallback set (null = no fallback)
2425
private static bool GetCapability<T>(bool? manual, HashSet<string>? fallback = null)
@@ -42,6 +43,7 @@ public static void ApplySettings(
4243
bool hasVision,
4344
bool hasReasoning,
4445
bool hasImageGen,
46+
string? mmProjName,
4547
MaINSettings mainSettings,
4648
string? apiKey)
4749
{
@@ -69,6 +71,7 @@ public static void ApplySettings(
6971
ManualVision = hasVision;
7072
ManualReasoning = hasReasoning;
7173
ManualImageGen = hasImageGen;
74+
MmProjName = string.IsNullOrWhiteSpace(mmProjName) ? null : mmProjName.Trim();
7275

7376
mainSettings.BackendType = backendType;
7477

src/MaIN.Services/Constants/ServiceConstants.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ public static class Properties
6262
public const string PreProcessProperty = "Pre_Process";
6363
public const string DisableCacheProperty = "DisableCache";
6464
public const string AgentIdProperty = "AgentId";
65+
public const string MmProjNameProperty = "MmProjName";
6566
}
6667

6768
public static class Defaults

src/MaIN.Services/Services/LLMService/LLMService.cs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ public LLMService(
6262

6363
var lastMsg = chat.Messages.Last();
6464

65+
await ChatHelper.ExtractImageFromFiles(lastMsg);
66+
6567
if (ChatHelper.HasFiles(lastMsg))
6668
{
6769
var memoryOptions = ChatHelper.ExtractMemoryOptions(lastMsg);
@@ -235,8 +237,10 @@ private async Task<List<LLMTokenValue>> ProcessChatRequest(
235237
: await ModelLoader.GetOrLoadModelAsync(modelsPath, modelKey);
236238

237239
var visionModel = model as IVisionModel;
238-
var llavaWeights = visionModel?.MMProjectName is not null
239-
? await LLavaWeights.LoadFromFileAsync(ResolvePath(null, visionModel.MMProjectName), cancellationToken)
240+
var mmProjName = visionModel?.MMProjectName
241+
?? (chat.Properties.TryGetValue(ServiceConstants.Properties.MmProjNameProperty, out var p) ? p : null);
242+
var llavaWeights = mmProjName is not null
243+
? await LLavaWeights.LoadFromFileAsync(ResolvePath(null, mmProjName), cancellationToken)
240244
: null;
241245

242246
using var executor = new BatchedExecutor(llmModel, parameters);
@@ -299,7 +303,7 @@ private ModelParams CreateModelParameters(Chat chat, string modelKey, string? cu
299303
? executor.Create()
300304
: executor.Load(chat.ConversationState!);
301305

302-
if (lastMsg.Image != null)
306+
if (lastMsg.Image != null && llavaWeights != null)
303307
{
304308
await ProcessImageMessage(conversation, lastMsg, llmModel, llavaWeights, executor, cancellationToken);
305309
}

src/MaIN.Services/Services/LLMService/Utils/ChatHelper.cs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,53 @@ namespace MaIN.Services.Services.LLMService.Utils;
1313
/// </summary>
1414
public static class ChatHelper
1515
{
16+
/// <summary>
/// File extensions (including the leading dot) that are treated as images.
/// The set compares case-insensitively, so extensions do not need to be lower-cased before lookup.
/// </summary>
private static readonly HashSet<string> ImageExtensions = new(StringComparer.OrdinalIgnoreCase)
{
    ".jpg", ".jpeg", ".png", ".gif", ".webp",
    ".bmp", ".tiff", ".tif", ".heic", ".heif", ".avif"
};

/// <summary>
/// Extracts image files from message.Files into message.Images and removes them from Files.
/// This must be called before HasFiles() so images are not mistakenly routed to the RAG/memory path.
/// </summary>
/// <param name="message">Message whose attachments are inspected and updated in place.</param>
/// <remarks>
/// Image bytes are read from the attachment's stream when one is present, otherwise from its path.
/// Images already present on the message are kept and the extracted ones are appended after them.
/// When no files remain after extraction, message.Files is set to null.
/// </remarks>
public static async Task ExtractImageFromFiles(Message message)
{
    if (message.Files == null || message.Files.Count == 0)
        return;

    // A missing extension simply means "not an image"; it must not throw.
    var imageFiles = message.Files
        .Where(f => f.Extension is { } extension && ImageExtensions.Contains(extension))
        .ToList();

    if (imageFiles.Count == 0)
        return;

    var imageBytesList = new List<byte[]>(imageFiles.Count);

    // Preserve images that were attached directly instead of overwriting them.
    if (message.Images is { } existingImages)
        imageBytesList.AddRange(existingImages);

    foreach (var imageFile in imageFiles)
    {
        if (imageFile.StreamContent != null)
        {
            var source = imageFile.StreamContent;

            // Rewind only when supported: setting Position on a non-seekable stream throws.
            if (source.CanSeek)
                source.Position = 0;

            using var buffer = source.CanSeek
                ? new MemoryStream((int)Math.Min(source.Length, int.MaxValue))
                : new MemoryStream();
            await source.CopyToAsync(buffer);
            imageBytesList.Add(buffer.ToArray());
        }
        else if (imageFile.Path != null)
        {
            imageBytesList.Add(await File.ReadAllBytesAsync(imageFile.Path));
        }

        // NOTE(review): an image entry with neither a stream nor a path contributes no bytes
        // but is still removed from Files (unchanged behaviour) - confirm this is intended.
        message.Files.Remove(imageFile);
    }

    message.Images = imageBytesList;

    if (message.Files.Count == 0)
        message.Files = null;
}
61+
62+
1663
/// <summary>
1764
/// Generates final prompt including additional prompt if needed
1865
/// </summary>

0 commit comments

Comments
 (0)