Skip to content

Commit 1041cc5

Browse files
committed
Fix vision for local models
1 parent 7e192e6 commit 1041cc5

File tree

4 files changed: +28 additions, −21 deletions

src/MaIN.Core/MaIN.Core.csproj

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
<ItemGroup>
1010
<PackageReference Include="AsyncKeyedLock" Version="8.0.2" />
11-
<PackageReference Include="LLamaSharp.Backend.Cuda12" Version="0.25.0" />
11+
<PackageReference Include="LLamaSharp.Backend.Cuda12" Version="0.26.0" />
1212
<PackageReference Include="Tesseract.Data.English" Version="4.0.0" />
1313
</ItemGroup>
1414

src/MaIN.Domain/MaIN.Domain.csproj

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
</PropertyGroup>
99

1010
<ItemGroup>
11-
<PackageReference Include="LLamaSharp" Version="0.25.0" />
11+
<PackageReference Include="LLamaSharp" Version="0.26.0" />
1212
</ItemGroup>
1313

1414
</Project>

src/MaIN.Services/MaIN.Services.csproj

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,9 @@
1515
<ItemGroup>
1616
<PackageReference Include="GTranslate" Version="2.3.1" />
1717
<PackageReference Include="HtmlAgilityPack" Version="1.12.4" />
18-
<PackageReference Include="llamasharp.backend.cpu" Version="0.25.0" />
19-
<PackageReference Include="LLamaSharp.Backend.Cuda12" Version="0.25.0" />
20-
<PackageReference Include="LLamaSharp.kernel-memory" Version="0.25.0" />
18+
<PackageReference Include="llamasharp.backend.cpu" Version="0.26.0" />
19+
<PackageReference Include="LLamaSharp.Backend.Cuda12" Version="0.26.0" />
20+
<PackageReference Include="LLamaSharp.kernel-memory" Version="0.26.0" />
2121
<PackageReference Include="Microsoft.Extensions.Http" Version="10.0.1" />
2222
<PackageReference Include="Microsoft.KernelMemory" Version="0.98.250508.3" />
2323
<PackageReference Include="Microsoft.SemanticKernel.Connectors.Google" Version="1.64.0-alpha" />

src/MaIN.Services/Services/LLMService/LLMService.cs

Lines changed: 23 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -239,14 +239,19 @@ private async Task<List<LLMTokenValue>> ProcessChatRequest(
239239
var visionModel = model as IVisionModel;
240240
var mmProjName = visionModel?.MMProjectName
241241
?? (chat.Properties.TryGetValue(ServiceConstants.Properties.MmProjNameProperty, out var p) ? p : null);
242-
var llavaWeights = mmProjName is not null
243-
? await LLavaWeights.LoadFromFileAsync(ResolvePath(null, mmProjName), cancellationToken)
244-
: null;
245-
246-
using var executor = new BatchedExecutor(llmModel, parameters);
242+
243+
MtmdWeights? mtmdWeights = null;
244+
if (mmProjName is not null && lastMsg.Image != null)
245+
{
246+
var mmProjPath = ResolvePath(null, mmProjName);
247+
mtmdWeights = await MtmdWeights.LoadFromFileAsync(
248+
mmProjPath, llmModel, MtmdContextParams.Default(), cancellationToken);
249+
}
250+
251+
using var executor = new BatchedExecutor(llmModel, parameters, mtmdWeights);
247252

248253
var (conversation, isComplete, hasFailed) = await LLMService.InitializeConversation(
249-
chat, lastMsg, model, llmModel, llavaWeights, executor, cancellationToken);
254+
chat, lastMsg, model, llmModel, mtmdWeights, executor, cancellationToken);
250255

251256
if (!isComplete)
252257
{
@@ -271,6 +276,7 @@ private async Task<List<LLMTokenValue>> ProcessChatRequest(
271276
}
272277
}
273278

279+
mtmdWeights?.Dispose();
274280
return tokens;
275281
}
276282

@@ -294,7 +300,7 @@ private ModelParams CreateModelParameters(Chat chat, string modelKey, string? cu
294300
Message lastMsg,
295301
LocalModel model,
296302
LLamaWeights llmModel,
297-
LLavaWeights? llavaWeights,
303+
MtmdWeights? mtmdWeights,
298304
BatchedExecutor executor,
299305
CancellationToken cancellationToken)
300306
{
@@ -303,9 +309,9 @@ private ModelParams CreateModelParameters(Chat chat, string modelKey, string? cu
303309
? executor.Create()
304310
: executor.Load(chat.ConversationState!);
305311

306-
if (lastMsg.Image != null && llavaWeights != null)
312+
if (lastMsg.Image != null && mtmdWeights != null)
307313
{
308-
await ProcessImageMessage(conversation, lastMsg, llmModel, llavaWeights, executor, cancellationToken);
314+
await ProcessImageMessage(conversation, lastMsg, mtmdWeights, executor, cancellationToken);
309315
}
310316
else
311317
{
@@ -317,21 +323,22 @@ private ModelParams CreateModelParameters(Chat chat, string modelKey, string? cu
317323

318324
private static async Task ProcessImageMessage(Conversation conversation,
319325
Message lastMsg,
320-
LLamaWeights llmModel,
321-
LLavaWeights? llavaWeights,
326+
MtmdWeights mtmdWeights,
322327
BatchedExecutor executor,
323328
CancellationToken cancellationToken)
324329
{
325-
var imageEmbeddings = llavaWeights?.CreateImageEmbeddings(lastMsg.Image!);
326-
conversation.Prompt(imageEmbeddings!);
330+
using var imageEmbed = mtmdWeights.LoadMedia(lastMsg.Image!);
331+
332+
var mediaMarker = NativeApi.MtmdDefaultMarker() ?? "<image>";
333+
conversation.Prompt(
334+
$"USER: {mediaMarker}\n{lastMsg.Content}\nASSISTANT:",
335+
new ReadOnlySpan<SafeMtmdEmbed>(new[] { imageEmbed }),
336+
addBos: true);
327337

328338
while (executor.BatchedTokenCount > 0)
329339
{
330340
await executor.Infer(cancellationToken);
331341
}
332-
333-
var prompt = llmModel.Tokenize($"USER: {lastMsg.Content}\nASSISTANT:", true, false, Encoding.UTF8);
334-
conversation.Prompt(prompt);
335342
}
336343

337344
private static void ProcessTextMessage(Conversation conversation,

0 commit comments

Comments (0)