Skip to content

Commit 435fe73

Browse files
authored
Merge pull request SciSharp#1261 from SignalRT/mtmd_implementation
Mtmd implementation
2 parents d075ef6 + 855b8c5 commit 435fe73

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+3306
-1328
lines changed

.github/workflows/compile.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ concurrency:
1717

1818
env:
1919
# Compiler defines common to all platforms
20-
COMMON_DEFINE: -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=ON -DLLAMA_BUILD_SERVER=OFF -DBUILD_SHARED_LIBS=ON -DLLAMA_CURL=OFF
20+
COMMON_DEFINE: -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_CURL=OFF -DBUILD_SHARED_LIBS=ON
2121

2222
jobs:
2323
compile-linux:
@@ -460,7 +460,7 @@ jobs:
460460
matrix:
461461
include:
462462
- build: 'arm64'
463-
defines: '-DCMAKE_OSX_ARCHITECTURES=arm64 -DGGML_METAL_EMBED_LIBRARY=ON'
463+
defines: '-DCMAKE_OSX_ARCHITECTURES=arm64 -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_METAL_USE_BF16=ON'
464464
- build: 'x64'
465465
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=ON -DGGML_AVX2=ON'
466466
- build: 'x64-rosetta2'
@@ -544,9 +544,9 @@ jobs:
544544
matrix:
545545
include:
546546
- build: 'x86_64'
547-
defines: '-DANDROID_ABI=x86_64 -DCMAKE_C_FLAGS=-march=x86-64 -DCMAKE_CXX_FLAGS=-march=x86-64'
547+
defines: '-DANDROID_ABI=x86_64 -DCMAKE_C_FLAGS=-march=x86-64 -DCMAKE_CXX_FLAGS=-march=x86-64 -DGGML_OPENMP=OFF -DLLAMA_BUILD_SERVER=OFF'
548548
- build: 'arm64-v8a'
549-
defines: '-DANDROID_ABI=arm64-v8a -DCMAKE_C_FLAGS=-march=armv8.7a -DCMAKE_C_FLAGS=-march=armv8.7a'
549+
defines: '-DANDROID_ABI=arm64-v8a -DCMAKE_C_FLAGS=-march=armv8.7a -DCMAKE_CXX_FLAGS=-march=armv8.7a -DGGML_OPENMP=OFF -DLLAMA_BUILD_SERVER=OFF'
550550
runs-on: ubuntu-24.04
551551
steps:
552552
- uses: actions/checkout@v4
@@ -733,7 +733,7 @@ jobs:
733733
cp artifacts/ggml-cpu-bin-android-arm64-v8a.so/libggml-cpu.so deps/android-arm64-v8a/libggml-cpu.so
734734
cp artifacts/llama-bin-android-arm64-v8a.so/libllama.so deps/android-arm64-v8a/libllama.so
735735
cp artifacts/mtmd-bin-android-arm64-v8a.so/libmtmd.so deps/android-arm64-v8a/libmtmd.so
736-
736+
737737
cp artifacts/ggml-bin-android-x86_64.so/libggml.so deps/android-x86_64/libggml.so
738738
cp artifacts/ggml-base-bin-android-x86_64.so/libggml-base.so deps/android-x86_64/libggml-base.so
739739
cp artifacts/ggml-cpu-bin-android-x86_64.so/libggml-cpu.so deps/android-x86_64/libggml-cpu.so

LLama.Examples/ExampleRunner.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public class ExampleRunner
1515
{ "Chat Session: Automatic conversation", TalkToYourself.Run },
1616
{ "Chat Session: Chinese characters", ChatChineseGB2312.Run },
1717
{ "Executor: Interactive mode chat", InteractiveModeExecute.Run },
18-
{ "Executor: Llava Interactive mode chat", LlavaInteractiveModeExecute.Run },
18+
{ "Executor: Mtmd Interactive mode chat", MtmdInteractiveModeExecute.Run },
1919
{ "Executor: Instruct mode chat", InstructModeExecute.Run },
2020
{ "Executor: Stateless mode chat", StatelessModeExecute.Run },
2121
{ "Save and Load: chat session", SaveAndLoadSession.Run },
@@ -33,7 +33,7 @@ public class ExampleRunner
3333
{ "Batched Executor: Save/Load", BatchedExecutorSaveAndLoad.Run },
3434
{ "Batched Executor: Fork", BatchedExecutorFork.Run },
3535
{ "Batched Executor: Rewind", BatchedExecutorRewind.Run },
36-
{ "Batched Executor: LLava", BatchedExecutorLLava.Run },
36+
{ "Batched Executor: Mtmd", BatchedExecutorMtmd.Run },
3737
{ "Batched Executor: BoolQ Benchmark", BatchedExecutorBoolQ.Run },
3838
{ "Batched Executor: Beam Search", BatchedExecutorBeamSearch.Run },
3939
{ "Custom Sampling Pipeline", CustomSampler.Run },

LLama.Examples/Examples/BatchedExecutorLLava.cs

Lines changed: 0 additions & 91 deletions
This file was deleted.
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using LLama.Batched;
5+
using LLama.Common;
6+
using LLama.Exceptions;
7+
using LLama.Native;
8+
using LLama.Sampling;
9+
using Spectre.Console;
10+
11+
namespace LLama.Examples.Examples;
12+
13+
/// <summary>
/// Demonstrates how to evaluate an image with MTMD helpers and continue generation by
/// manually scheduling batches, similar to what the batched executor does internally.
/// </summary>
public class BatchedExecutorMtmd
{
    /// <summary>
    /// Number of completion tokens to generate after sending the image prompt.
    /// </summary>
    public const int TokenCount = 100;

    /// <summary>
    /// Loads the model and the multimodal projector, queues a prompt made of the media marker
    /// followed by the user's text, and then streams up to <see cref="TokenCount"/> sampled
    /// tokens to the console. I/O and runtime failures are reported in red instead of propagating.
    /// </summary>
    /// <returns>A task that completes when generation has finished or failed.</returns>
    public static async Task Run()
    {
        // Load the base LLM and its clip/mtmd sidecar weights so the executor has everything it needs.
        var parameters = new ModelParams(UserSettings.GetModelPath());
        using var model = await LLamaWeights.LoadFromFileAsync(parameters);
        var mtmdParams = MtmdContextParams.Default(); // reuse llama.cpp defaults for helper settings
        mtmdParams.UseGpu = false;
        var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

        using var mtmd = await MtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams); // multimodal helper weights

        using var executor = new BatchedExecutor(model, parameters, mtmd); // drives batched token + chunk evaluation

        // Prepend the media marker so the helper knows where to inject the encoded image tokens.
        var defaultPrompt = "\nUSER: Provide a full description of the image.\nASSISTANT: ";
        var promptSuffix = AnsiConsole.Ask("Prompt (or ENTER for default):", defaultPrompt);
        var promptText = string.Concat(marker, promptSuffix);

        var imagePath = UserSettings.GetImagePath();

        var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;

        // Simple low-temperature sampler keeps the demo deterministic-ish.
        // Declared with `using` so it is released together with the other resources of this method.
        using var sampler = new DefaultSamplingPipeline
        {
            Temperature = 0.1f
        };

        // Stream decoded text to the console as soon as tokens arrive.
        var decoder = new StreamingTokenDecoder(executor.Context)
        {
            DecodeSpecialTokens = false
        };

        try
        {
            // Render the preview inside the try block so that a missing or unreadable image file
            // is reported by the IOException handler below instead of escaping unhandled.
            AnsiConsole.Write(new CanvasImage(imagePath));

            // Each conversation tracks its own KV cache sequence IDs.
            // Disposed when the try block exits, i.e. before the executor declared above.
            using var conversation = executor.Create();

            // Load the media embed explicitly so ownership is clear.
            using var embed = mtmd.LoadMedia(imagePath)
                ?? throw new RuntimeError($"Failed to load media '{imagePath}'.");

            // Schedule the multimodal prompt with explicit embeds.
            conversation.Prompt(promptText, new[] { embed }, addBos: true);

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Prompt queued with multimodal chunks. Generating response...\n");
            Console.ResetColor();

            var remaining = TokenCount;

            // Run one decode/sampling/prompt cycle – mirrors the batched executor inner loop.
            // Returns false when generation should stop (KV cache exhausted, end of generation,
            // or token budget used up) and true when another cycle is required.
            async Task<bool> ProcessNextAsync()
            {
                var decodeResult = await executor.Infer();
                if (decodeResult == DecodeResult.NoKvSlot) // KV cache exhausted – surface to the user
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("Insufficient KV cache space for multimodal evaluation.");
                    Console.ResetColor();
                    return false;
                }

                if (decodeResult != DecodeResult.Ok)
                {
                    throw new RuntimeError($"Failed to evaluate batch: {decodeResult}.");
                }

                // Nothing to sample yet; run another inference pass.
                if (!conversation.RequiresSampling)
                {
                    return true;
                }

                var token = conversation.Sample(sampler);
                if (token.IsEndOfGeneration(vocab))
                {
                    return false;
                }

                // Only print when the decoder has produced text for the tokens added so far.
                decoder.Add(token);
                var delta = decoder.Read();
                if (!string.IsNullOrEmpty(delta))
                {
                    Console.Write(delta);
                }

                // NOTE(review): if Sample() already accepts the token inside the sampler chain,
                // this explicit Accept would register it twice – confirm against the sampling API.
                sampler.Accept(token); // keep sampler state in sync
                conversation.Prompt(token); // feed the accepted token back into the batch
                remaining--;
                return remaining > 0;
            }

            // Continue until EOS or the token budget is reached.
            while (remaining > 0 && await ProcessNextAsync())
            {
            }

            Console.WriteLine();
        }
        catch (IOException ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine($"Could not load media '{imagePath}': {ex.Message}");
            Console.ResetColor();
        }
        catch (RuntimeError ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine($"MTMD processing failed: {ex.Message}");
            Console.ResetColor();
        }
    }
}

0 commit comments

Comments
 (0)