MINIFICPP-2719 - Fix template use

adamdebreceni · adamdebreceni · commit 09c3416b0d38 · 2026-04-30T10:03:25.000+02:00
diff --git a/cmake/LlamaCpp.cmake b/cmake/LlamaCpp.cmake
@@ -38,8 +38,8 @@ set(PC ${Bash_EXECUTABLE}  -c "set -x &&\
 
 
 FetchContent_Declare(llamacpp
-        URL https://github.com/ggml-org/llama.cpp/archive/refs/tags/b7836.tar.gz
-        URL_HASH SHA256=3d384e7e8b3bc3cd31abddedf684a6e201405c1d932cafb3c4a5277d872b0614
+        URL https://github.com/ggml-org/llama.cpp/archive/refs/tags/b8944.tar.gz
+        URL_HASH SHA256=ca231c8aca086f56bad3ed371f6dc5b01e971e812a8ddf67564f087390c0e781
         PATCH_COMMAND "${PC}"
         SYSTEM
 )
@@ -50,5 +50,7 @@ set(LLAMACPP_INCLUDE_DIRS
     "${llamacpp_SOURCE_DIR}/include"
     "${llamacpp_SOURCE_DIR}/ggml/include"
     "${llamacpp_SOURCE_DIR}/tools"
+    "${llamacpp_SOURCE_DIR}/common"
+    "${llamacpp_SOURCE_DIR}/vendor"
     CACHE STRING "" FORCE
 )
diff --git a/extensions/llamacpp/CMakeLists.txt b/extensions/llamacpp/CMakeLists.txt
@@ -31,7 +31,7 @@ add_minifi_library(minifi-llamacpp SHARED ${SOURCES})
 target_include_directories(minifi-llamacpp PUBLIC "${CMAKE_SOURCE_DIR}/extensions/llamacpp")
 target_include_directories(minifi-llamacpp PUBLIC "${LLAMACPP_INCLUDE_DIRS}")
 
-target_link_libraries(minifi-llamacpp minifi-cpp-extension-lib llama mtmd)
+target_link_libraries(minifi-llamacpp minifi-cpp-extension-lib llama mtmd llama-common)
 
 register_c_api_extension(minifi-llamacpp "LLAMACPP EXTENSION" LLAMACPP-EXTENSION "Provides llama.cpp support" "extensions/llamacpp/tests")
 
diff --git a/extensions/llamacpp/processors/DefaultLlamaContext.cpp b/extensions/llamacpp/processors/DefaultLlamaContext.cpp
@@ -52,6 +52,8 @@ DefaultLlamaContext::DefaultLlamaContext(const std::filesystem::path& model_path
     throw Exception(ExceptionType::PROCESS_SCHEDULE_EXCEPTION, fmt::format("Failed to load model from '{}'", model_path.string()));
   }
 
+  chat_template_ = common_chat_templates_init(llama_model_, "");
+
   llama_context_params ctx_params = llama_context_default_params();
   ctx_params.n_ctx = llama_ctx_params.n_ctx;
   ctx_params.n_batch = llama_ctx_params.n_batch;
@@ -108,27 +110,24 @@ DefaultLlamaContext::~DefaultLlamaContext() {
 }
 
 std::optional<std::string> DefaultLlamaContext::applyTemplate(const std::vector<LlamaChatMessage>& messages) {
-  std::vector<llama_chat_message> llama_messages;
-  llama_messages.reserve(messages.size());
-  std::transform(messages.begin(), messages.end(), std::back_inserter(llama_messages),
-                 [](const LlamaChatMessage& msg) { return llama_chat_message{.role = msg.role.c_str(), .content = msg.content.c_str()}; });
-  std::string text;
-  text.resize(DEFAULT_BUFFER_SIZE);
-  const char * chat_template = llama_model_chat_template(llama_model_, nullptr);
-  int32_t res_size = llama_chat_apply_template(chat_template, llama_messages.data(), llama_messages.size(), true, text.data(), gsl::narrow<int32_t>(text.size()));
-  if (res_size < 0) {
+  // chat_template_ is initialized in the constructor; without it there is nothing to render with.
+  if (!chat_template_) {
     return std::nullopt;
   }
-  if (res_size > gsl::narrow<int32_t>(text.size())) {
-    text.resize(res_size);
-    res_size = llama_chat_apply_template(chat_template, llama_messages.data(), llama_messages.size(), true, text.data(), gsl::narrow<int32_t>(text.size()));
-    if (res_size < 0) {
-      return std::nullopt;
-    }
+  common_chat_templates_inputs inputs;
+  inputs.messages.reserve(messages.size());
+  for (const auto& msg : messages) {
+    inputs.messages.push_back(common_chat_msg{.role = msg.role, .content = msg.content});
   }
-  text.resize(res_size);
+  inputs.enable_thinking = false;  // TODO(adebreceni): MINIFICPP-2800 common_chat_templates_support_enable_thinking(chat_template_.get());
 
-  return text;
+  // NOTE(review): common_chat_templates_apply presumably throws on a template it cannot render - confirm.
+  // Keep the previous contract of this function: a template failure is reported as std::nullopt.
+  try {
+    return common_chat_templates_apply(chat_template_.get(), inputs).prompt;
+  } catch (const std::exception&) {
+    return std::nullopt;
+  }
 }
 
 namespace {
diff --git a/extensions/llamacpp/processors/DefaultLlamaContext.h b/extensions/llamacpp/processors/DefaultLlamaContext.h
@@ -19,6 +19,7 @@
 #include "LlamaContext.h"
 #include "llama.h"
 #include "LlamaBackendInitializer.h"
+#include "chat.h"
 #include "mtmd/mtmd.h"
 #include "minifi-cpp/core/logging/Logger.h"
 
@@ -40,6 +41,7 @@ class DefaultLlamaContext : public LlamaContext {
  private:
   const LlamaBackendInitializer& llama_context_initializer_ = LlamaBackendInitializer::get();
   llama_model* llama_model_{};
+  common_chat_templates_ptr chat_template_;
   llama_context* llama_ctx_{};
   mtmd_context* multimodal_ctx_{};
   llama_sampler* llama_sampler_{};
diff --git a/thirdparty/llamacpp/mtmd-fix.patch b/thirdparty/llamacpp/mtmd-fix.patch
@@ -1,18 +1,27 @@
-diff --color=auto -rupN llamacpp-src-original/CMakeLists.txt llamacpp-src-patched/CMakeLists.txt
---- llamacpp-src-original/CMakeLists.txt	2026-01-25 21:19:47
-+++ llamacpp-src-patched/CMakeLists.txt	2026-02-18 13:15:46
-@@ -212,6 +212,7 @@ add_subdirectory(src)
+diff --color=auto -rupN llama.cpp-b8944/CMakeLists.txt llama.cpp-b8944-patched/CMakeLists.txt
+--- llama.cpp-b8944/CMakeLists.txt	2026-04-27 08:30:55
++++ llama.cpp-b8944-patched/CMakeLists.txt	2026-04-27 13:49:25
+@@ -191,6 +191,7 @@ add_subdirectory(src)
  #
  
  add_subdirectory(src)
 +add_subdirectory(tools/mtmd)
  
  #
  # utils, programs, examples and tests
-diff --color=auto -rupN llamacpp-src-original/tools/mtmd/CMakeLists.txt llamacpp-src-patched/tools/mtmd/CMakeLists.txt
---- llamacpp-src-original/tools/mtmd/CMakeLists.txt	2026-01-25 21:19:47
-+++ llamacpp-src-patched/tools/mtmd/CMakeLists.txt	2026-02-18 13:13:40
-@@ -80,16 +80,3 @@ endif()
+diff --color=auto -rupN llama.cpp-b8944/common/ngram-mod.cpp llama.cpp-b8944-patched/common/ngram-mod.cpp
+--- llama.cpp-b8944/common/ngram-mod.cpp	2026-04-27 08:30:55
++++ llama.cpp-b8944-patched/common/ngram-mod.cpp	2026-04-30 08:28:08
+@@ -1,4 +1,5 @@
+ #include "ngram-mod.h"
++#include <algorithm>
+ 
+ //
+ // common_ngram_mod
+diff --color=auto -rupN llama.cpp-b8944/tools/mtmd/CMakeLists.txt llama.cpp-b8944-patched/tools/mtmd/CMakeLists.txt
+--- llama.cpp-b8944/tools/mtmd/CMakeLists.txt	2026-04-27 08:30:55
++++ llama.cpp-b8944-patched/tools/mtmd/CMakeLists.txt	2026-04-27 13:50:45
+@@ -101,20 +101,6 @@ endif()
      endif()
  endif()
  
@@ -27,5 +36,9 @@ diff --color=auto -rupN llamacpp-src-original/tools/mtmd/CMakeLists.txt llamacpp
 -if(LLAMA_TOOLS_INSTALL)
 -    install(TARGETS ${TARGET} RUNTIME)
 -endif()
--target_link_libraries  (${TARGET} PRIVATE common mtmd Threads::Threads)
+-target_link_libraries  (${TARGET} PRIVATE llama-common mtmd Threads::Threads)
 -target_compile_features(${TARGET} PRIVATE cxx_std_17)
+-
+ # mtmd-debug tool
+ add_executable(llama-mtmd-debug debug/mtmd-debug.cpp)
+ set_target_properties(llama-mtmd-debug PROPERTIES OUTPUT_NAME llama-mtmd-debug)