bernardladenthin
diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml
Lines changed: 34 additions & 0 deletions
diff --git a/CLAUDE.md b/CLAUDE.md
Lines changed: 12 additions & 1 deletion
diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp
Lines changed: 169 additions & 201 deletions
diff --git a/src/main/cpp/jni_helpers.hpp b/src/main/cpp/jni_helpers.hpp
Lines changed: 25 additions & 44 deletions
diff --git a/src/main/cpp/json_helpers.hpp b/src/main/cpp/json_helpers.hpp
Lines changed: 15 additions & 21 deletions
diff --git a/src/main/cpp/log_helpers.hpp b/src/main/cpp/log_helpers.hpp
Lines changed: 11 additions & 8 deletions
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+#
+# SPDX-License-Identifier: MIT
+
+name: clang-format
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+
+# Enforces a single, pinned clang-format across all C++ sources so formatting is
+# reproducible between contributors and CI. Bump CLANG_FORMAT_VERSION here and in
+# CLAUDE.md (Code Formatting) together, then reformat the tree with the same version.
+env:
+  CLANG_FORMAT_VERSION: "22.1.5"
+
+jobs:
+  clang-format:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Install pinned clang-format
+        run: pip install "clang-format==${CLANG_FORMAT_VERSION}"
+      - name: Check C++ formatting
+        run: |
+          clang-format --version
+          # Hand-written C++ only; generated src/main/cpp/jllama.h (`javac -h`) is intentionally not matched.
+          # Paths go into an array and are passed quoted so unusual file names are not split or globbed.
+          mapfile -t files < <(find src/main/cpp src/test/cpp -type f \( -name '*.cpp' -o -name '*.hpp' \) | sort)
+          echo "Checking:"; printf '%s\n' "${files[@]}"
+          clang-format --dry-run --Werror "${files[@]}"
@@ -392,10 +392,21 @@ not track the loader's own Java package). This is the same
 `spotbugs-exclude.xml`, PIT `targetClasses`, and `CMakeLists.txt` OSInfo repairs.
 
 ### Code Formatting
+
+C++ formatting is **enforced in CI** (`.github/workflows/clang-format.yml`) with a **pinned**
+clang-format — currently **22.1.5**, installed via `pip install clang-format==22.1.5`. Format with
+that exact version before committing; a different clang-format version reflows code differently and
+will fail the check.
+
 ```bash
-clang-format -i src/main/cpp/*.cpp src/main/cpp/*.hpp   # Format C++ code
+pip install "clang-format==22.1.5"
+find src/main/cpp src/test/cpp -type f \( -name '*.cpp' -o -name '*.hpp' \) -exec clang-format -i {} +   # Format C++ code (same file set as the CI check)
 ```
 
+The generated JNI header `src/main/cpp/jllama.h` (produced by `javac -h`) is intentionally excluded.
+To bump the enforced version, update the pin in **both** the workflow (`CLANG_FORMAT_VERSION`) and
+this section (the version in the text and in the `pip install` command), then reformat the whole tree with the new version in the same commit.
+
 ### Javadoc — must build cleanly before `mvn package`
 
 The release packaging job runs `mvn package` with the `release` profile, which attaches
 
@@ -50,23 +50,23 @@ struct server_response_reader;
 // worker thread.  Stored as the Java-side `ctx` (jlong) pointer.
 // ---------------------------------------------------------------------------
 struct jllama_context {
-    server_context    server;                 // value member (pimpl inside)
-    std::thread       worker;
-    bool              vocab_only        = false;
+    server_context server; // value member (pimpl inside)
+    std::thread worker;
+    bool vocab_only = false;
     std::atomic<bool> worker_ready{false};
 
     // Cached after load_model() — valid for the lifetime of this context.
-    const llama_vocab *vocab             = nullptr;
+    const llama_vocab *vocab = nullptr;
     // Non-null only in vocab-only mode (bypasses server_context entirely).
-    llama_model       *vocab_only_model  = nullptr;
+    llama_model *vocab_only_model = nullptr;
 
     // Saved copy of common_params used to load the model.
     // Required by server_task::params_from_json_cmpl which takes common_params&.
-    common_params      params;
+    common_params params;
 
     // Per-streaming-task response readers, keyed by task id.
     // Guarded by readers_mutex.
-    std::mutex         readers_mutex;
+    std::mutex readers_mutex;
     std::map<int, std::unique_ptr<server_response_reader>> readers;
 };
 
@@ -80,9 +80,7 @@ inline void erase_reader(jllama_context *jctx, int id_task) {
 
 // Guard: throw and return false if the model was loaded without embedding
 // support enabled. Used by every JNI entry point that produces embeddings.
-[[nodiscard]] inline bool require_embedding_support(JNIEnv         *env,
-                                                     bool            embedding_enabled,
-                                                     jclass          error_class) {
+[[nodiscard]] inline bool require_embedding_support(JNIEnv *env, bool embedding_enabled, jclass error_class) {
     if (embedding_enabled) {
         return true;
     }
@@ -101,9 +99,7 @@ inline void erase_reader(jllama_context *jctx, int id_task) {
 // already deleted (or never fully initialised), which is a valid no-op for
 // a destructor-style call.
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline jllama_context *get_jllama_context_impl(JNIEnv   *env,
-                                                              jobject   obj,
-                                                              jfieldID  field_id) {
+[[nodiscard]] inline jllama_context *get_jllama_context_impl(JNIEnv *env, jobject obj, jfieldID field_id) {
     const jlong handle = env->GetLongField(obj, field_id);
     if (handle == 0) {
         return nullptr;
@@ -117,10 +113,8 @@ inline void erase_reader(jllama_context *jctx, int id_task) {
 // Checks that `data` contains the given key.  Returns true if present.
 // On missing key: throws "<field> is required" via JNI and returns false.
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline bool require_json_field_impl(JNIEnv               *env,
-                                                   const nlohmann::json &data,
-                                                   const char           *field,
-                                                   jclass                error_class) {
+[[nodiscard]] inline bool require_json_field_impl(JNIEnv *env, const nlohmann::json &data, const char *field,
+                                                  jclass error_class) {
     if (data.contains(field)) {
         return true;
     }
@@ -135,10 +129,9 @@ inline void erase_reader(jllama_context *jctx, int id_task) {
 // Reads a Java int array into a std::vector<int32_t> and releases the JNI
 // array elements with JNI_ABORT (read-only — no writeback needed).
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline std::vector<int32_t> jint_array_to_tokens_impl(
-        JNIEnv *env, jintArray array) {
+[[nodiscard]] inline std::vector<int32_t> jint_array_to_tokens_impl(JNIEnv *env, jintArray array) {
     const jsize length = env->GetArrayLength(array);
-    jint *elements     = env->GetIntArrayElements(array, nullptr);
+    jint *elements = env->GetIntArrayElements(array, nullptr);
     std::vector<int32_t> tokens(elements, elements + length);
     env->ReleaseIntArrayElements(array, elements, JNI_ABORT);
     return tokens;
@@ -170,9 +163,7 @@ inline void erase_reader(jllama_context *jctx, int id_task) {
 // construction to results_to_json (json_helpers.hpp) and serialisation to
 // json_to_jstring_impl.
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline jstring results_to_jstring_impl(
-        JNIEnv                                    *env,
-        const std::vector<server_task_result_ptr> &results) {
+[[nodiscard]] inline jstring results_to_jstring_impl(JNIEnv *env, const std::vector<server_task_result_ptr> &results) {
     return json_to_jstring_impl(env, results_to_json(results));
 }
 
@@ -184,13 +175,9 @@ inline void erase_reader(jllama_context *jctx, int id_task) {
 // On allocation failure: throws via JNI with oom_class and returns nullptr.
 // ---------------------------------------------------------------------------
 template <typename JArray, typename JElem, typename CppElem>
-[[nodiscard]] inline JArray vec_to_jarray_impl(
-        JNIEnv                     *env,
-        const std::vector<CppElem> &values,
-        jclass                      oom_class,
-        const char                 *oom_msg,
-        JArray (JNIEnv_::*alloc)(jsize),
-        void (JNIEnv_::*copy)(JArray, jsize, jsize, const JElem *)) {
+[[nodiscard]] inline JArray vec_to_jarray_impl(JNIEnv *env, const std::vector<CppElem> &values, jclass oom_class,
+                                               const char *oom_msg, JArray (JNIEnv_::*alloc)(jsize),
+                                               void (JNIEnv_::*copy)(JArray, jsize, jsize, const JElem *)) {
     const jsize len = static_cast<jsize>(values.size());
     JArray arr = (env->*alloc)(len);
     if (arr == nullptr) {
@@ -202,21 +189,15 @@ template <typename JArray, typename JElem, typename CppElem>
 }
 
 // Converts a float vector to a Java jfloatArray.
-[[nodiscard]] inline jfloatArray embedding_to_jfloat_array_impl(
-        JNIEnv                   *env,
-        const std::vector<float> &values,
-        jclass                    oom_class) {
-    return vec_to_jarray_impl<jfloatArray, jfloat>(
-            env, values, oom_class, "could not allocate embedding",
-            &JNIEnv_::NewFloatArray, &JNIEnv_::SetFloatArrayRegion);
+[[nodiscard]] inline jfloatArray embedding_to_jfloat_array_impl(JNIEnv *env, const std::vector<float> &values,
+                                                                jclass oom_class) {
+    return vec_to_jarray_impl<jfloatArray, jfloat>(env, values, oom_class, "could not allocate embedding",
+                                                   &JNIEnv_::NewFloatArray, &JNIEnv_::SetFloatArrayRegion);
 }
 
 // Converts a token vector to a Java jintArray.
-[[nodiscard]] inline jintArray tokens_to_jint_array_impl(
-        JNIEnv                       *env,
-        const std::vector<int32_t>   &tokens,
-        jclass                        oom_class) {
-    return vec_to_jarray_impl<jintArray, jint>(
-            env, tokens, oom_class, "could not allocate token memory",
-            &JNIEnv_::NewIntArray, &JNIEnv_::SetIntArrayRegion);
+[[nodiscard]] inline jintArray tokens_to_jint_array_impl(JNIEnv *env, const std::vector<int32_t> &tokens,
+                                                         jclass oom_class) {
+    return vec_to_jarray_impl<jintArray, jint>(env, tokens, oom_class, "could not allocate token memory",
+                                               &JNIEnv_::NewIntArray, &JNIEnv_::SetIntArrayRegion);
 }
@@ -51,8 +51,7 @@
 // jni_helpers.hpp, and directly in receiveCompletionJson, embed, and
 // handleRerank in jllama.cpp.
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline std::string get_result_error_message(
-        const server_task_result_ptr &result) {
+[[nodiscard]] inline std::string get_result_error_message(const server_task_result_ptr &result) {
     return result->to_json()["message"].get<std::string>();
 }
 
@@ -68,8 +67,7 @@
 // This mirrors the OpenAI API convention used by handleCompletions,
 // handleCompletionsOai, handleChatCompletions, and handleInfill.
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline json results_to_json(
-        const std::vector<server_task_result_ptr> &results) {
+[[nodiscard]] inline json results_to_json(const std::vector<server_task_result_ptr> &results) {
     if (results.size() == 1) {
         return results[0]->to_json();
     }
@@ -87,19 +85,14 @@
 // Each element contains the original document text (looked up via the
 // result's "index" field), the index, and the relevance score.
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline json rerank_results_to_json(
-        const std::vector<server_task_result_ptr> &results,
-        const std::vector<std::string>            &documents) {
+[[nodiscard]] inline json rerank_results_to_json(const std::vector<server_task_result_ptr> &results,
+                                                 const std::vector<std::string> &documents) {
     json arr = json::array();
     for (const auto &result : results) {
         const auto out = result->to_json();
-        int   index = out["index"].get<int>();
+        int index = out["index"].get<int>();
         float score = out["score"].get<float>();
-        arr.push_back({
-            {"document", documents[index]},
-            {"index",    index},
-            {"score",    score}
-        });
+        arr.push_back({{"document", documents[index]}, {"index", index}, {"score", score}});
     }
     return arr;
 }
@@ -119,8 +112,12 @@
         return false;
     }
     const std::string format = body.at("encoding_format").get<std::string>();
-    if (format == "base64") { return true; }
-    if (format == "float")  { return false; }
+    if (format == "base64") {
+        return true;
+    }
+    if (format == "float") {
+        return false;
+    }
     throw std::invalid_argument("encoding_format must be \"float\" or \"base64\"");
 }
 
@@ -135,8 +132,7 @@
 //   when "content" was used — the caller must downgrade oaicompat to NONE.
 // Throws std::invalid_argument if neither "input" nor "content" is present.
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline json extract_embedding_prompt(const json &body,
-                                                    bool       &force_no_oaicompat) {
+[[nodiscard]] inline json extract_embedding_prompt(const json &body, bool &force_no_oaicompat) {
     force_no_oaicompat = false;
     if (body.count("input") != 0) {
         return body.at("input");
@@ -168,8 +164,7 @@
 // Returns float          — validated value in [0.0, 1.0].
 // Throws std::invalid_argument — present but outside [0.0, 1.0].
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline std::optional<float>
-parse_slot_prompt_similarity(const json &config) {
+[[nodiscard]] inline std::optional<float> parse_slot_prompt_similarity(const json &config) {
     if (!config.contains("slot_prompt_similarity")) {
         return std::nullopt;
     }
@@ -189,8 +184,7 @@ parse_slot_prompt_similarity(const json &config) {
 // Returns int            — validated value > 0.
 // Throws std::invalid_argument("<key> must be greater than 0") — present but ≤ 0.
 // ---------------------------------------------------------------------------
-[[nodiscard]] inline std::optional<int>
-parse_positive_int_config(const json &config, const char *key) {
+[[nodiscard]] inline std::optional<int> parse_positive_int_config(const json &config, const char *key) {
     if (!config.contains(key)) {
         return std::nullopt;
     }
 
@@ -20,21 +20,24 @@
 // fall-through to mirror llama.cpp's own log routing.
 [[nodiscard]] inline const char *log_level_name(ggml_log_level level) {
     switch (level) {
-    case GGML_LOG_LEVEL_ERROR: return "ERROR";
-    case GGML_LOG_LEVEL_WARN:  return "WARN";
-    case GGML_LOG_LEVEL_DEBUG: return "DEBUG";
+    case GGML_LOG_LEVEL_ERROR:
+        return "ERROR";
+    case GGML_LOG_LEVEL_WARN:
+        return "WARN";
+    case GGML_LOG_LEVEL_DEBUG:
+        return "DEBUG";
     case GGML_LOG_LEVEL_INFO:
-    default:                   return "INFO";
+    default:
+        return "INFO";
     }
 }
 
 // Pure variant taking an explicit timestamp so tests are deterministic.
-[[nodiscard]] inline std::string format_log_as_json(
-        ggml_log_level level, const char *text, std::time_t timestamp) {
+[[nodiscard]] inline std::string format_log_as_json(ggml_log_level level, const char *text, std::time_t timestamp) {
     nlohmann::json log_obj = {
         {"timestamp", timestamp},
-        {"level",     log_level_name(level)},
-        {"message",   text ? text : ""},
+        {"level", log_level_name(level)},
+        {"message", text ? text : ""},
     };
     return log_obj.dump();
 }