bernardladenthin
diff --git a/‎CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/main/cpp/train_engine.cpp‎
Lines changed: 165 additions & 0 deletions b/‎src/main/cpp/train_engine.cpp‎
Lines changed: 165 additions & 0 deletions
diff --git a/‎src/main/cpp/train_engine.h‎
Lines changed: 44 additions & 0 deletions b/‎src/main/cpp/train_engine.h‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎src/main/java/net/ladenthin/llama/LlamaTrainer.java‎
Lines changed: 65 additions & 0 deletions b/‎src/main/java/net/ladenthin/llama/LlamaTrainer.java‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎src/main/java/net/ladenthin/llama/args/Optimizer.java‎
Lines changed: 33 additions & 0 deletions b/‎src/main/java/net/ladenthin/llama/args/Optimizer.java‎
Lines changed: 33 additions & 0 deletions
@@ -308,6 +308,7 @@ endif()
 add_library(jllama SHARED
     src/main/cpp/jllama.cpp
     src/main/cpp/tts_engine.cpp
+    src/main/cpp/train_engine.cpp
     ${JLLAMA_TTS_GEN_CPP}
     src/main/cpp/utils.hpp
     ${llama.cpp_SOURCE_DIR}/tools/server/server-common.cpp
 
@@ -0,0 +1,165 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+#include "train_engine.h"
+
+#include "common.h"
+#include "ggml-opt.h"
+#include "llama.h"
+
+#include <nlohmann/json.hpp>
+
+#include <jni.h>
+
+#include <exception>
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <vector>
+
+using json = nlohmann::json;
+
+namespace jllama_train {
+
+// Runs one fine-tuning job end to end: validates the configuration, loads the model into its own
+// context, tokenizes the corpus, trains for the configured number of epochs and writes the
+// resulting model to cfg.output_path.
+//
+// Returns true on success. On a detectable failure it returns false and stores a human-readable
+// reason in `err`. Exceptions raised by the underlying llama.cpp calls are not caught here; the
+// caller is expected to handle them.
+bool finetune(const finetune_config &cfg, std::string &err) {
+    // Reject unusable configurations up front, before the (potentially very long) model load and
+    // training run, instead of failing at the very end or tripping a native assertion.
+    if (cfg.model_path.empty()) {
+        err = "model path must not be empty";
+        return false;
+    }
+    if (cfg.output_path.empty()) {
+        err = "output path must not be empty";
+        return false;
+    }
+    // Written as a negated range test so that NaN is rejected as well.
+    if (!(cfg.val_split >= 0.0f && cfg.val_split <= 1.0f)) {
+        err = "val_split must be within [0, 1]";
+        return false;
+    }
+
+    common_params params;
+    params.escape = false;
+    params.model.path = cfg.model_path;
+    params.out_file = cfg.output_path;
+    params.n_ctx = cfg.n_ctx;
+    params.n_gpu_layers = cfg.n_gpu_layers;
+    params.val_split = cfg.val_split;
+    if (cfg.n_batch > 0) {
+        params.n_batch = cfg.n_batch;
+    }
+    if (cfg.n_ubatch > 0) {
+        params.n_ubatch = cfg.n_ubatch;
+    }
+
+    // Any optimizer value other than 1 (SGD) selects AdamW.
+    params.optimizer =
+        cfg.optimizer == 1 ? GGML_OPT_OPTIMIZER_TYPE_SGD : GGML_OPT_OPTIMIZER_TYPE_ADAMW;
+    params.lr.lr0 = cfg.learning_rate;
+    params.lr.lr_min = cfg.lr_min;
+    params.lr.decay_epochs = cfg.decay_epochs;
+    params.lr.wd = cfg.weight_decay;
+    params.lr.epochs = static_cast<unsigned>(cfg.epochs > 0 ? cfg.epochs : 1);
+    params.lr.init(); // required after setting lr fields, before the optimizer reads get_lr()
+
+    // The corpus is either read from a file or supplied inline.
+    if (!cfg.training_file.empty()) {
+        std::ifstream in(cfg.training_file, std::ios::binary);
+        if (!in) {
+            err = "cannot open training file: " + cfg.training_file;
+            return false;
+        }
+        params.prompt.assign(std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>());
+    } else {
+        params.prompt = cfg.training_text;
+    }
+
+    // Training needs writable weights (mmap yields read-only pointers) and an f32 KV cache
+    // (OUT_PROD has no f16 support) — same forced settings as upstream finetune.cpp.
+    params.use_mmap = false;
+    params.cache_type_k = GGML_TYPE_F32;
+    params.cache_type_v = GGML_TYPE_F32;
+
+    llama_backend_init();
+    llama_numa_init(params.numa);
+
+    // llama_init owns the model and the context; both are released when it goes out of scope,
+    // including on every early return below.
+    common_init_result_ptr llama_init = common_init_from_params(params);
+    llama_model *model = llama_init ? llama_init->model() : nullptr;
+    llama_context *ctx = llama_init ? llama_init->context() : nullptr;
+    if (model == nullptr || ctx == nullptr) {
+        err = "failed to load model for training: " + cfg.model_path;
+        return false;
+    }
+
+    std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
+
+    // Each training datapoint spans n_ctx input tokens plus one shifted label token, and
+    // datapoints are taken every `stride` tokens (this mirrors the sizing done by upstream
+    // common_opt_dataset_init). A corpus that cannot fill at least one datapoint would yield an
+    // empty or underflowed dataset size and trip a native assertion, which aborts the whole
+    // process (and with it the JVM), so it is rejected here with a regular error instead.
+    const int64_t n_ctx = static_cast<int64_t>(llama_n_ctx(ctx));
+    const int64_t stride = n_ctx / 2;
+    if (stride < 1) {
+        err = "context size too small for training: " + std::to_string(n_ctx);
+        return false;
+    }
+    const int64_t n_tokens = static_cast<int64_t>(tokens.size());
+    const int64_t min_tokens = n_ctx + 1 + stride;
+    if (n_tokens < min_tokens) {
+        err = "training corpus produced too few tokens (got " + std::to_string(n_tokens) +
+              ", need at least " + std::to_string(min_tokens) + " for a context size of " +
+              std::to_string(n_ctx) + ")";
+        return false;
+    }
+
+    {
+        // Owns the optimizer-side handles so they are released even if training throws. The
+        // enclosing scope ends before the model is saved, which keeps the original release order.
+        struct opt_handles {
+            ggml_opt_dataset_t dataset = nullptr;
+            ggml_opt_result_t train = nullptr;
+            ggml_opt_result_t eval = nullptr;
+
+            ~opt_handles() {
+                if (train != nullptr) {
+                    ggml_opt_result_free(train);
+                }
+                if (eval != nullptr) {
+                    ggml_opt_result_free(eval);
+                }
+                if (dataset != nullptr) {
+                    ggml_opt_dataset_free(dataset);
+                }
+            }
+        } opt;
+
+        opt.dataset = common_opt_dataset_init(ctx, tokens, stride);
+
+        llama_opt_params lopt_params = {
+            /*n_ctx_train     =*/0,
+            /*param_filter    =*/llama_opt_param_filter_all,
+            /*param_filter_ud =*/nullptr,
+            /*get_opt_pars    =*/common_opt_lr_pars,
+            /*get_opt_pars_ud =*/&params.lr,
+            /*optimizer_type  =*/params.optimizer,
+        };
+        llama_opt_init(ctx, model, lopt_params);
+
+        // Datapoints [0, idata_split) are used for training, the rest for validation. The value
+        // is clamped to [0, ndata] so float rounding can never push it outside the valid range.
+        const int64_t ndata = ggml_opt_dataset_ndata(opt.dataset);
+        int64_t idata_split = static_cast<int64_t>(ndata * (1.0f - params.val_split));
+        if (idata_split < 0) {
+            idata_split = 0;
+        }
+        if (idata_split > ndata) {
+            idata_split = ndata;
+        }
+
+        opt.train = ggml_opt_result_init();
+        opt.eval = ggml_opt_result_init();
+
+        for (params.lr.epoch = 0; params.lr.epoch < params.lr.epochs; ++params.lr.epoch) {
+            llama_opt_epoch(ctx, opt.dataset, opt.train, opt.eval, idata_split,
+                            ggml_opt_epoch_callback_progress_bar,
+                            ggml_opt_epoch_callback_progress_bar);
+            // Per-epoch statistics are not accumulated across epochs.
+            ggml_opt_result_reset(opt.train);
+            ggml_opt_result_reset(opt.eval);
+        }
+    }
+
+    llama_model_save_to_file(model, params.out_file.c_str());
+
+    // Deliberately NOT calling llama_backend_free(): other live llama contexts in this JVM
+    // (e.g. an inference LlamaModel) may still depend on the initialized backend.
+    return true;
+}
+
+} // namespace jllama_train
+
+namespace {
+
+// Appends the standard UTF-8 encoding of the code point `cp` to `out`.
+void append_utf8(std::string &out, char32_t cp) {
+    if (cp < 0x80) {
+        out.push_back(static_cast<char>(cp));
+    } else if (cp < 0x800) {
+        out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
+        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
+    } else if (cp < 0x10000) {
+        out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
+        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
+        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
+    } else {
+        out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
+        out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
+        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
+        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
+    }
+}
+
+// Copies a Java string into `out` as standard UTF-8. GetStringUTFChars is deliberately avoided:
+// it yields JNI "modified UTF-8" (surrogate pairs encoded separately, NUL as C0 80), which the
+// JSON parser rejects as invalid UTF-8. A null `s` yields an empty string. Unpaired surrogates
+// are replaced with U+FFFD. Returns false if the JVM could not provide the characters.
+bool jstring_to_utf8(JNIEnv *env, jstring s, std::string &out) {
+    out.clear();
+    if (s == nullptr) {
+        return true;
+    }
+    const jsize len = env->GetStringLength(s);
+    const jchar *units = env->GetStringChars(s, nullptr);
+    if (units == nullptr) {
+        return false;
+    }
+    out.reserve(static_cast<std::string::size_type>(len));
+    for (jsize i = 0; i < len; ++i) {
+        char32_t cp = units[i];
+        const bool is_high = cp >= 0xD800 && cp <= 0xDBFF;
+        const bool is_low = cp >= 0xDC00 && cp <= 0xDFFF;
+        if (is_high && i + 1 < len && units[i + 1] >= 0xDC00 && units[i + 1] <= 0xDFFF) {
+            cp = 0x10000 + ((cp - 0xD800) << 10) + (static_cast<char32_t>(units[i + 1]) - 0xDC00);
+            ++i;
+        } else if (is_high || is_low) {
+            cp = 0xFFFD;
+        }
+        append_utf8(out, cp);
+    }
+    env->ReleaseStringChars(s, units);
+    return true;
+}
+
+// Builds a Java string from UTF-8 text. NewStringUTF is deliberately avoided because it expects
+// modified UTF-8; the text is decoded to UTF-16 and handed to NewString instead. Malformed input
+// bytes are replaced with U+FFFD rather than being passed to the JVM.
+jstring utf8_to_jstring(JNIEnv *env, const std::string &text) {
+    std::vector<jchar> units;
+    units.reserve(text.size());
+    const std::string::size_type n = text.size();
+    std::string::size_type i = 0;
+    while (i < n) {
+        const unsigned char lead = static_cast<unsigned char>(text[i]);
+        ++i;
+        char32_t cp = 0xFFFD;
+        int continuation = 0;
+        bool ok = true;
+        if (lead < 0x80) {
+            cp = lead;
+        } else if ((lead & 0xE0) == 0xC0) {
+            cp = lead & 0x1F;
+            continuation = 1;
+        } else if ((lead & 0xF0) == 0xE0) {
+            cp = lead & 0x0F;
+            continuation = 2;
+        } else if ((lead & 0xF8) == 0xF0) {
+            cp = lead & 0x07;
+            continuation = 3;
+        } else {
+            ok = false; // stray continuation byte or invalid lead byte
+        }
+        for (int k = 0; ok && k < continuation; ++k) {
+            if (i < n && (static_cast<unsigned char>(text[i]) & 0xC0) == 0x80) {
+                cp = (cp << 6) | (static_cast<unsigned char>(text[i]) & 0x3F);
+                ++i;
+            } else {
+                ok = false; // truncated sequence
+            }
+        }
+        if (!ok || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
+            cp = 0xFFFD;
+        }
+        if (cp >= 0x10000) {
+            const char32_t v = cp - 0x10000;
+            units.push_back(static_cast<jchar>(0xD800 + (v >> 10)));
+            units.push_back(static_cast<jchar>(0xDC00 + (v & 0x3FF)));
+        } else {
+            units.push_back(static_cast<jchar>(cp));
+        }
+    }
+    // Never hand NewString a null pointer, even for a zero-length string.
+    static const jchar empty_unit = 0;
+    const jchar *data = units.empty() ? &empty_unit : units.data();
+    return env->NewString(data, static_cast<jsize>(units.size()));
+}
+
+} // namespace
+
+// JNI entry point for LlamaTrainer.finetuneNative(String).
+//
+// `jconfig` is a JSON object describing the run; missing keys fall back to the defaults listed
+// below. Returns an empty string on success and a non-empty error message on failure. Returns
+// null only when a Java exception is already pending. C++ exceptions never cross the JNI
+// boundary.
+extern "C" JNIEXPORT jstring JNICALL
+Java_net_ladenthin_llama_LlamaTrainer_finetuneNative(JNIEnv *env, jclass, jstring jconfig) {
+    std::string config_json;
+    if (!jstring_to_utf8(env, jconfig, config_json)) {
+        if (env->ExceptionCheck()) {
+            return nullptr; // let the pending Java exception surface to the caller
+        }
+        return utf8_to_jstring(env, "invalid training config: unable to read the config string");
+    }
+
+    jllama_train::finetune_config cfg;
+    try {
+        const json j = json::parse(config_json);
+        cfg.model_path = j.value("model_path", std::string());
+        cfg.training_text = j.value("training_text", std::string());
+        cfg.training_file = j.value("training_file", std::string());
+        cfg.output_path = j.value("output_path", std::string());
+        cfg.epochs = j.value("epochs", 2);
+        cfg.learning_rate = j.value("learning_rate", 1e-5f);
+        cfg.lr_min = j.value("lr_min", -1.0f);
+        cfg.decay_epochs = j.value("decay_epochs", -1.0f);
+        cfg.weight_decay = j.value("weight_decay", 0.0f);
+        cfg.optimizer = j.value("optimizer", 0);
+        cfg.n_ctx = j.value("n_ctx", 0);
+        cfg.n_gpu_layers = j.value("n_gpu_layers", -1);
+        cfg.val_split = j.value("val_split", 0.05f);
+        cfg.n_batch = j.value("n_batch", 0);
+        cfg.n_ubatch = j.value("n_ubatch", 0);
+    } catch (const std::exception &e) {
+        return utf8_to_jstring(env, std::string("invalid training config: ") + e.what());
+    }
+
+    std::string err;
+    try {
+        if (jllama_train::finetune(cfg, err)) {
+            return utf8_to_jstring(env, std::string()); // empty == success
+        }
+    } catch (const std::exception &e) {
+        err = e.what();
+    } catch (...) {
+        err = "unknown C++ exception during fine-tuning";
+    }
+    // The Java side treats an empty string as success, so a failure must never be reported with
+    // an empty message (e.g. an exception whose what() is empty).
+    if (err.empty()) {
+        err = "fine-tuning failed without an error message";
+    }
+    return utf8_to_jstring(env, err);
+}
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+//
+// Native fine-tuning engine (proof-of-concept): a self-contained wrapper over llama.cpp's
+// ggml-opt training path (llama_opt_init / llama_opt_epoch), mirroring upstream
+// examples/training/finetune.cpp. Loads its own model + context (independent of the inference
+// server_context in jllama.cpp), fine-tunes on a text corpus, and writes a new GGUF via
+// llama_model_save_to_file. Kept out of jllama.cpp so the JNI layer stays thin.
+
+#ifndef JLLAMA_TRAIN_ENGINE_H
+#define JLLAMA_TRAIN_ENGINE_H
+
+#include <string>
+
+namespace jllama_train {
+
+// One fine-tuning run's inputs.
+//
+// Every scalar member has a default so that a default-constructed config never holds
+// indeterminate values. The defaults are the same fallbacks the JNI entry point applies when a
+// key is missing from the JSON configuration.
+struct finetune_config {
+    std::string model_path;            // base GGUF to fine-tune
+    std::string training_text;         // corpus supplied inline (used when training_file is empty)
+    std::string training_file;         // corpus read from this path instead of training_text
+    std::string output_path;           // where the fine-tuned GGUF is written
+    int         epochs = 2;            // number of passes over the corpus (>= 1)
+    float       learning_rate = 1e-5f; // lr at the first epoch
+    float       lr_min = -1.0f;        // minimum lr for decay; < 0 = no decay
+    float       decay_epochs = -1.0f;  // decay lr0 -> lr_min over this many epochs; <= 0 = disabled
+    float       weight_decay = 0.0f;   // weight decay; 0 = disabled
+    int         optimizer = 0;         // ggml_opt_optimizer_type: 0 = AdamW, 1 = SGD
+    int         n_ctx = 0;             // context size; 0 = the model's trained context
+    int         n_gpu_layers = -1;     // layers offloaded to the GPU; -1 = auto
+    float       val_split = 0.05f;     // fraction of the corpus held out for validation
+    int         n_batch = 0;           // logical batch size; 0 = native default
+    int         n_ubatch = 0;          // physical (micro) batch size; 0 = native default
+};
+
+// Run one fine-tuning job end to end.
+//
+// cfg: the inputs of the run (model, corpus, output path, optimizer and schedule settings).
+// err: receives a human-readable reason when the function returns false.
+//
+// Returns true on success; on failure returns false and sets `err`. Not re-entrant; intended to
+// be called off the JVM's critical threads (it blocks for the full training run).
+bool finetune(const finetune_config &cfg, std::string &err);
+
+} // namespace jllama_train
+
+#endif // JLLAMA_TRAIN_ENGINE_H
@@ -0,0 +1,65 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import java.nio.file.Path;
+import net.ladenthin.llama.exception.LlamaException;
+import net.ladenthin.llama.loader.LlamaLoader;
+import net.ladenthin.llama.parameters.TrainingParameters;
+
+/**
+ * Static entry point for in-process fine-tuning. It drives llama.cpp's ggml-opt training path
+ * ({@code llama_opt_init} / {@code llama_opt_epoch}) in the same manner as the upstream
+ * {@code examples/training/finetune.cpp} tool: a dedicated model and context are loaded
+ * (independent of {@link LlamaModel}), the text corpus is trained on, and a new GGUF is written.
+ *
+ * <p>Describe a run with {@link TrainingParameters} and hand it to
+ * {@link #finetune(TrainingParameters)}. Full-model fine-tuning is compute- and memory-intensive
+ * and the call blocks until the run has finished; upstream training support is itself
+ * experimental.
+ */
+public final class LlamaTrainer {
+
+    static {
+        LlamaLoader.initialize();
+    }
+
+    /** Not instantiable: the class only exposes static methods. */
+    private LlamaTrainer() {}
+
+    /**
+     * Executes a single fine-tuning job and returns once it has completed.
+     *
+     * @param parameters the training configuration (model, corpus, output, optimizer, schedule, ...)
+     * @throws LlamaException if the model cannot be loaded or training fails
+     */
+    public static void finetune(TrainingParameters parameters) {
+        final String failure = finetuneNative(parameters.toJson());
+        // The native layer reports success as an empty (or absent) message.
+        if (failure == null || failure.isEmpty()) {
+            return;
+        }
+        throw new LlamaException(failure);
+    }
+
+    /**
+     * Shorthand for fine-tuning on inline text, leaving every other setting at its default.
+     *
+     * @param model the base GGUF model to fine-tune
+     * @param trainingText the training corpus (tokenized in-process)
+     * @param output the path the fine-tuned GGUF is written to
+     * @param epochs number of passes over the corpus (at least 1)
+     * @param learningRate the AdamW learning rate at the first epoch (e.g. {@code 1e-5f})
+     * @throws LlamaException if the model cannot be loaded or training fails
+     */
+    public static void finetune(Path model, String trainingText, Path output, int epochs, float learningRate) {
+        final TrainingParameters parameters =
+                TrainingParameters.builder()
+                        .modelPath(model)
+                        .trainingText(trainingText)
+                        .outputPath(output)
+                        .epochs(epochs)
+                        .learningRate(learningRate)
+                        .build();
+        finetune(parameters);
+    }
+
+    /**
+     * Native bridge: takes the run configuration as JSON and returns an error message, which is
+     * empty when the run succeeded.
+     */
+    private static native String finetuneNative(String configJson);
+}
@@ -0,0 +1,33 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama.args;
+
+/**
+ * The optimizers selectable for {@link net.ladenthin.llama.LlamaTrainer} fine-tuning. Each
+ * constant carries the integer of the corresponding llama.cpp {@code ggml_opt_optimizer_type}.
+ */
+public enum Optimizer {
+
+    /** Adam with decoupled weight decay ({@code GGML_OPT_OPTIMIZER_TYPE_ADAMW}). The default. */
+    ADAMW(0),
+
+    /** Stochastic gradient descent ({@code GGML_OPT_OPTIMIZER_TYPE_SGD}). */
+    SGD(1);
+
+    /** Integer handed to the native layer for this optimizer. */
+    private final int code;
+
+    Optimizer(final int code) {
+        this.code = code;
+    }
+
+    /**
+     * Returns the integer passed to the native layer (it matches the
+     * {@code ggml_opt_optimizer_type} enum).
+     *
+     * @return the native optimizer-type ordinal
+     */
+    public int getNativeValue() {
+        return this.code;
+    }
+}