Add configurable FineTuner API (TrainingParameters + Optimizer)

vaiju1981 · vaiju1981 · commit 07f7788b5cf7 · 2026-07-01T13:26:50.000-07:00
Build a real fine-tuning surface on top of the training POC: replace the
fixed-arg LlamaTrainer.finetune with a TrainingParameters builder serialized as
JSON across the JNI boundary (the same idiom as ModelParameters /
InferenceParameters).

Exposes the training knobs the ggml-opt path supports:
- corpus from inline text OR a file (read natively)
- optimizer selection (AdamW / SGD) via the Optimizer enum
- learning-rate schedule (lr0, lr_min, decay_epochs, weight_decay)
- validation split, context size, GPU layers, logical/physical batch sizes

train_engine parses the JSON config, applies the knobs to common_params, and
otherwise runs the same llama_opt_init / llama_opt_epoch loop as the POC.

Verified: the native library builds and links against b9842, finetuneNative is
exported, and the library loads; TrainingParameters builder->JSON unit tests
pass (3); the model-gated integration test now drives the run through
TrainingParameters and self-skips without a model.

Progress callbacks (native->Java per-epoch loss) are the planned next step: the
ggml_opt_epoch_callback has no userdata slot, so that needs a thread-local
trampoline and is kept out of this change until it can be exercised end to end.
diff --git a/src/main/cpp/train_engine.cpp b/src/main/cpp/train_engine.cpp
@@ -8,25 +8,55 @@
 #include "ggml-opt.h"
 #include "llama.h"
 
+#include <nlohmann/json.hpp>
+
 #include <jni.h>
 
 #include <exception>
+#include <fstream>
+#include <iterator>
 #include <string>
 #include <vector>
 
+using json = nlohmann::json;
+
 namespace jllama_train {
 
 bool finetune(const finetune_config &cfg, std::string &err) {
     common_params params;
     params.escape = false;
     params.model.path = cfg.model_path;
-    params.prompt = cfg.training_text;
     params.out_file = cfg.output_path;
     params.n_ctx = cfg.n_ctx;
     params.n_gpu_layers = cfg.n_gpu_layers;
+    params.val_split = cfg.val_split;
+    if (cfg.n_batch > 0) {
+        params.n_batch = cfg.n_batch;
+    }
+    if (cfg.n_ubatch > 0) {
+        params.n_ubatch = cfg.n_ubatch;
+    }
+
+    params.optimizer =
+        cfg.optimizer == 1 ? GGML_OPT_OPTIMIZER_TYPE_SGD : GGML_OPT_OPTIMIZER_TYPE_ADAMW;
     params.lr.lr0 = cfg.learning_rate;
+    params.lr.lr_min = cfg.lr_min;
+    params.lr.decay_epochs = cfg.decay_epochs;
+    params.lr.wd = cfg.weight_decay;
     params.lr.epochs = static_cast<unsigned>(cfg.epochs > 0 ? cfg.epochs : 1);
-    params.lr.init(); // required after setting lr fields, before get_lr() is used by the optimizer
+    params.lr.init(); // required after setting lr fields, before the optimizer reads get_lr()
+
+    // The corpus is either read from a file or supplied inline.
+    if (!cfg.training_file.empty()) {
+        std::ifstream in(cfg.training_file, std::ios::binary);
+        if (!in) {
+            err = "cannot open training file: " + cfg.training_file;
+            return false;
+        }
+        params.prompt.assign(std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>());
+    } else {
+        params.prompt = cfg.training_text;
+    }
 
     // Training needs writable weights (mmap yields read-only pointers) and an f32 KV cache
     // (OUT_PROD has no f16 support) — same forced settings as upstream finetune.cpp.
@@ -47,7 +77,7 @@ bool finetune(const finetune_config &cfg, std::string &err) {
 
     std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
     if (tokens.size() < 2) {
-        err = "training text produced too few tokens (need at least 2)";
+        err = "training corpus produced too few tokens (need at least 2)";
         return false;
     }
 
@@ -88,29 +118,38 @@ bool finetune(const finetune_config &cfg, std::string &err) {
 
 } // namespace jllama_train
 
-extern "C" JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaTrainer_finetuneNative(
-    JNIEnv *env, jclass, jstring jmodel, jstring jtext, jstring jout, jint epochs, jfloat learningRate,
-    jint nCtx, jint nGpuLayers) {
-    const auto to_str = [env](jstring s) -> std::string {
-        if (s == nullptr) {
-            return "";
-        }
-        const char *c = env->GetStringUTFChars(s, nullptr);
-        std::string out = c != nullptr ? c : "";
+extern "C" JNIEXPORT jstring JNICALL
+Java_net_ladenthin_llama_LlamaTrainer_finetuneNative(JNIEnv *env, jclass, jstring jconfig) {
+    std::string config_json;
+    if (jconfig != nullptr) {
+        const char *c = env->GetStringUTFChars(jconfig, nullptr);
         if (c != nullptr) {
-            env->ReleaseStringUTFChars(s, c);
+            config_json = c;
+            env->ReleaseStringUTFChars(jconfig, c);
         }
-        return out;
-    };
+    }
 
     jllama_train::finetune_config cfg;
-    cfg.model_path = to_str(jmodel);
-    cfg.training_text = to_str(jtext);
-    cfg.output_path = to_str(jout);
-    cfg.epochs = static_cast<int>(epochs);
-    cfg.learning_rate = static_cast<float>(learningRate);
-    cfg.n_ctx = static_cast<int>(nCtx);
-    cfg.n_gpu_layers = static_cast<int>(nGpuLayers);
+    try {
+        const json j = json::parse(config_json);
+        cfg.model_path = j.value("model_path", std::string());
+        cfg.training_text = j.value("training_text", std::string());
+        cfg.training_file = j.value("training_file", std::string());
+        cfg.output_path = j.value("output_path", std::string());
+        cfg.epochs = j.value("epochs", 2);
+        cfg.learning_rate = j.value("learning_rate", 1e-5f);
+        cfg.lr_min = j.value("lr_min", -1.0f);
+        cfg.decay_epochs = j.value("decay_epochs", -1.0f);
+        cfg.weight_decay = j.value("weight_decay", 0.0f);
+        cfg.optimizer = j.value("optimizer", 0);
+        cfg.n_ctx = j.value("n_ctx", 0);
+        cfg.n_gpu_layers = j.value("n_gpu_layers", -1);
+        cfg.val_split = j.value("val_split", 0.05f);
+        cfg.n_batch = j.value("n_batch", 0);
+        cfg.n_ubatch = j.value("n_ubatch", 0);
+    } catch (const std::exception &e) {
+        return env->NewStringUTF((std::string("invalid training config: ") + e.what()).c_str());
+    }
 
     std::string err;
     try {
diff --git a/src/main/cpp/train_engine.h b/src/main/cpp/train_engine.h
@@ -18,12 +18,20 @@ namespace jllama_train {
 // One fine-tuning run's inputs.
 struct finetune_config {
     std::string model_path;    // base GGUF to fine-tune
-    std::string training_text; // corpus (tokenized in-process)
+    std::string training_text; // corpus supplied inline (used when training_file is empty)
+    std::string training_file; // when non-empty, the corpus is read from this path instead of training_text
     std::string output_path;   // where the fine-tuned GGUF is written
     int         epochs;        // number of passes over the corpus (>= 1)
-    float       learning_rate; // AdamW lr at the first epoch
+    float       learning_rate; // lr at the first epoch (becomes lr0 of the schedule)
+    float       lr_min;        // minimum lr for decay; < 0 = no decay
+    float       decay_epochs;  // decay lr0 -> lr_min over this many epochs; <= 0 = disabled
+    float       weight_decay;  // weight decay; 0 = disabled
+    int         optimizer;     // 1 = SGD; any other value selects AdamW (the Java enum sends 0)
     int         n_ctx;         // context size; 0 = the model's trained context
     int         n_gpu_layers;  // layers offloaded to the GPU; -1 = auto
+    float       val_split;     // fraction of the corpus held out for validation
+    int         n_batch;       // logical batch size; <= 0 = keep the common_params default
+    int         n_ubatch;      // physical (micro) batch size; <= 0 = keep the common_params default
 };
 
 // Run one fine-tuning job end to end. Returns true on success; on failure returns false and sets
diff --git a/src/main/java/net/ladenthin/llama/LlamaTrainer.java b/src/main/java/net/ladenthin/llama/LlamaTrainer.java
@@ -7,18 +7,17 @@
 import java.nio.file.Path;
 import net.ladenthin.llama.exception.LlamaException;
 import net.ladenthin.llama.loader.LlamaLoader;
+import net.ladenthin.llama.parameters.TrainingParameters;
 
 /**
- * Proof-of-concept in-process fine-tuning entry point, wrapping llama.cpp's ggml-opt training path
+ * In-process fine-tuning entry point, wrapping llama.cpp's ggml-opt training path
  * ({@code llama_opt_init} / {@code llama_opt_epoch}) the same way the upstream
  * {@code examples/training/finetune.cpp} tool does. Loads its own model and context (independent of
  * {@link LlamaModel}), fine-tunes on a text corpus, and writes a new GGUF.
  *
- * <p><strong>Status: proof of concept.</strong> Full-model fine-tuning is compute- and
- * memory-intensive and blocks for the whole run; upstream training support is itself experimental.
- * This surface is intentionally minimal so the native path (which links ggml-opt into
- * {@code libjllama} with no extra dependency) can be exercised end to end before a richer
- * {@code FineTuner} API is designed.
+ * <p>Configure a run with {@link TrainingParameters} and pass it to {@link #finetune(TrainingParameters)}.
+ * Full-model fine-tuning is compute- and memory-intensive and blocks for the whole run; upstream
+ * training support is itself experimental.
  */
 public final class LlamaTrainer {
 
@@ -29,48 +28,38 @@ public final class LlamaTrainer {
     private LlamaTrainer() {}
 
     /**
-     * Fine-tune {@code model} on {@code trainingText} for {@code epochs} passes, writing the result
-     * to {@code output}. Uses the model's trained context size and automatic GPU-layer selection.
+     * Run one fine-tuning job to completion.
      *
-     * @param model the base GGUF model to fine-tune
-     * @param trainingText the training corpus (tokenized in-process)
-     * @param output the path the fine-tuned GGUF is written to
-     * @param epochs number of passes over the corpus (at least 1)
-     * @param learningRate the AdamW learning rate at the first epoch (e.g. {@code 1e-5f})
+     * @param parameters the training configuration (model, corpus, output, optimizer, schedule, ...)
      * @throws LlamaException if the model cannot be loaded or training fails
      */
-    public static void finetune(Path model, String trainingText, Path output, int epochs, float learningRate) {
-        finetune(model, trainingText, output, epochs, learningRate, 0, -1);
+    public static void finetune(TrainingParameters parameters) {
+        String error = finetuneNative(parameters.toJson());
+        if (error != null && !error.isEmpty()) {
+            throw new LlamaException(error);
+        }
     }
 
     /**
-     * Fine-tune {@code model} on {@code trainingText}, with explicit context size and GPU offload.
+     * Convenience fine-tune with inline text and otherwise-default settings.
      *
      * @param model the base GGUF model to fine-tune
      * @param trainingText the training corpus (tokenized in-process)
      * @param output the path the fine-tuned GGUF is written to
      * @param epochs number of passes over the corpus (at least 1)
      * @param learningRate the AdamW learning rate at the first epoch (e.g. {@code 1e-5f})
-     * @param nCtx context size in tokens, or {@code 0} to use the model's trained context
-     * @param nGpuLayers number of layers to offload to the GPU, or {@code -1} for automatic
      * @throws LlamaException if the model cannot be loaded or training fails
      */
-    public static void finetune(
-            Path model, String trainingText, Path output, int epochs, float learningRate, int nCtx, int nGpuLayers) {
-        String error =
-                finetuneNative(
-                        model.toString(), trainingText, output.toString(), epochs, learningRate, nCtx, nGpuLayers);
-        if (error != null && !error.isEmpty()) {
-            throw new LlamaException(error);
-        }
+    public static void finetune(Path model, String trainingText, Path output, int epochs, float learningRate) {
+        finetune(
+                TrainingParameters.builder()
+                        .modelPath(model)
+                        .trainingText(trainingText)
+                        .outputPath(output)
+                        .epochs(epochs)
+                        .learningRate(learningRate)
+                        .build());
     }
 
-    private static native String finetuneNative(
-            String modelPath,
-            String trainingText,
-            String outputPath,
-            int epochs,
-            float learningRate,
-            int nCtx,
-            int nGpuLayers);
+    private static native String finetuneNative(String configJson);
 }
diff --git a/src/main/java/net/ladenthin/llama/args/Optimizer.java b/src/main/java/net/ladenthin/llama/args/Optimizer.java
@@ -0,0 +1,33 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama.args;
+
+/**
+ * Selects the optimization algorithm for {@link net.ladenthin.llama.LlamaTrainer} fine-tuning; each
+ * constant corresponds to a value of llama.cpp's {@code ggml_opt_optimizer_type}.
+ */
+public enum Optimizer {
+
+    /** AdamW, i.e. Adam with decoupled weight decay ({@code GGML_OPT_OPTIMIZER_TYPE_ADAMW}). The default. */
+    ADAMW(0),
+
+    /** Plain stochastic gradient descent ({@code GGML_OPT_OPTIMIZER_TYPE_SGD}). */
+    SGD(1);
+
+    private final int code;
+
+    Optimizer(final int code) {
+        this.code = code;
+    }
+
+    /**
+     * Returns the integer handed to the native layer to identify this optimizer.
+     *
+     * @return {@code 0} for {@link #ADAMW}, {@code 1} for {@link #SGD}
+     */
+    public int getNativeValue() {
+        return code;
+    }
+}
diff --git a/src/main/java/net/ladenthin/llama/parameters/TrainingParameters.java b/src/main/java/net/ladenthin/llama/parameters/TrainingParameters.java
@@ -0,0 +1,103 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama.parameters;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.nio.file.Path;
+import lombok.Builder;
+import lombok.Getter;
+import net.ladenthin.llama.args.Optimizer;
+import org.jspecify.annotations.Nullable;
+
+/**
+ * Immutable configuration for a {@link net.ladenthin.llama.LlamaTrainer} fine-tuning run.
+ *
+ * <p>Build with {@link #builder()}; only {@code modelPath} and {@code outputPath} are required, and
+ * exactly one of {@code trainingText} / {@code trainingFile} should be set. All other fields default
+ * to values that mirror upstream llama.cpp's fine-tuning defaults. {@link #toJson()} emits the JSON the
+ * native layer parses, as {@link ModelParameters} and {@link InferenceParameters} do across JNI.
+ */
+@Builder
+@Getter
+public final class TrainingParameters {
+
+    // Base GGUF model to fine-tune. Required: toJson() rejects a missing value.
+    private final Path modelPath;
+
+    // Inline training corpus; the native layer ignores it when trainingFile is set.
+    private final @Nullable String trainingText;
+
+    // File the native layer reads the training corpus from; takes precedence over trainingText.
+    private final @Nullable Path trainingFile;
+
+    // Destination path for the fine-tuned GGUF. Required: toJson() rejects a missing value.
+    private final Path outputPath;
+
+    // Number of passes over the corpus; the native layer always runs at least one.
+    @Builder.Default private final int epochs = 2;
+
+    // Learning rate at the first epoch.
+    @Builder.Default private final float learningRate = 1e-5f;
+
+    // Minimum learning rate for decay, or -1 to disable decay.
+    @Builder.Default private final float lrMin = -1f;
+
+    // If > 0, decay the learning rate from learningRate to lrMin over this many epochs.
+    @Builder.Default private final float decayEpochs = -1f;
+
+    // Weight decay (0 disables it).
+    @Builder.Default private final float weightDecay = 0f;
+
+    // Optimizer algorithm; serialized as its native integer value.
+    @Builder.Default private final Optimizer optimizer = Optimizer.ADAMW;
+
+    // Context size in tokens, or 0 to use the model's trained context.
+    @Builder.Default private final int nCtx = 0;
+
+    // Layers to offload to the GPU, or -1 for automatic.
+    @Builder.Default private final int nGpuLayers = -1;
+
+    // Fraction of the corpus held out for validation.
+    @Builder.Default private final float valSplit = 0.05f;
+
+    // Logical batch size; 0 (or any non-positive value) keeps the native default.
+    @Builder.Default private final int nBatch = 0;
+
+    // Physical (micro) batch size; 0 (or any non-positive value) keeps the native default.
+    @Builder.Default private final int nUbatch = 0;
+
+    private static final ObjectMapper MAPPER = new ObjectMapper();
+
+    /**
+     * Serialize this configuration to the JSON object the native fine-tuning layer expects.
+     *
+     * @return a compact JSON string
+     * @throws NullPointerException if {@code modelPath} or {@code outputPath} was never set
+     */
+    public String toJson() {
+        ObjectNode node = MAPPER.createObjectNode();
+        node.put("model_path", java.util.Objects.requireNonNull(modelPath, "modelPath is required").toString());
+        if (trainingText != null) {
+            node.put("training_text", trainingText);
+        }
+        if (trainingFile != null) {
+            node.put("training_file", trainingFile.toString());
+        }
+        node.put("output_path", java.util.Objects.requireNonNull(outputPath, "outputPath is required").toString());
+        node.put("epochs", epochs);
+        node.put("learning_rate", learningRate);
+        node.put("lr_min", lrMin);
+        node.put("decay_epochs", decayEpochs);
+        node.put("weight_decay", weightDecay);
+        node.put("optimizer", optimizer.getNativeValue());
+        node.put("n_ctx", nCtx);
+        node.put("n_gpu_layers", nGpuLayers);
+        node.put("val_split", valSplit);
+        node.put("n_batch", nBatch);
+        node.put("n_ubatch", nUbatch);
+        return node.toString(); // NOTE(review): crosses JNI as modified UTF-8; confirm non-BMP text parses natively
+    }
+}
diff --git a/src/test/java/net/ladenthin/llama/LlamaTrainerIntegrationTest.java b/src/test/java/net/ladenthin/llama/LlamaTrainerIntegrationTest.java
diff --git a/src/test/java/net/ladenthin/llama/parameters/TrainingParametersTest.java b/src/test/java/net/ladenthin/llama/parameters/TrainingParametersTest.java