Skip to content

Commit 07f7788

Browse files
committed
Add configurable FineTuner API (TrainingParameters + Optimizer)
Build a real fine-tuning surface on top of the training POC: replace the fixed-arg LlamaTrainer.finetune with a TrainingParameters builder serialized as JSON across the JNI boundary (the same idiom as ModelParameters / InferenceParameters).

Exposes the training knobs the ggml-opt path supports:
- corpus from inline text OR a file (read natively)
- optimizer selection (AdamW / SGD) via the Optimizer enum
- learning-rate schedule (lr0, lr_min, decay_epochs, weight_decay)
- validation split, context size, GPU layers, logical/physical batch sizes

train_engine parses the JSON config, applies the knobs to common_params, and otherwise runs the same llama_opt_init / llama_opt_epoch loop as the POC.

Verified: the native library builds and links against b9842, finetuneNative is exported, and the library loads; TrainingParameters builder->JSON unit tests pass (3); the model-gated integration test now drives the run through TrainingParameters and self-skips without a model.

Progress callbacks (native->Java per-epoch loss) are the planned next step: the ggml_opt_epoch_callback has no userdata slot, so that needs a thread-local trampoline and is kept out of this change until it can be exercised end to end.
1 parent 2b9fc24 commit 07f7788

7 files changed

Lines changed: 322 additions & 59 deletions

File tree

src/main/cpp/train_engine.cpp

Lines changed: 61 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,55 @@
88
#include "ggml-opt.h"
99
#include "llama.h"
1010

11+
#include <nlohmann/json.hpp>
12+
1113
#include <jni.h>
1214

1315
#include <exception>
16+
#include <fstream>
17+
#include <iterator>
1418
#include <string>
1519
#include <vector>
1620

21+
using json = nlohmann::json;
22+
1723
namespace jllama_train {
1824

1925
bool finetune(const finetune_config &cfg, std::string &err) {
2026
common_params params;
2127
params.escape = false;
2228
params.model.path = cfg.model_path;
23-
params.prompt = cfg.training_text;
2429
params.out_file = cfg.output_path;
2530
params.n_ctx = cfg.n_ctx;
2631
params.n_gpu_layers = cfg.n_gpu_layers;
32+
params.val_split = cfg.val_split;
33+
if (cfg.n_batch > 0) {
34+
params.n_batch = cfg.n_batch;
35+
}
36+
if (cfg.n_ubatch > 0) {
37+
params.n_ubatch = cfg.n_ubatch;
38+
}
39+
40+
params.optimizer =
41+
cfg.optimizer == 1 ? GGML_OPT_OPTIMIZER_TYPE_SGD : GGML_OPT_OPTIMIZER_TYPE_ADAMW;
2742
params.lr.lr0 = cfg.learning_rate;
43+
params.lr.lr_min = cfg.lr_min;
44+
params.lr.decay_epochs = cfg.decay_epochs;
45+
params.lr.wd = cfg.weight_decay;
2846
params.lr.epochs = static_cast<unsigned>(cfg.epochs > 0 ? cfg.epochs : 1);
29-
params.lr.init(); // required after setting lr fields, before get_lr() is used by the optimizer
47+
params.lr.init(); // required after setting lr fields, before the optimizer reads get_lr()
48+
49+
// The corpus is either read from a file or supplied inline.
50+
if (!cfg.training_file.empty()) {
51+
std::ifstream in(cfg.training_file, std::ios::binary);
52+
if (!in) {
53+
err = "cannot open training file: " + cfg.training_file;
54+
return false;
55+
}
56+
params.prompt.assign(std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>());
57+
} else {
58+
params.prompt = cfg.training_text;
59+
}
3060

3161
// Training needs writable weights (mmap yields read-only pointers) and an f32 KV cache
3262
// (OUT_PROD has no f16 support) — same forced settings as upstream finetune.cpp.
@@ -47,7 +77,7 @@ bool finetune(const finetune_config &cfg, std::string &err) {
4777

4878
std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
4979
if (tokens.size() < 2) {
50-
err = "training text produced too few tokens (need at least 2)";
80+
err = "training corpus produced too few tokens (need at least 2)";
5181
return false;
5282
}
5383

@@ -88,29 +118,38 @@ bool finetune(const finetune_config &cfg, std::string &err) {
88118

89119
} // namespace jllama_train
90120

91-
extern "C" JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaTrainer_finetuneNative(
92-
JNIEnv *env, jclass, jstring jmodel, jstring jtext, jstring jout, jint epochs, jfloat learningRate,
93-
jint nCtx, jint nGpuLayers) {
94-
const auto to_str = [env](jstring s) -> std::string {
95-
if (s == nullptr) {
96-
return "";
97-
}
98-
const char *c = env->GetStringUTFChars(s, nullptr);
99-
std::string out = c != nullptr ? c : "";
121+
extern "C" JNIEXPORT jstring JNICALL
122+
Java_net_ladenthin_llama_LlamaTrainer_finetuneNative(JNIEnv *env, jclass, jstring jconfig) {
123+
std::string config_json;
124+
if (jconfig != nullptr) {
125+
const char *c = env->GetStringUTFChars(jconfig, nullptr);
100126
if (c != nullptr) {
101-
env->ReleaseStringUTFChars(s, c);
127+
config_json = c;
128+
env->ReleaseStringUTFChars(jconfig, c);
102129
}
103-
return out;
104-
};
130+
}
105131

106132
jllama_train::finetune_config cfg;
107-
cfg.model_path = to_str(jmodel);
108-
cfg.training_text = to_str(jtext);
109-
cfg.output_path = to_str(jout);
110-
cfg.epochs = static_cast<int>(epochs);
111-
cfg.learning_rate = static_cast<float>(learningRate);
112-
cfg.n_ctx = static_cast<int>(nCtx);
113-
cfg.n_gpu_layers = static_cast<int>(nGpuLayers);
133+
try {
134+
const json j = json::parse(config_json);
135+
cfg.model_path = j.value("model_path", std::string());
136+
cfg.training_text = j.value("training_text", std::string());
137+
cfg.training_file = j.value("training_file", std::string());
138+
cfg.output_path = j.value("output_path", std::string());
139+
cfg.epochs = j.value("epochs", 2);
140+
cfg.learning_rate = j.value("learning_rate", 1e-5f);
141+
cfg.lr_min = j.value("lr_min", -1.0f);
142+
cfg.decay_epochs = j.value("decay_epochs", -1.0f);
143+
cfg.weight_decay = j.value("weight_decay", 0.0f);
144+
cfg.optimizer = j.value("optimizer", 0);
145+
cfg.n_ctx = j.value("n_ctx", 0);
146+
cfg.n_gpu_layers = j.value("n_gpu_layers", -1);
147+
cfg.val_split = j.value("val_split", 0.05f);
148+
cfg.n_batch = j.value("n_batch", 0);
149+
cfg.n_ubatch = j.value("n_ubatch", 0);
150+
} catch (const std::exception &e) {
151+
return env->NewStringUTF((std::string("invalid training config: ") + e.what()).c_str());
152+
}
114153

115154
std::string err;
116155
try {

src/main/cpp/train_engine.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,20 @@ namespace jllama_train {
1818
// One fine-tuning run's inputs.
1919
struct finetune_config {
2020
std::string model_path; // base GGUF to fine-tune
21-
std::string training_text; // corpus (tokenized in-process)
21+
std::string training_text; // corpus supplied inline (used when training_file is empty)
22+
std::string training_file; // corpus read from this path instead of training_text
2223
std::string output_path; // where the fine-tuned GGUF is written
2324
int epochs; // number of passes over the corpus (>= 1)
24-
float learning_rate; // AdamW lr at the first epoch
25+
float learning_rate; // lr at the first epoch
26+
float lr_min; // minimum lr for decay; < 0 = no decay
27+
float decay_epochs; // decay lr0 -> lr_min over this many epochs; <= 0 = disabled
28+
float weight_decay; // weight decay; 0 = disabled
29+
int optimizer; // ggml_opt_optimizer_type: 0 = AdamW, 1 = SGD
2530
int n_ctx; // context size; 0 = the model's trained context
2631
int n_gpu_layers; // layers offloaded to the GPU; -1 = auto
32+
float val_split; // fraction of the corpus held out for validation
33+
int n_batch; // logical batch size; 0 = native default
34+
int n_ubatch; // physical (micro) batch size; 0 = native default
2735
};
2836

2937
// Run one fine-tuning job end to end. Returns true on success; on failure returns false and sets

src/main/java/net/ladenthin/llama/LlamaTrainer.java

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,17 @@
77
import java.nio.file.Path;
88
import net.ladenthin.llama.exception.LlamaException;
99
import net.ladenthin.llama.loader.LlamaLoader;
10+
import net.ladenthin.llama.parameters.TrainingParameters;
1011

1112
/**
12-
* Proof-of-concept in-process fine-tuning entry point, wrapping llama.cpp's ggml-opt training path
13+
* In-process fine-tuning entry point, wrapping llama.cpp's ggml-opt training path
1314
* ({@code llama_opt_init} / {@code llama_opt_epoch}) the same way the upstream
1415
* {@code examples/training/finetune.cpp} tool does. Loads its own model and context (independent of
1516
* {@link LlamaModel}), fine-tunes on a text corpus, and writes a new GGUF.
1617
*
17-
* <p><strong>Status: proof of concept.</strong> Full-model fine-tuning is compute- and
18-
* memory-intensive and blocks for the whole run; upstream training support is itself experimental.
19-
* This surface is intentionally minimal so the native path (which links ggml-opt into
20-
* {@code libjllama} with no extra dependency) can be exercised end to end before a richer
21-
* {@code FineTuner} API is designed.
18+
* <p>Configure a run with {@link TrainingParameters} and pass it to {@link #finetune(TrainingParameters)}.
19+
* Full-model fine-tuning is compute- and memory-intensive and blocks for the whole run; upstream
20+
* training support is itself experimental.
2221
*/
2322
public final class LlamaTrainer {
2423

@@ -29,48 +28,38 @@ public final class LlamaTrainer {
2928
private LlamaTrainer() {}
3029

3130
/**
32-
* Fine-tune {@code model} on {@code trainingText} for {@code epochs} passes, writing the result
33-
* to {@code output}. Uses the model's trained context size and automatic GPU-layer selection.
31+
* Run one fine-tuning job to completion.
3432
*
35-
* @param model the base GGUF model to fine-tune
36-
* @param trainingText the training corpus (tokenized in-process)
37-
* @param output the path the fine-tuned GGUF is written to
38-
* @param epochs number of passes over the corpus (at least 1)
39-
* @param learningRate the AdamW learning rate at the first epoch (e.g. {@code 1e-5f})
33+
* @param parameters the training configuration (model, corpus, output, optimizer, schedule, ...)
4034
* @throws LlamaException if the model cannot be loaded or training fails
4135
*/
42-
public static void finetune(Path model, String trainingText, Path output, int epochs, float learningRate) {
43-
finetune(model, trainingText, output, epochs, learningRate, 0, -1);
36+
public static void finetune(TrainingParameters parameters) {
37+
String error = finetuneNative(parameters.toJson());
38+
if (error != null && !error.isEmpty()) {
39+
throw new LlamaException(error);
40+
}
4441
}
4542

4643
/**
47-
* Fine-tune {@code model} on {@code trainingText}, with explicit context size and GPU offload.
44+
* Convenience fine-tune with inline text and otherwise-default settings.
4845
*
4946
* @param model the base GGUF model to fine-tune
5047
* @param trainingText the training corpus (tokenized in-process)
5148
* @param output the path the fine-tuned GGUF is written to
5249
* @param epochs number of passes over the corpus (at least 1)
5350
* @param learningRate the AdamW learning rate at the first epoch (e.g. {@code 1e-5f})
54-
* @param nCtx context size in tokens, or {@code 0} to use the model's trained context
55-
* @param nGpuLayers number of layers to offload to the GPU, or {@code -1} for automatic
5651
* @throws LlamaException if the model cannot be loaded or training fails
5752
*/
58-
public static void finetune(
59-
Path model, String trainingText, Path output, int epochs, float learningRate, int nCtx, int nGpuLayers) {
60-
String error =
61-
finetuneNative(
62-
model.toString(), trainingText, output.toString(), epochs, learningRate, nCtx, nGpuLayers);
63-
if (error != null && !error.isEmpty()) {
64-
throw new LlamaException(error);
65-
}
53+
public static void finetune(Path model, String trainingText, Path output, int epochs, float learningRate) {
54+
finetune(
55+
TrainingParameters.builder()
56+
.modelPath(model)
57+
.trainingText(trainingText)
58+
.outputPath(output)
59+
.epochs(epochs)
60+
.learningRate(learningRate)
61+
.build());
6662
}
6763

68-
private static native String finetuneNative(
69-
String modelPath,
70-
String trainingText,
71-
String outputPath,
72-
int epochs,
73-
float learningRate,
74-
int nCtx,
75-
int nGpuLayers);
64+
private static native String finetuneNative(String configJson);
7665
}
src/main/java/net/ladenthin/llama/args/Optimizer.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
package net.ladenthin.llama.args;
6+
7+
/**
8+
* Optimizer used by {@link net.ladenthin.llama.LlamaTrainer} fine-tuning, mapping to llama.cpp's
9+
* {@code ggml_opt_optimizer_type}.
10+
*/
11+
public enum Optimizer {
12+
13+
/** Adam with decoupled weight decay ({@code GGML_OPT_OPTIMIZER_TYPE_ADAMW}). The default. */
14+
ADAMW(0),
15+
16+
/** Stochastic gradient descent ({@code GGML_OPT_OPTIMIZER_TYPE_SGD}). */
17+
SGD(1);
18+
19+
private final int nativeValue;
20+
21+
Optimizer(int nativeValue) {
22+
this.nativeValue = nativeValue;
23+
}
24+
25+
/**
26+
* The integer value passed to the native layer (matches the {@code ggml_opt_optimizer_type} enum).
27+
*
28+
* @return the native optimizer-type ordinal
29+
*/
30+
public int getNativeValue() {
31+
return nativeValue;
32+
}
33+
}
src/main/java/net/ladenthin/llama/parameters/TrainingParameters.java

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
package net.ladenthin.llama.parameters;
6+
7+
import com.fasterxml.jackson.databind.ObjectMapper;
8+
import com.fasterxml.jackson.databind.node.ObjectNode;
9+
import java.nio.file.Path;
10+
import lombok.Builder;
11+
import lombok.Getter;
12+
import net.ladenthin.llama.args.Optimizer;
13+
import org.jspecify.annotations.Nullable;
14+
15+
/**
16+
* Immutable configuration for a {@link net.ladenthin.llama.LlamaTrainer} fine-tuning run.
17+
*
18+
* <p>Build with {@link #builder()}; only {@code modelPath} and {@code outputPath} are required, and
19+
* exactly one of {@code trainingText} / {@code trainingFile} should be set. All other fields default
20+
* to values that mirror upstream llama.cpp's fine-tuning defaults. The configuration is serialized to
21+
* JSON via {@link #toJson()} and parsed by the native layer, the same way {@link ModelParameters} and
22+
* {@link InferenceParameters} cross the JNI boundary.
23+
*/
24+
@Builder
25+
@Getter
26+
public final class TrainingParameters {
27+
28+
// Base GGUF model to fine-tune.
29+
private final Path modelPath;
30+
31+
// Training corpus supplied inline; mutually exclusive with trainingFile.
32+
private final @Nullable String trainingText;
33+
34+
// Training corpus read from a file by the native layer; mutually exclusive with trainingText.
35+
private final @Nullable Path trainingFile;
36+
37+
// Destination path for the fine-tuned GGUF.
38+
private final Path outputPath;
39+
40+
// Number of passes over the corpus (at least 1).
41+
@Builder.Default private final int epochs = 2;
42+
43+
// Learning rate at the first epoch.
44+
@Builder.Default private final float learningRate = 1e-5f;
45+
46+
// Minimum learning rate for decay, or -1 to disable decay.
47+
@Builder.Default private final float lrMin = -1f;
48+
49+
// If > 0, decay the learning rate from learningRate to lrMin over this many epochs.
50+
@Builder.Default private final float decayEpochs = -1f;
51+
52+
// Weight decay (0 disables it).
53+
@Builder.Default private final float weightDecay = 0f;
54+
55+
// Optimizer algorithm.
56+
@Builder.Default private final Optimizer optimizer = Optimizer.ADAMW;
57+
58+
// Context size in tokens, or 0 to use the model's trained context.
59+
@Builder.Default private final int nCtx = 0;
60+
61+
// Layers to offload to the GPU, or -1 for automatic.
62+
@Builder.Default private final int nGpuLayers = -1;
63+
64+
// Fraction of the corpus held out for validation.
65+
@Builder.Default private final float valSplit = 0.05f;
66+
67+
// Logical batch size, or 0 to use the native default.
68+
@Builder.Default private final int nBatch = 0;
69+
70+
// Physical (micro) batch size, or 0 to use the native default.
71+
@Builder.Default private final int nUbatch = 0;
72+
73+
private static final ObjectMapper MAPPER = new ObjectMapper();
74+
75+
/**
76+
* Serialize this configuration to the JSON object the native fine-tuning layer expects.
77+
*
78+
* @return a compact JSON string
79+
*/
80+
public String toJson() {
81+
ObjectNode node = MAPPER.createObjectNode();
82+
node.put("model_path", modelPath.toString());
83+
if (trainingText != null) {
84+
node.put("training_text", trainingText);
85+
}
86+
if (trainingFile != null) {
87+
node.put("training_file", trainingFile.toString());
88+
}
89+
node.put("output_path", outputPath.toString());
90+
node.put("epochs", epochs);
91+
node.put("learning_rate", learningRate);
92+
node.put("lr_min", lrMin);
93+
node.put("decay_epochs", decayEpochs);
94+
node.put("weight_decay", weightDecay);
95+
node.put("optimizer", optimizer.getNativeValue());
96+
node.put("n_ctx", nCtx);
97+
node.put("n_gpu_layers", nGpuLayers);
98+
node.put("val_split", valSplit);
99+
node.put("n_batch", nBatch);
100+
node.put("n_ubatch", nUbatch);
101+
return node.toString();
102+
}
103+
}

0 commit comments

Comments
 (0)