Skip to content

Commit 2b9fc24

Browse files
committed
Add in-process fine-tuning proof of concept (LlamaTrainer)
Wire llama.cpp's ggml-opt training path into the JNI layer, mirroring upstream examples/training/finetune.cpp: load a model, tokenize a text corpus into a ggml-opt dataset, run llama_opt_init + llama_opt_epoch for N epochs, and write the fine-tuned GGUF via llama_model_save_to_file.

- train_engine.{h,cpp} - self-contained native finetune(), independent of the inference server_context (loads its own model + context; forces no-mmap and an f32 KV cache, as training requires)
- LlamaTrainer - minimal Java entry point (static finetune(...) overloads)
- CMakeLists.txt - compile train_engine.cpp into libjllama

The ggml-opt / llama_opt symbols already link into the static libjllama with no build-system change (verified with nm), so this is pure JNI + C++ wiring. The finetuneNative symbol is exported, the library links and loads cleanly, and the Java layer compiles through the strict Error Prone / NullAway pipeline.

Scope is deliberately a proof of concept: full-model fine-tuning is compute- and memory-intensive and upstream training support is experimental. The actual training run is exercised by a model-gated integration test that self-skips unless -Dnet.ladenthin.llama.train.model is set. A richer FineTuner API (dataset handling, optimizer / LoRA options, progress callbacks) can build on this base.
1 parent b5ee309 commit 2b9fc24

5 files changed

Lines changed: 282 additions & 0 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ endif()
308308
add_library(jllama SHARED
309309
src/main/cpp/jllama.cpp
310310
src/main/cpp/tts_engine.cpp
311+
src/main/cpp/train_engine.cpp
311312
${JLLAMA_TTS_GEN_CPP}
312313
src/main/cpp/utils.hpp
313314
${llama.cpp_SOURCE_DIR}/tools/server/server-common.cpp

src/main/cpp/train_engine.cpp

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
#include "train_engine.h"
6+
7+
#include "common.h"
8+
#include "ggml-opt.h"
9+
#include "llama.h"
10+
11+
#include <jni.h>
12+
13+
#include <exception>
14+
#include <string>
15+
#include <vector>
16+
17+
namespace jllama_train {
18+
19+
bool finetune(const finetune_config &cfg, std::string &err) {
20+
common_params params;
21+
params.escape = false;
22+
params.model.path = cfg.model_path;
23+
params.prompt = cfg.training_text;
24+
params.out_file = cfg.output_path;
25+
params.n_ctx = cfg.n_ctx;
26+
params.n_gpu_layers = cfg.n_gpu_layers;
27+
params.lr.lr0 = cfg.learning_rate;
28+
params.lr.epochs = static_cast<unsigned>(cfg.epochs > 0 ? cfg.epochs : 1);
29+
params.lr.init(); // required after setting lr fields, before get_lr() is used by the optimizer
30+
31+
// Training needs writable weights (mmap yields read-only pointers) and an f32 KV cache
32+
// (OUT_PROD has no f16 support) — same forced settings as upstream finetune.cpp.
33+
params.use_mmap = false;
34+
params.cache_type_k = GGML_TYPE_F32;
35+
params.cache_type_v = GGML_TYPE_F32;
36+
37+
llama_backend_init();
38+
llama_numa_init(params.numa);
39+
40+
common_init_result_ptr llama_init = common_init_from_params(params);
41+
llama_model *model = llama_init->model();
42+
llama_context *ctx = llama_init->context();
43+
if (model == nullptr || ctx == nullptr) {
44+
err = "failed to load model for training: " + cfg.model_path;
45+
return false;
46+
}
47+
48+
std::vector<llama_token> tokens = common_tokenize(ctx, params.prompt, true);
49+
if (tokens.size() < 2) {
50+
err = "training text produced too few tokens (need at least 2)";
51+
return false;
52+
}
53+
54+
ggml_opt_dataset_t dataset = common_opt_dataset_init(ctx, tokens, llama_n_ctx(ctx) / 2);
55+
56+
llama_opt_params lopt_params = {
57+
/*n_ctx_train =*/0,
58+
/*param_filter =*/llama_opt_param_filter_all,
59+
/*param_filter_ud =*/nullptr,
60+
/*get_opt_pars =*/common_opt_lr_pars,
61+
/*get_opt_pars_ud =*/&params.lr,
62+
/*optimizer_type =*/params.optimizer,
63+
};
64+
llama_opt_init(ctx, model, lopt_params);
65+
66+
const int64_t idata_split = ggml_opt_dataset_ndata(dataset) * (1.0f - params.val_split);
67+
68+
ggml_opt_result_t result_train = ggml_opt_result_init();
69+
ggml_opt_result_t result_eval = ggml_opt_result_init();
70+
71+
for (params.lr.epoch = 0; params.lr.epoch < params.lr.epochs; ++params.lr.epoch) {
72+
llama_opt_epoch(ctx, dataset, result_train, result_eval, idata_split,
73+
ggml_opt_epoch_callback_progress_bar, ggml_opt_epoch_callback_progress_bar);
74+
ggml_opt_result_reset(result_train);
75+
ggml_opt_result_reset(result_eval);
76+
}
77+
78+
ggml_opt_result_free(result_train);
79+
ggml_opt_result_free(result_eval);
80+
ggml_opt_dataset_free(dataset);
81+
82+
llama_model_save_to_file(model, params.out_file.c_str());
83+
84+
// Deliberately NOT calling llama_backend_free(): other live llama contexts in this JVM
85+
// (e.g. an inference LlamaModel) may still depend on the initialized backend.
86+
return true;
87+
}
88+
89+
} // namespace jllama_train
90+
91+
// JNI entry point for LlamaTrainer.finetuneNative.
//
// Copies the Java strings into std::string, fills a finetune_config and runs
// jllama_train::finetune. Returns an empty Java string on success and a non-empty error message
// on failure. C++ exceptions are caught and converted into the error message so they never cross
// the JNI boundary. Returns nullptr when a Java exception is already pending after the string
// conversion, so that exception propagates to the caller.
extern "C" JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaTrainer_finetuneNative(
    JNIEnv *env, jclass, jstring jmodel, jstring jtext, jstring jout, jint epochs, jfloat learningRate,
    jint nCtx, jint nGpuLayers) {
    // Converts a (possibly null) jstring to std::string; null maps to "".
    // NOTE(review): GetStringUTFChars yields JNI "modified UTF-8", which differs from standard
    // UTF-8 for supplementary characters and embedded NULs — confirm this is acceptable for the
    // training text handed to the tokenizer.
    const auto to_str = [env](jstring s) -> std::string {
        // Skip the conversion once an exception is pending: most JNI calls must not be made then.
        if (s == nullptr || env->ExceptionCheck()) {
            return "";
        }
        const char *c = env->GetStringUTFChars(s, nullptr);
        if (c == nullptr) {
            return "";
        }
        std::string out = c;
        env->ReleaseStringUTFChars(s, c);
        return out;
    };

    jllama_train::finetune_config cfg;
    cfg.model_path = to_str(jmodel);
    cfg.training_text = to_str(jtext);
    cfg.output_path = to_str(jout);
    cfg.epochs = static_cast<int>(epochs);
    cfg.learning_rate = static_cast<float>(learningRate);
    cfg.n_ctx = static_cast<int>(nCtx);
    cfg.n_gpu_layers = static_cast<int>(nGpuLayers);

    // A failed string conversion leaves a Java exception pending; let it surface in Java rather
    // than training on truncated inputs.
    if (env->ExceptionCheck()) {
        return nullptr;
    }

    std::string err;
    try {
        if (jllama_train::finetune(cfg, err)) {
            return env->NewStringUTF(""); // empty == success
        }
    } catch (const std::exception &e) {
        err = e.what();
    } catch (...) {
        err = "unknown C++ exception during fine-tuning";
    }

    // An empty string means success to the Java side, so a failure must never be reported with
    // an empty message (e.g. an exception whose what() is empty).
    if (err.empty()) {
        err = "fine-tuning failed without an error message";
    }
    return env->NewStringUTF(err.c_str());
}

src/main/cpp/train_engine.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
//
5+
// Native fine-tuning engine (proof-of-concept): a self-contained wrapper over llama.cpp's
6+
// ggml-opt training path (llama_opt_init / llama_opt_epoch), mirroring upstream
7+
// examples/training/finetune.cpp. Loads its own model + context (independent of the inference
8+
// server_context in jllama.cpp), fine-tunes on a text corpus, and writes a new GGUF via
9+
// llama_model_save_to_file. Kept out of jllama.cpp so the JNI layer stays thin.
10+
11+
#ifndef JLLAMA_TRAIN_ENGINE_H
12+
#define JLLAMA_TRAIN_ENGINE_H
13+
14+
#include <string>
15+
16+
namespace jllama_train {
17+
18+
// One fine-tuning run's inputs.
19+
// Inputs of one fine-tuning run. Scalar members carry defaults so that a default-constructed
// config never holds indeterminate values; callers normally overwrite all of them.
struct finetune_config {
    std::string model_path;       // base GGUF to fine-tune
    std::string training_text;    // corpus (tokenized in-process)
    std::string output_path;      // where the fine-tuned GGUF is written
    int epochs = 1;               // number of passes over the corpus (>= 1)
    float learning_rate = 1e-5f;  // AdamW lr at the first epoch
    int n_ctx = 0;                // context size; 0 = the model's trained context
    int n_gpu_layers = -1;        // layers offloaded to the GPU; -1 = auto
};
28+
29+
// Run one fine-tuning job end to end. Returns true on success; on failure returns false and sets
30+
// `err`. Not re-entrant; intended to be called off the JVM's critical threads (it blocks for the
31+
// full training run).
32+
bool finetune(const finetune_config &cfg, std::string &err);
33+
34+
} // namespace jllama_train
35+
36+
#endif // JLLAMA_TRAIN_ENGINE_H
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
package net.ladenthin.llama;
6+
7+
import java.nio.file.Path;
8+
import net.ladenthin.llama.exception.LlamaException;
9+
import net.ladenthin.llama.loader.LlamaLoader;
10+
11+
/**
12+
* Proof-of-concept in-process fine-tuning entry point, wrapping llama.cpp's ggml-opt training path
13+
* ({@code llama_opt_init} / {@code llama_opt_epoch}) the same way the upstream
14+
* {@code examples/training/finetune.cpp} tool does. Loads its own model and context (independent of
15+
* {@link LlamaModel}), fine-tunes on a text corpus, and writes a new GGUF.
16+
*
17+
* <p><strong>Status: proof of concept.</strong> Full-model fine-tuning is compute- and
18+
* memory-intensive and blocks for the whole run; upstream training support is itself experimental.
19+
* This surface is intentionally minimal so the native path (which links ggml-opt into
20+
* {@code libjllama} with no extra dependency) can be exercised end to end before a richer
21+
* {@code FineTuner} API is designed.
22+
*/
23+
public final class LlamaTrainer {
24+
25+
static {
26+
LlamaLoader.initialize();
27+
}
28+
29+
private LlamaTrainer() {}
30+
31+
/**
32+
* Fine-tune {@code model} on {@code trainingText} for {@code epochs} passes, writing the result
33+
* to {@code output}. Uses the model's trained context size and automatic GPU-layer selection.
34+
*
35+
* @param model the base GGUF model to fine-tune
36+
* @param trainingText the training corpus (tokenized in-process)
37+
* @param output the path the fine-tuned GGUF is written to
38+
* @param epochs number of passes over the corpus (at least 1)
39+
* @param learningRate the AdamW learning rate at the first epoch (e.g. {@code 1e-5f})
40+
* @throws LlamaException if the model cannot be loaded or training fails
41+
*/
42+
public static void finetune(Path model, String trainingText, Path output, int epochs, float learningRate) {
43+
finetune(model, trainingText, output, epochs, learningRate, 0, -1);
44+
}
45+
46+
/**
47+
* Fine-tune {@code model} on {@code trainingText}, with explicit context size and GPU offload.
48+
*
49+
* @param model the base GGUF model to fine-tune
50+
* @param trainingText the training corpus (tokenized in-process)
51+
* @param output the path the fine-tuned GGUF is written to
52+
* @param epochs number of passes over the corpus (at least 1)
53+
* @param learningRate the AdamW learning rate at the first epoch (e.g. {@code 1e-5f})
54+
* @param nCtx context size in tokens, or {@code 0} to use the model's trained context
55+
* @param nGpuLayers number of layers to offload to the GPU, or {@code -1} for automatic
56+
* @throws LlamaException if the model cannot be loaded or training fails
57+
*/
58+
public static void finetune(
59+
Path model, String trainingText, Path output, int epochs, float learningRate, int nCtx, int nGpuLayers) {
60+
String error =
61+
finetuneNative(
62+
model.toString(), trainingText, output.toString(), epochs, learningRate, nCtx, nGpuLayers);
63+
if (error != null && !error.isEmpty()) {
64+
throw new LlamaException(error);
65+
}
66+
}
67+
68+
private static native String finetuneNative(
69+
String modelPath,
70+
String trainingText,
71+
String outputPath,
72+
int epochs,
73+
float learningRate,
74+
int nCtx,
75+
int nGpuLayers);
76+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
2+
//
3+
// SPDX-License-Identifier: MIT
4+
5+
package net.ladenthin.llama;
6+
7+
import static org.hamcrest.MatcherAssert.assertThat;
8+
import static org.hamcrest.Matchers.greaterThan;
9+
import static org.hamcrest.Matchers.is;
10+
11+
import java.nio.file.Files;
12+
import java.nio.file.Path;
13+
import java.nio.file.Paths;
14+
import org.junit.jupiter.api.Assumptions;
15+
import org.junit.jupiter.api.Test;
16+
import org.junit.jupiter.api.io.TempDir;
17+
18+
/**
19+
* End-to-end fine-tuning smoke over a real model. Self-skips unless a (small) GGUF is provided via
20+
* {@code -Dnet.ladenthin.llama.train.model=/abs/path/to/model.gguf}. Full-model fine-tuning is
21+
* compute- and memory-intensive, so this is opt-in and never runs in a default build.
22+
*/
23+
class LlamaTrainerIntegrationTest {
24+
25+
@Test
26+
void finetuneWritesAnOutputModel(@TempDir Path tmp) throws Exception {
27+
String modelPath = System.getProperty("net.ladenthin.llama.train.model");
28+
Assumptions.assumeTrue(
29+
modelPath != null && !modelPath.isEmpty() && Files.exists(Paths.get(modelPath)),
30+
"set -Dnet.ladenthin.llama.train.model=/path/to/small.gguf to run the fine-tune smoke");
31+
32+
StringBuilder corpus = new StringBuilder();
33+
for (int i = 0; i < 64; i++) {
34+
corpus.append("The quick brown fox jumps over the lazy dog. ");
35+
}
36+
37+
Path output = tmp.resolve("finetuned.gguf");
38+
LlamaTrainer.finetune(Paths.get(modelPath), corpus.toString(), output, 1, 1e-5f);
39+
40+
assertThat(Files.exists(output), is(true));
41+
assertThat(Files.size(output), greaterThan(0L));
42+
}
43+
}

0 commit comments

Comments
 (0)