Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/main/cpp/jllama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,12 +767,22 @@ JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModelWithProgress
load_model_impl(env, obj, jparams, callback);
}

// Assemble a JSON object mapping each special-token role ("bos", "eos", "eot",
// "sep", "nl", "pad") to the id reported by the matching llama_vocab_* accessor.
// An id of -1 / LLAMA_TOKEN_NULL means the model defines no token for that role.
static json special_tokens_json(const llama_vocab *vocab) {
    json tokens = json::object();
    // Keys are inserted in the same order the roles are listed above.
    tokens["bos"] = llama_vocab_bos(vocab);
    tokens["eos"] = llama_vocab_eos(vocab);
    tokens["eot"] = llama_vocab_eot(vocab);
    tokens["sep"] = llama_vocab_sep(vocab);
    tokens["nl"] = llama_vocab_nl(vocab);
    tokens["pad"] = llama_vocab_pad(vocab);
    return tokens;
}

JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaModel_getModelMetaJson(JNIEnv *env, jobject obj) {
REQUIRE_SERVER_CONTEXT(nullptr);
if (jctx->vocab_only) {
json meta = {
{"vocab_type", llama_vocab_type(jctx->vocab)},
{"n_vocab", llama_vocab_n_tokens(jctx->vocab)},
{"special_tokens", special_tokens_json(jctx->vocab)},
};
return json_to_jstring_impl(env, meta);
}
Expand All @@ -794,6 +804,26 @@ JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaModel_getModelMetaJson(J
{"name", m.model_name},
{"architecture", std::string(arch_buf)},
};
// Resolved default chat template (Jinja); empty when the model ships none.
const char *chat_tmpl = mdl != nullptr ? llama_model_chat_template(mdl, /*name*/ nullptr) : nullptr;
j["chat_template"] = chat_tmpl != nullptr ? std::string(chat_tmpl) : std::string();
j["special_tokens"] = special_tokens_json(jctx->vocab);
// Full GGUF metadata key/value map.
if (mdl != nullptr) {
json meta_map = json::object();
const int meta_count = llama_model_meta_count(mdl);
for (int i = 0; i < meta_count; i++) {
char key_buf[256] = {};
// ponytail: 2 KB/value cap — scalar metadata fits; huge array values
// (tokenizer tokens/merges) truncate rather than bloating the JSON.
char val_buf[2048] = {};
if (llama_model_meta_key_by_index(mdl, i, key_buf, sizeof(key_buf)) >= 0 &&
llama_model_meta_val_str_by_index(mdl, i, val_buf, sizeof(val_buf)) >= 0) {
meta_map[std::string(key_buf)] = std::string(val_buf);
}
}
j["metadata"] = std::move(meta_map);
}
return json_to_jstring_impl(env, j);
}

Expand Down
49 changes: 49 additions & 0 deletions src/main/java/net/ladenthin/llama/value/ModelMeta.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,55 @@ public String getModelName() {
return node.path("name").asText("");
}

/**
 * Reads the {@code chat_template} field: the model's resolved default chat
 * template (Jinja), taken from the GGUF {@code tokenizer.chat_template} metadata.
 *
 * @return the chat template text, or {@code ""} when the model ships none or
 *         the field is absent
 */
public String getChatTemplate() {
    final JsonNode template = node.path("chat_template");
    return template.asText("");
}

/**
 * Reads the beginning-of-sentence token id from {@code special_tokens.bos}.
 *
 * @return the BOS token id; {@code -1} when the model defines none or the
 *         field is absent
 */
public int getBosTokenId() {
    final JsonNode bos = node.path("special_tokens").path("bos");
    return bos.asInt(-1);
}

/**
 * Reads the end-of-sentence token id from {@code special_tokens.eos}.
 *
 * @return the EOS token id; {@code -1} when the model defines none or the
 *         field is absent
 */
public int getEosTokenId() {
    final JsonNode eos = node.path("special_tokens").path("eos");
    return eos.asInt(-1);
}

/**
 * Reads the end-of-turn token id (used by chat- and FIM-aware models) from
 * {@code special_tokens.eot}.
 *
 * @return the EOT token id; {@code -1} when the model defines none or the
 *         field is absent
 */
public int getEotTokenId() {
    final JsonNode eot = node.path("special_tokens").path("eot");
    return eot.asInt(-1);
}

/**
 * Fetches one raw GGUF metadata entry from the {@code metadata} map, for
 * example {@code "general.architecture"} or {@code "general.quantization_version"}.
 * The native layer truncates large array metadata (tokenizer tokens/merges),
 * so such values are not returned in full.
 *
 * @param key the GGUF metadata key to look up
 * @return the value rendered as a string, or {@code ""} when the key is absent
 */
public String getMetadata(String key) {
    final JsonNode entry = node.path("metadata").path(key);
    return entry.asText("");
}

/**
* Returns the underlying {@link JsonNode} for direct access to any field,
* including fields added in future llama.cpp versions.
Expand Down
26 changes: 26 additions & 0 deletions src/test/java/net/ladenthin/llama/value/ModelMetaTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,30 @@ public void testToStringContainsNewFields() throws Exception {
assertThat(json, containsString("\"llama\""));
assertThat(json, containsString("\"CodeLlama-7B\""));
}

@Test
public void testChatTemplateSpecialTokensAndMetadata() throws Exception {
    // Fixture carrying every field the chat-template, special-token and metadata getters read.
    final String payload = "{\"n_vocab\":32000,"
            + "\"chat_template\":\"{% for m in messages %}{{ m.content }}{% endfor %}\","
            + "\"special_tokens\":{\"bos\":1,\"eos\":2,\"eot\":32000,\"sep\":-1,\"nl\":13,\"pad\":-1},"
            + "\"metadata\":{\"general.architecture\":\"llama\",\"general.quantization_version\":\"2\"}}";
    final ModelMeta meta = parse(payload);

    // Chat template is surfaced as-is.
    assertThat(meta.getChatTemplate(), containsString("for m in messages"));

    // Special-token ids come from the nested special_tokens object.
    assertThat(meta.getBosTokenId(), is(1));
    assertThat(meta.getEosTokenId(), is(2));
    assertThat(meta.getEotTokenId(), is(32000));

    // Metadata values are looked up by their GGUF key.
    assertThat(meta.getMetadata("general.architecture"), is("llama"));
    assertThat(meta.getMetadata("general.quantization_version"), is("2"));
}

@Test
public void testNewGettersDefaultWhenAbsent() throws Exception {
    // Only n_vocab is present, so every getter under test must fall back to its default.
    final String payload = "{\"n_vocab\":100}";
    final ModelMeta meta = parse(payload);

    // String getters default to the empty string.
    assertThat(meta.getChatTemplate(), is(""));

    // Token-id getters default to -1.
    assertThat(meta.getBosTokenId(), is(-1));
    assertThat(meta.getEosTokenId(), is(-1));
    assertThat(meta.getEotTokenId(), is(-1));

    // An unknown metadata key also yields the empty string.
    assertThat(meta.getMetadata("general.architecture"), is(""));
}
}
Loading