Expose chat template, special tokens, and full metadata via ModelMeta

vaiju1981 · vaiju1981 · commit ef16decf445e · 2026-07-01T11:33:08.000-07:00
Enrich the existing getModelMetaJson native method and the ModelMeta wrapper
with three read-only introspection fields, so callers can inspect a loaded
model without a second round-trip:

- chat_template  - the model's resolved Jinja template (getChatTemplate)
- special_tokens - bos/eos/eot ids via getBosTokenId/getEosTokenId/getEotTokenId;
  sep/nl/pad are also in the JSON, reachable through the underlying JsonNode
- metadata       - every GGUF key/value pair via getMetadata(key), each value
  rendered as a string and capped at 2 KB so large array metadata (tokenizer
  tokens/merges) is truncated and cannot bloat the JSON

No new native method or JNI signature change: the fields ride the existing
getModelMetaJson payload, and absent fields default on the Java side ("" for strings, -1 for token ids); vocab-only models emit special_tokens only.

Verified: libjllama builds and links against b9842, loads cleanly
(NativeLibraryLoadSmokeTest), and the ModelMeta unit tests pass (11 total).
diff --git a/src/main/cpp/jllama.cpp b/src/main/cpp/jllama.cpp
@@ -767,12 +767,22 @@ JNIEXPORT void JNICALL Java_net_ladenthin_llama_LlamaModel_loadModelWithProgress
     load_model_impl(env, obj, jparams, callback);
 }
 
+// Build the special-token id map, keyed bos/eos/eot/sep/nl/pad (an id is -1 / LLAMA_TOKEN_NULL when the model defines none).
+static json special_tokens_json(const llama_vocab *vocab) {
+    return {
+        {"bos", llama_vocab_bos(vocab)}, {"eos", llama_vocab_eos(vocab)},
+        {"eot", llama_vocab_eot(vocab)}, {"sep", llama_vocab_sep(vocab)},
+        {"nl", llama_vocab_nl(vocab)},   {"pad", llama_vocab_pad(vocab)},
+    };
+}
+
 JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaModel_getModelMetaJson(JNIEnv *env, jobject obj) {
     REQUIRE_SERVER_CONTEXT(nullptr);
     if (jctx->vocab_only) {
         json meta = {
             {"vocab_type", llama_vocab_type(jctx->vocab)},
             {"n_vocab", llama_vocab_n_tokens(jctx->vocab)},
+            {"special_tokens", special_tokens_json(jctx->vocab)},
         };
         return json_to_jstring_impl(env, meta);
     }
@@ -794,6 +804,26 @@ JNIEXPORT jstring JNICALL Java_net_ladenthin_llama_LlamaModel_getModelMetaJson(J
         {"name", m.model_name},
         {"architecture", std::string(arch_buf)},
     };
+    // Resolved default chat template (Jinja); empty when the model ships none.
+    const char *chat_tmpl = mdl != nullptr ? llama_model_chat_template(mdl, /*name*/ nullptr) : nullptr;
+    j["chat_template"] = chat_tmpl != nullptr ? std::string(chat_tmpl) : std::string();
+    j["special_tokens"] = special_tokens_json(jctx->vocab);
+    // Full GGUF metadata key/value map.
+    if (mdl != nullptr) {
+        json meta_map = json::object();
+        const int meta_count = llama_model_meta_count(mdl);
+        for (int i = 0; i < meta_count; i++) {
+            char key_buf[256] = {};
+            // ponytail: 2 KB/value cap — scalar metadata fits; huge array values
+            // (tokenizer tokens/merges) truncate rather than bloating the JSON.
+            char val_buf[2048] = {};
+            if (llama_model_meta_key_by_index(mdl, i, key_buf, sizeof(key_buf)) >= 0 &&
+                llama_model_meta_val_str_by_index(mdl, i, val_buf, sizeof(val_buf)) >= 0) {
+                meta_map[std::string(key_buf)] = std::string(val_buf);
+            }
+        }
+        j["metadata"] = std::move(meta_map);
+    }
     return json_to_jstring_impl(env, j);
 }
 
diff --git a/src/main/java/net/ladenthin/llama/value/ModelMeta.java b/src/main/java/net/ladenthin/llama/value/ModelMeta.java
@@ -129,6 +129,55 @@ public String getModelName() {
         return node.path("name").asText("");
     }
 
+    /**
+     * Returns the model's resolved default chat template (Jinja), from GGUF
+     * {@code tokenizer.chat_template} metadata, as carried in the {@code chat_template} JSON field.
+     *
+     * @return the chat template string, or {@code ""} if the field is absent or the model ships none
+     */
+    public String getChatTemplate() {
+        return node.path("chat_template").asText("");
+    }
+
+    /**
+     * Beginning-of-sentence token id, read from the {@code special_tokens.bos} JSON field.
+     *
+     * @return the BOS token id, or {@code -1} if the model defines none or the field is absent
+     */
+    public int getBosTokenId() {
+        return node.at("/special_tokens/bos").asInt(-1);
+    }
+
+    /**
+     * End-of-sentence token id, read from the {@code special_tokens.eos} JSON field.
+     *
+     * @return the EOS token id, or {@code -1} if the model defines none or the field is absent
+     */
+    public int getEosTokenId() {
+        return node.at("/special_tokens/eos").asInt(-1);
+    }
+
+    /**
+     * End-of-turn token id (used by chat- and FIM-aware models), read from {@code special_tokens.eot}.
+     *
+     * @return the EOT token id, or {@code -1} if the model defines none or the field is absent
+     */
+    public int getEotTokenId() {
+        return node.at("/special_tokens/eot").asInt(-1);
+    }
+
+    /**
+     * Look up a raw GGUF metadata value by key (e.g. {@code "general.architecture"},
+     * {@code "general.quantization_version"}). Large array metadata (tokenizer tokens/merges)
+     * is truncated by the native layer (2 KB buffer per value), not returned in full.
+     *
+     * @param key the GGUF metadata key
+     * @return the value as a string, or {@code ""} if the key or the whole metadata map is absent
+     */
+    public String getMetadata(String key) {
+        return node.path("metadata").path(key).asText("");
+    }
+
     /**
      * Returns the underlying {@link JsonNode} for direct access to any field,
      * including fields added in future llama.cpp versions.
diff --git a/src/test/java/net/ladenthin/llama/value/ModelMetaTest.java b/src/test/java/net/ladenthin/llama/value/ModelMetaTest.java
@@ -133,4 +133,30 @@ public void testToStringContainsNewFields() throws Exception {
         assertThat(json, containsString("\"llama\""));
         assertThat(json, containsString("\"CodeLlama-7B\""));
     }
+
+    @Test
+    public void testChatTemplateSpecialTokensAndMetadata() throws Exception { // all three new fields present
+        ModelMeta meta = parse("{\"n_vocab\":32000,"
+                + "\"chat_template\":\"{% for m in messages %}{{ m.content }}{% endfor %}\","
+                + "\"special_tokens\":{\"bos\":1,\"eos\":2,\"eot\":32000,\"sep\":-1,\"nl\":13,\"pad\":-1},"
+                + "\"metadata\":{\"general.architecture\":\"llama\",\"general.quantization_version\":\"2\"}}");
+
+        assertThat(meta.getChatTemplate(), containsString("for m in messages"));
+        assertThat(meta.getBosTokenId(), is(1));
+        assertThat(meta.getEosTokenId(), is(2));
+        assertThat(meta.getEotTokenId(), is(32000));
+        assertThat(meta.getMetadata("general.architecture"), is("llama"));
+        assertThat(meta.getMetadata("general.quantization_version"), is("2"));
+    }
+
+    @Test
+    public void testNewGettersDefaultWhenAbsent() throws Exception { // payload carries none of the new fields
+        ModelMeta meta = parse("{\"n_vocab\":100}");
+
+        assertThat(meta.getChatTemplate(), is(""));
+        assertThat(meta.getBosTokenId(), is(-1));
+        assertThat(meta.getEosTokenId(), is(-1));
+        assertThat(meta.getEotTokenId(), is(-1));
+        assertThat(meta.getMetadata("general.architecture"), is(""));
+    }
 }