diff --git a/dflash/CMakeLists.txt b/dflash/CMakeLists.txt
index de0b5f2d..9be56683 100644
--- a/dflash/CMakeLists.txt
+++ b/dflash/CMakeLists.txt
@@ -264,6 +264,18 @@ add_library(dflash_common STATIC
     src/server/sse_emitter.cpp
     src/server/prefix_cache.cpp
     src/server/disk_prefix_cache.cpp
+    # ── Jinja chat-template engine (from llama.cpp common/jinja/) ──
+    # Used by render_chat_template_jinja() to support --chat-template-file
+    # in dflash_server. Mirrors llama.cpp's common_chat_template plumbing.
+    # unicode.cpp supplies common_parse_utf8_codepoint() used by jinja's
+    # value.cpp tojson() and is otherwise self-contained.
+    deps/llama.cpp/common/jinja/lexer.cpp
+    deps/llama.cpp/common/jinja/parser.cpp
+    deps/llama.cpp/common/jinja/runtime.cpp
+    deps/llama.cpp/common/jinja/value.cpp
+    deps/llama.cpp/common/jinja/string.cpp
+    deps/llama.cpp/common/jinja/caps.cpp
+    deps/llama.cpp/common/unicode.cpp
 )
 # BSA (Block-Sparse Attention) backs the speculative-prefill drafter scoring
 # path. Default ON so prefill is fast out of the box. Turn OFF if you don't
@@ -452,6 +464,10 @@ target_include_directories(dflash_common
     PRIVATE
         ${DFLASH27B_SRC_INCLUDE_DIRS}
         ${CMAKE_CURRENT_SOURCE_DIR}/deps/llama.cpp/ggml/src
+        # Jinja chat-template engine (lexer/parser/runtime/value/string/caps)
+        # pulled from llama.cpp/common/jinja for --chat-template-file support.
+        # nlohmann_json is already linked PUBLIC (used by jinja/value.cpp).
+        ${CMAKE_CURRENT_SOURCE_DIR}/deps/llama.cpp/common
 )
 if(DFLASH27B_GPU_BACKEND STREQUAL "cuda")
     target_include_directories(dflash_common PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
diff --git a/dflash/src/server/chat_template.cpp b/dflash/src/server/chat_template.cpp
index 92c46588..6086feae 100644
--- a/dflash/src/server/chat_template.cpp
+++ b/dflash/src/server/chat_template.cpp
@@ -2,6 +2,16 @@
 
 #include "chat_template.h"
 
+#include "jinja/lexer.h"
+#include "jinja/parser.h"
+#include "jinja/runtime.h"
+#include "jinja/value.h"
+
+#include <nlohmann/json.hpp>
+
+#include <memory>
+#include <stdexcept>
+
 namespace dflash::common {
 
 // Qwen3.5 tool preamble — matches the official Jinja template exactly.
@@ -155,4 +165,103 @@ std::string render_chat_template(
     return result;
 }
 
+// ─── Jinja path ─────────────────────────────────────────────────────────
+//
+// Render via a Jinja chat template (e.g. froggeric Qwen3.6 template). Each
+// thread caches the most-recently-parsed program for its template source,
+// so steady-state cost is just the runtime execute (each thread parses once,
+// and again only if it is handed a different template source).
+
+namespace {
+struct JinjaCache {
+    std::string                       src;   // source text `prog` was parsed from
+    std::shared_ptr<jinja::program>   prog;  // null until a parse has succeeded
+};
+static thread_local JinjaCache tls_jinja_cache;  // one cache slot per thread
+
+// Returns the program for `template_src`, reusing this thread's cached parse on
+// an exact source match. Lexer/parser errors rethrow as std::runtime_error.
+static std::shared_ptr<jinja::program> get_or_parse(const std::string & template_src) {
+    if (tls_jinja_cache.prog && tls_jinja_cache.src == template_src) {
+        return tls_jinja_cache.prog;
+    }
+    const char * stage = "jinja lexer: ";  // prefix naming the stage that failed
+    try {
+        jinja::lexer lex;
+        jinja::lexer_result lex_res = lex.tokenize(template_src);
+        stage = "jinja parser: ";
+        auto prog = std::make_shared<jinja::program>(jinja::parse_from_tokens(lex_res));
+        tls_jinja_cache = JinjaCache{template_src, prog};  // cache only after success
+        return prog;
+    } catch (const std::exception & e) {
+        throw std::runtime_error(std::string(stage) + e.what());
+    }
+}
+
+}  // namespace
+
+std::string render_chat_template_jinja(
+    const std::string & template_src,
+    const std::vector<ChatMessage> & messages,
+    const std::string & bos_token,
+    const std::string & eos_token,
+    bool add_generation_prompt,
+    bool enable_thinking,
+    const std::string & tools_json)
+{
+    if (template_src.empty()) {
+        throw std::runtime_error("render_chat_template_jinja: template_src is empty");
+    }
+
+    // Reuses this thread's cached parse when the source text is unchanged.
+    const std::shared_ptr<jinja::program> program = get_or_parse(template_src);
+
+    // One JSON object per message; tool_call_id is emitted only when set.
+    nlohmann::ordered_json chat = nlohmann::ordered_json::array();
+    for (const ChatMessage & msg : messages) {
+        nlohmann::ordered_json entry;
+        entry["role"]    = msg.role;
+        entry["content"] = msg.content;
+        if (!msg.tool_call_id.empty()) {
+            entry["tool_call_id"] = msg.tool_call_id;
+        }
+        chat.push_back(std::move(entry));
+    }
+
+    // Template globals, shaped like the object llama.cpp assembles in
+    // common_chat_template_direct_apply_impl; keys are what templates read.
+    nlohmann::ordered_json globals;
+    globals["messages"]              = std::move(chat);
+    globals["bos_token"]             = bos_token;
+    globals["eos_token"]             = eos_token;
+    globals["add_generation_prompt"] = add_generation_prompt;
+    globals["enable_thinking"]       = enable_thinking;
+
+    // `tools` is left undefined for an empty string, "[]" or "null".
+    const bool no_tools = tools_json.empty() || tools_json == "[]" || tools_json == "null";
+    if (!no_tools) {
+        try {
+            globals["tools"] = nlohmann::ordered_json::parse(tools_json);
+        } catch (const std::exception & e) {
+            throw std::runtime_error(
+                std::string("render_chat_template_jinja: failed to parse tools JSON: ") + e.what());
+        }
+    }
+
+    jinja::context ctx(template_src);
+    try {
+        jinja::global_from_json(ctx, globals, /*mark_input=*/false);
+    } catch (const std::exception & e) {
+        throw std::runtime_error(std::string("jinja global_from_json: ") + e.what());
+    }
+
+    try {
+        jinja::runtime engine(ctx);
+        jinja::value output = engine.execute(*program);
+        return jinja::runtime::gather_string_parts(output)->as_string().str();
+    } catch (const std::exception & e) {
+        throw std::runtime_error(std::string("jinja runtime: ") + e.what());
+    }
+}
+
 }  // namespace dflash::common
diff --git a/dflash/src/server/chat_template.h b/dflash/src/server/chat_template.h
index 5f35f492..c51d7ef1 100644
--- a/dflash/src/server/chat_template.h
+++ b/dflash/src/server/chat_template.h
@@ -49,4 +49,30 @@ std::string render_chat_template(
 // Detect the appropriate chat format for an architecture.
 ChatFormat chat_format_for_arch(const std::string & arch);
 
+// Render chat messages via a Jinja chat template (e.g. froggeric Qwen3.6
+// template, or any of the llama.cpp models/templates/*.jinja files).
+//
+// Mirrors llama.cpp's common_chat_template_direct_apply: parses the template
+// (the most recent parse is cached per thread), converts the inputs to jinja
+// values, runs the program and returns the rendered prompt string.
+//
+// `template_src`  literal Jinja source (read from --chat-template-file);
+//                 must be non-empty.
+// `messages`      exposed as `messages`; each entry carries role, content
+//                 and, only when non-empty, tool_call_id.
+// `bos_token`, `eos_token`, `add_generation_prompt`, `enable_thinking`
+//                 exposed under the same names; bos/eos may be "" if unknown.
+// `tools_json`    JSON tool definitions exposed as `tools`; not defined
+//                 when the string is empty, "[]" or "null".
+// Throws std::runtime_error on an empty template, unparsable tools JSON, or
+// lexer/parser/runtime failure (caller should surface a 500 response).
+std::string render_chat_template_jinja(
+    const std::string & template_src,
+    const std::vector<ChatMessage> & messages,
+    const std::string & bos_token,
+    const std::string & eos_token,
+    bool add_generation_prompt = true,
+    bool enable_thinking = false,
+    const std::string & tools_json = "");
+
 }  // namespace dflash::common
diff --git a/dflash/src/server/http_server.cpp b/dflash/src/server/http_server.cpp
index 8188fd07..1fb4b0ad 100644
--- a/dflash/src/server/http_server.cpp
+++ b/dflash/src/server/http_server.cpp
@@ -439,9 +439,41 @@ bool HttpServer::route_request(int fd, const HttpRequest & hr) {
             tools_json = req.tools.dump();
         }
 
-        std::string rendered = render_chat_template(chat_msgs, chat_format_,
-                                                    true, enable_thinking,
-                                                    tools_json);
+        std::string rendered;
+        if (!config_.chat_template_src.empty()) {
+            // Jinja path: caller supplied a chat template file via
+            // --chat-template-file. Override the hardcoded QWEN3/LAGUNA
+            // renderer. Used for tool-using agents that need the Anthropic
+            // tool_use envelope (e.g. froggeric Qwen3.6 template).
+            //
+            // Special tokens like <|im_start|> / <|im_end|> are stored
+            // verbatim in the GGUF vocab — use raw_token() to skip the
+            // GPT-2 byte decode (otherwise <0xC4><0x91> nonsense appears).
+            const std::string & bos_str = (tokenizer_.bos_id() >= 0)
+                ? tokenizer_.raw_token(tokenizer_.bos_id())
+                : std::string();
+            const std::string & eos_str = (tokenizer_.eos_id() >= 0)
+                ? tokenizer_.raw_token(tokenizer_.eos_id())
+                : std::string();
+            try {
+                rendered = render_chat_template_jinja(
+                    config_.chat_template_src,
+                    chat_msgs,
+                    bos_str,
+                    eos_str,
+                    /*add_generation_prompt=*/true,
+                    enable_thinking,
+                    tools_json);
+            } catch (const std::exception & e) {
+                send_error(fd, 500,
+                    std::string("chat template (jinja) render failed: ") + e.what());
+                return true;
+            }
+        } else {
+            rendered = render_chat_template(chat_msgs, chat_format_,
+                                            true, enable_thinking,
+                                            tools_json);
+        }
         req.prompt_tokens = tokenizer_.encode(rendered);
 
         // Detect if prompt ends with <think> (model will start in reasoning mode).
diff --git a/dflash/src/server/http_server.h b/dflash/src/server/http_server.h
index 24d075d8..45d73bf5 100644
--- a/dflash/src/server/http_server.h
+++ b/dflash/src/server/http_server.h
@@ -64,6 +64,12 @@ struct ServerConfig {
     int         disk_cache_min_tokens = 512; // only persist >= this many tokens
     int         disk_cache_continued_interval = 10240; // continued checkpoint every N tokens
     int         disk_cache_cold_max_tokens = 10240;    // cold prefix for prompts longer than this
+
+    // Optional Jinja chat template (overrides the hardcoded ChatFormat::QWEN3
+    // / LAGUNA renderer when non-empty). Used for tool-using agents that need
+    // the Anthropic tool_use envelope, e.g. froggeric Qwen3.6 template.
+    std::string chat_template_src;          // literal Jinja source (loaded from file)
+    std::string chat_template_path;         // path it was loaded from (logged at startup)
 };
 
 // ─── Parsed request ─────────────────────────────────────────────────────
diff --git a/dflash/src/server/server_main.cpp b/dflash/src/server/server_main.cpp
index 627e4c64..c8857934 100644
--- a/dflash/src/server/server_main.cpp
+++ b/dflash/src/server/server_main.cpp
@@ -76,6 +76,13 @@ static void print_usage(const char * prog) {
         "  --kv-cache-min-tokens <N>   Min tokens to persist (default: 512)\n"
         "  --kv-cache-interval <N>     Continued checkpoint every N tokens (default: 10240)\n"
         "  --kv-cache-cold-max <N>     Cold prefix for prompts longer than N tokens (default: 10240)\n"
+        "\n"
+        "Chat template (optional, e.g. froggeric Qwen3.6 template for tool-using\n"
+        "agents that need the Anthropic tool_use envelope):\n"
+        "  --chat-template-file <path>  Load a Jinja chat template file.\n"
+        "                               Overrides the hardcoded Qwen3/Laguna\n"
+        "                               renderer. Empty or missing falls back\n"
+        "                               to the hardcoded template.\n"
         "\n", prog);
 }
 
@@ -143,6 +150,36 @@ int main(int argc, char ** argv) {
             sconfig.pflash_skip_park = true;
         } else if (std::strcmp(argv[i], "--lazy-draft") == 0) {
             sconfig.lazy_draft = true;
+        } else if (std::strcmp(argv[i], "--chat-template-file") == 0 && i + 1 < argc) {
+            const char * path = argv[++i];
+            std::FILE * f = std::fopen(path, "rb");
+            if (!f) {
+                std::fprintf(stderr, "[server] --chat-template-file: cannot open '%s'\n", path);
+                return 1;
+            }
+            std::fseek(f, 0, SEEK_END);
+            long n = std::ftell(f);
+            std::fseek(f, 0, SEEK_SET);
+            if (n <= 0) {
+                // The usage text promises "Empty or missing falls back to the
+                // hardcoded template." Honor that: log a warning and leave
+                // chat_template_src empty so http_server.cpp falls through to
+                // the hardcoded QWEN3/LAGUNA renderer, instead of aborting
+                // startup.
+                std::fclose(f);
+                std::fprintf(stderr, "[server] --chat-template-file: '%s' is empty, "
+                                     "falling back to hardcoded template\n", path);
+            } else {
+                sconfig.chat_template_src.resize((size_t)n);
+                size_t got = std::fread(sconfig.chat_template_src.data(), 1, (size_t)n, f);
+                std::fclose(f);
+                if (got != (size_t)n) {
+                    std::fprintf(stderr, "[server] --chat-template-file: short read on '%s'\n", path);
+                    return 1;
+                }
+                sconfig.chat_template_path = path;
+                std::fprintf(stderr, "[server] loaded chat template from %s (%ld bytes)\n", path, n);
+            }
         } else if (std::strcmp(argv[i], "--kv-cache-dir") == 0 && i + 1 < argc) {
             sconfig.disk_cache_dir = argv[++i];
         } else if (std::strcmp(argv[i], "--kv-cache-budget") == 0 && i + 1 < argc) {
diff --git a/dflash/test/test_server_unit.cpp b/dflash/test/test_server_unit.cpp
index 848b7539..fa05346a 100644
--- a/dflash/test/test_server_unit.cpp
+++ b/dflash/test/test_server_unit.cpp
@@ -15,6 +15,7 @@
 #include "server/utf8_utils.h"
 #include "server/api_types.h"
 #include "server/http_server.h"
+#include "server/chat_template.h"
 #include <nlohmann/json.hpp>
 
 #include <cmath>
@@ -576,6 +577,101 @@ static void test_pflash_threshold_always_mode() {
     TEST_ASSERT(should);
 }
 
+// ═══════════════════════════════════════════════════════════════════════
+// Jinja chat template
+// ═══════════════════════════════════════════════════════════════════════
+
+// Minimal Jinja template: emits "<|role|>content" for every message and, when
+// add_generation_prompt is set, a trailing "<|assistant|>". Exercises the
+// runtime + global_from_json plumbing without any external .jinja file.
+static const char MINI_JINJA_TEMPLATE[] =
+    "{%- for m in messages -%}"
+    "<|{{ m.role }}|>{{ m.content }}\n"
+    "{%- endfor -%}"
+    "{%- if add_generation_prompt -%}"
+    "<|assistant|>"
+    "{%- endif -%}";
+
+static void test_jinja_render_basic() {
+    // System + user turn with the generation prompt requested and no tools.
+    const std::vector<ChatMessage> conversation = {
+        {"system", "you are helpful", ""},
+        {"user",   "hi",              ""},
+    };
+    const std::string rendered = render_chat_template_jinja(
+        MINI_JINJA_TEMPLATE, conversation, /*bos_token=*/"<s>", /*eos_token=*/"</s>",
+        /*add_generation_prompt=*/true, /*enable_thinking=*/false, /*tools_json=*/"");
+    const auto has = [&](const char * s) { return rendered.find(s) != std::string::npos; };
+    TEST_ASSERT(has("<|system|>you are helpful"));
+    TEST_ASSERT(has("<|user|>hi"));
+    TEST_ASSERT(has("<|assistant|>"));
+}
+
+static void test_jinja_render_no_gen_prompt() {
+    // With add_generation_prompt off, the trailing assistant tag must be absent.
+    const std::vector<ChatMessage> conversation = {{"user", "ping", ""}};
+    const std::string rendered = render_chat_template_jinja(
+        MINI_JINJA_TEMPLATE, conversation, "", "", /*add_gen=*/false, /*think=*/false, "");
+    TEST_ASSERT(rendered.find("<|user|>ping") != std::string::npos);
+    TEST_ASSERT(rendered.find("<|assistant|>") == std::string::npos);
+}
+
+static void test_jinja_render_tools_injected() {
+    // The template reads `tools`, so the marker proves the JSON was injected.
+    static const char TPL[] =
+        "{%- if tools -%}TOOLS_PRESENT:{{ tools[0].name }}{%- endif -%}"
+        "{%- for m in messages -%}<|{{ m.role }}|>{{ m.content }}{%- endfor -%}";
+    const std::string tool_defs = R"([{"name":"my_tool","description":"test"}])";
+    const std::vector<ChatMessage> conversation = {{"user", "?", ""}};
+    const std::string rendered = render_chat_template_jinja(
+        TPL, conversation, "", "", /*add_gen=*/false, /*think=*/false, tool_defs);
+    TEST_ASSERT(rendered.find("TOOLS_PRESENT:my_tool") != std::string::npos);
+}
+
+static void test_jinja_render_empty_tools_skipped() {
+    // tools_json == "[]" must make `{% if tools %}` take the else branch.
+    static const char TPL[] =
+        "{%- if tools -%}TOOLS_PRESENT{%- else -%}NO_TOOLS{%- endif -%}";
+    const std::vector<ChatMessage> conversation = {{"user", "?", ""}};
+    const std::string rendered = render_chat_template_jinja(
+        TPL, conversation, "", "", /*add_gen=*/false, /*think=*/false, "[]");
+    TEST_ASSERT(rendered.find("NO_TOOLS") != std::string::npos);
+    TEST_ASSERT(rendered.find("TOOLS_PRESENT") == std::string::npos);
+}
+
+static void test_jinja_render_bos_eos_threaded() {
+    // bos_token / eos_token must be visible to the template as globals.
+    static const char TPL[] = "{{ bos_token }}HI{{ eos_token }}";
+    const std::vector<ChatMessage> no_messages{};
+    const std::string rendered = render_chat_template_jinja(
+        TPL, no_messages, "<BOS>", "<EOS>", /*add_gen=*/false, /*think=*/false, "");
+    TEST_ASSERT(rendered == "<BOS>HI<EOS>");
+}
+
+static void test_jinja_render_empty_template_throws() {
+    // An empty template source must be rejected with std::runtime_error
+    // instead of producing a rendered prompt.
+    const std::vector<ChatMessage> conversation = {{"user", "x", ""}};
+    bool rejected = false;
+    try {
+        (void)render_chat_template_jinja("", conversation, "", "", true, false, "");
+    } catch (const std::runtime_error &) { rejected = true; }
+    TEST_ASSERT(rejected);
+}
+
+static void test_jinja_render_bad_tools_json_throws() {
+    // Malformed tools JSON must surface as std::runtime_error, even though
+    // the template itself never references `tools`.
+    static const char TPL[] = "{%- for m in messages -%}{{ m.role }}{%- endfor -%}";
+    const std::vector<ChatMessage> conversation = {{"user", "x", ""}};
+    const std::string broken_tools = "{not valid json";
+    bool rejected = false;
+    try {
+        (void)render_chat_template_jinja(TPL, conversation, "", "", true, false, broken_tools);
+    } catch (const std::runtime_error &) { rejected = true; }
+    TEST_ASSERT(rejected);
+}
+
 // ═══════════════════════════════════════════════════════════════════════
 // Disk Prefix Cache Tests
 // ═══════════════════════════════════════════════════════════════════════
@@ -972,6 +1068,15 @@ int main() {
     RUN_TEST(test_pflash_threshold_auto_mode);
     RUN_TEST(test_pflash_threshold_always_mode);
 
+    std::fprintf(stderr, "\n── Jinja chat template ──\n");
+    RUN_TEST(test_jinja_render_basic);
+    RUN_TEST(test_jinja_render_no_gen_prompt);
+    RUN_TEST(test_jinja_render_tools_injected);
+    RUN_TEST(test_jinja_render_empty_tools_skipped);
+    RUN_TEST(test_jinja_render_bos_eos_threaded);
+    RUN_TEST(test_jinja_render_empty_template_throws);
+    RUN_TEST(test_jinja_render_bad_tools_json_throws);
+
     std::fprintf(stderr, "\n── Disk prefix cache ──\n");
     RUN_TEST(test_disk_cache_config_defaults);
     RUN_TEST(test_disk_cache_disabled_when_no_dir);