Skip to content

Commit 4655d5c

Browse files
gHashTag and ona-agent committed
Add DeepSeek chat template and improve tokenizer
- Add DeepSeek Coder chat template (<|User|>/<|Assistant|>/<|EOT|>)
- Add DeepSeek special tokens to tokenizer
- Skip system message for templates without system support
- Fix chat template detection and display
- Add Q4_K debug output (disabled)

Working models:
- Qwen2.5 Coder 0.5B: 3.0 tok/s
- SmolLM 135M: 8.9 tok/s
- TinyLlama 1.1B: ~2 tok/s

Co-authored-by: Ona <no-reply@ona.com>
1 parent a27c581 commit 4655d5c

4 files changed

Lines changed: 38 additions & 4 deletions

File tree

bin/vibee

3.02 KB
Binary file not shown.

src/vibeec/gguf_chat.zig

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,12 @@ const ConversationHistory = struct {
7474
for (self.messages.items) |msg| {
7575
switch (msg.role) {
7676
.system => {
77-
try result.appendSlice(template.system_prefix);
78-
try result.appendSlice(msg.content);
79-
try result.appendSlice(template.system_suffix);
77+
// Skip system message if template doesn't support it (empty prefix)
78+
if (template.system_prefix.len > 0) {
79+
try result.appendSlice(template.system_prefix);
80+
try result.appendSlice(msg.content);
81+
try result.appendSlice(template.system_suffix);
82+
}
8083
},
8184
.user => {
8285
try result.appendSlice(template.user_prefix);
@@ -111,6 +114,11 @@ const ConversationHistory = struct {
111114

112115
// Auto-detect chat template based on model name
113116
fn detectChatTemplate(model_path: []const u8) ChatTemplate {
117+
// Check for DeepSeek models
118+
if (std.mem.indexOf(u8, model_path, "deepseek") != null or
119+
std.mem.indexOf(u8, model_path, "DeepSeek") != null) {
120+
return ChatTemplate.DEEPSEEK;
121+
}
114122
// Check for Qwen models
115123
if (std.mem.indexOf(u8, model_path, "qwen") != null or
116124
std.mem.indexOf(u8, model_path, "Qwen") != null) {
@@ -216,7 +224,16 @@ fn runChatInternal(allocator: std.mem.Allocator, model_path: []const u8, initial
216224
// Add system message
217225
try history.addMessage(.system, system_prompt);
218226

219-
std.debug.print("Chat template: TinyLlama (ChatML format)\n", .{});
227+
// Print detected template name
228+
const template_name = if (std.mem.indexOf(u8, model_path, "deepseek") != null or std.mem.indexOf(u8, model_path, "DeepSeek") != null)
229+
"DeepSeek"
230+
else if (std.mem.indexOf(u8, model_path, "qwen") != null or std.mem.indexOf(u8, model_path, "Qwen") != null)
231+
"Qwen (ChatML)"
232+
else if (std.mem.indexOf(u8, model_path, "smollm") != null or std.mem.indexOf(u8, model_path, "SmolLM") != null)
233+
"SmolLM (ChatML)"
234+
else
235+
"TinyLlama (ChatML)";
236+
std.debug.print("Chat template: {s}\n", .{template_name});
220237
std.debug.print("System: {s}\n", .{system_prompt});
221238
std.debug.print("Sampling: temperature={d:.2}, top_p={d:.2}\n", .{sampling_params.temperature, sampling_params.top_p});
222239
std.debug.print("History: enabled (last 10 messages)\n", .{});

src/vibeec/gguf_inference.zig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ pub fn dequantizeQ4_KTensor(allocator: std.mem.Allocator, data: []const u8, num_
184184
const d = gguf.f16ToF32(d_bits);
185185
const min = gguf.f16ToF32(dmin_bits);
186186

187+
188+
187189
const scales = block[4..16]; // 12 bytes of scales
188190
const qs = block[16..144]; // 128 bytes of quantized values
189191

src/vibeec/gguf_tokenizer.zig

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ pub const Tokenizer = struct {
7979
// First check for special tokens (they have priority)
8080
var found_special = false;
8181
const special_tokens = [_][]const u8{
82+
// Qwen/ChatML tokens
8283
"<|im_start|>", "<|im_end|>", "<|endoftext|>",
8384
"<|object_ref_start|>", "<|object_ref_end|>",
8485
"<|box_start|>", "<|box_end|>",
@@ -88,6 +89,10 @@ pub const Tokenizer = struct {
8889
"<tool_call>", "</tool_call>",
8990
"<|fim_prefix|>", "<|fim_middle|>", "<|fim_suffix|>",
9091
"<|fim_pad|>", "<|repo_name|>", "<|file_sep|>",
92+
// DeepSeek tokens
93+
"<|User|>", "<|Assistant|>", "<|EOT|>",
94+
"<|begin▁of▁sentence|>", "<|end▁of▁sentence|>",
95+
"<|fim▁hole|>", "<|fim▁begin|>", "<|fim▁end|>",
9196
};
9297

9398
for (special_tokens) |special| {
@@ -349,6 +354,16 @@ pub const ChatTemplate = struct {
349354
.assistant_suffix = "<|im_end|>\n",
350355
};
351356

357+
// DeepSeek Coder chat template (no system prompt)
358+
pub const DEEPSEEK = ChatTemplate{
359+
.system_prefix = "",
360+
.system_suffix = "",
361+
.user_prefix = "<|User|>",
362+
.user_suffix = "\n",
363+
.assistant_prefix = "<|Assistant|>",
364+
.assistant_suffix = "<|EOT|>\n",
365+
};
366+
352367
pub fn formatPrompt(
353368
self: *const ChatTemplate,
354369
allocator: std.mem.Allocator,

0 commit comments

Comments
 (0)