Skip to content

Commit 3602d37

Browse files
gHashTag and ona-agent committed
Add multi-model support with auto-detection
- ChatTemplate: QWEN, SMOLLM, LLAMA2, TINYLLAMA
- Auto-detect model type from filename
- Auto-select system prompt (coder vs general)
- ChatML format for HTTP API (im_start/im_end)

Supported models:
- SmolLM-135M (general, fast)
- Qwen2.5-Coder-0.5B/1.5B (coding)
- TinyLlama-1.1B (general)

Co-authored-by: Ona <no-reply@ona.com>
1 parent a0122aa commit 3602d37

4 files changed

Lines changed: 60 additions & 7 deletions

File tree

bin/vibee

3.15 KB
Binary file not shown.

src/vibeec/gguf_chat.zig

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,39 @@ const ConversationHistory = struct {
109109
}
110110
};
111111

112+
/// Picks the chat template for a model by inspecting its file name.
///
/// Only the final path component of `model_path` is examined, so a directory
/// that happens to be called e.g. `qwen/` cannot select the wrong template.
/// Matching is ASCII case-insensitive, so `qwen`, `Qwen` and `QWEN` are all
/// recognised.
///
/// Checks run in this order and the first hit wins:
///   1. name contains "qwen"    -> `ChatTemplate.QWEN`
///   2. name contains "smollm"  -> `ChatTemplate.SMOLLM`
///   3. name contains "llama-2" -> `ChatTemplate.LLAMA2`
///   4. anything else           -> `ChatTemplate.TINYLLAMA`
///
/// Never fails and never allocates; an empty path yields the default.
fn detectChatTemplate(model_path: []const u8) ChatTemplate {
    // Strip directories first: detection is meant to be driven by the model
    // file name, not by where the user keeps their models.
    const file_name = std.fs.path.basename(model_path);

    if (std.ascii.indexOfIgnoreCase(file_name, "qwen") != null) {
        return ChatTemplate.QWEN;
    }
    if (std.ascii.indexOfIgnoreCase(file_name, "smollm") != null) {
        return ChatTemplate.SMOLLM;
    }
    // "llama-2" (with the dash) does not occur in "tinyllama-1.1b", so
    // TinyLlama files still reach the default below.
    if (std.ascii.indexOfIgnoreCase(file_name, "llama-2") != null) {
        return ChatTemplate.LLAMA2;
    }

    // Unrecognised names fall back to the TinyLlama template.
    return ChatTemplate.TINYLLAMA;
}
132+
133+
/// Chooses the default system prompt for a model by inspecting its file name.
///
/// Returns the coding-assistant prompt when the final path component of
/// `model_path` contains "code" (ASCII case-insensitive, which also covers
/// "coder", "Coder", "CodeLlama", ...); otherwise returns the general
/// assistant prompt.
///
/// Only the file name is examined: searching the whole path would make every
/// model stored under a directory such as `~/code/` look like a coder model.
///
/// The returned slice is a string literal with static lifetime; the caller
/// must not free it. Never fails and never allocates.
fn detectSystemPrompt(model_path: []const u8) []const u8 {
    const file_name = std.fs.path.basename(model_path);

    // A single case-insensitive search for "code" subsumes the separate
    // "coder" / "Coder" / "code" checks.
    const is_coder_model = std.ascii.indexOfIgnoreCase(file_name, "code") != null;

    // NOTE(review): the coder prompt names Qwen even when the matched model is
    // not a Qwen model — confirm whether a neutral wording is wanted.
    return if (is_coder_model)
        "You are Qwen, a helpful coding assistant. Write clean, efficient code with clear explanations."
    else
        "You are a helpful AI assistant. Be concise and direct.";
}
144+
112145
// Entry point for CLI chat command (with ternary support)
113146
pub fn runChatWithTernary(allocator: std.mem.Allocator, model_path: []const u8, initial_prompt: ?[]const u8, max_tokens: u32, temperature: f32, top_p: f32, use_ternary: bool) !void {
114147
return runChatInternal(allocator, model_path, initial_prompt, max_tokens, temperature, top_p, use_ternary);
@@ -172,9 +205,9 @@ fn runChatInternal(allocator: std.mem.Allocator, model_path: []const u8, initial
172205
};
173206
defer tokenizer.deinit();
174207

175-
// Use TinyLlama chat template
176-
const template = ChatTemplate.TINYLLAMA;
177-
const system_prompt = "You are a helpful AI assistant.";
208+
// Auto-detect model and select appropriate chat template
209+
const template = detectChatTemplate(model_path);
210+
const system_prompt = detectSystemPrompt(model_path);
178211

179212
// Initialize conversation history (keep last 10 messages + system)
180213
var history = ConversationHistory.init(allocator, 12);

src/vibeec/gguf_tokenizer.zig

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,26 @@ pub const ChatTemplate = struct {
212212
.assistant_suffix = " </s><s>[INST] ",
213213
};
214214

215+
/// Chat template for Qwen2.5 models.
///
/// Each turn is opened with `<|im_start|>` followed by the role name and a
/// newline, and closed with `<|im_end|>` plus a newline.
pub const QWEN: ChatTemplate = .{
    // System turn.
    .system_prefix = "<|im_start|>system\n",
    .system_suffix = "<|im_end|>\n",

    // User turn.
    .user_prefix = "<|im_start|>user\n",
    .user_suffix = "<|im_end|>\n",

    // Assistant turn.
    .assistant_prefix = "<|im_start|>assistant\n",
    .assistant_suffix = "<|im_end|>\n",
};
224+
225+
/// Chat template for SmolLM models.
///
/// Each turn is opened with `<|im_start|>` followed by the role name and a
/// newline, and closed with `<|im_end|>` plus a newline.
pub const SMOLLM: ChatTemplate = .{
    // System turn.
    .system_prefix = "<|im_start|>system\n",
    .system_suffix = "<|im_end|>\n",

    // User turn.
    .user_prefix = "<|im_start|>user\n",
    .user_suffix = "<|im_end|>\n",

    // Assistant turn.
    .assistant_prefix = "<|im_start|>assistant\n",
    .assistant_suffix = "<|im_end|>\n",
};
234+
215235
pub fn formatPrompt(
216236
self: *const ChatTemplate,
217237
allocator: std.mem.Allocator,

src/vibeec/http_server.zig

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -222,10 +222,10 @@ pub const HttpServer = struct {
222222
var generated: ?[]u8 = null;
223223
defer if (generated) |g| self.allocator.free(g);
224224

225-
// Build full prompt with system instruction
225+
// Build full prompt with ChatML format (works with most models)
226226
const system_prompt = "You are TRINITY, a helpful AI assistant. Be concise and direct.";
227227
const full_prompt = std.fmt.allocPrint(self.allocator,
228-
"<|system|>\n{s}<|end|>\n<|user|>\n{s}<|end|>\n<|assistant|>\n",
228+
"<|im_start|>system\n{s}<|im_end|>\n<|im_start|>user\n{s}<|im_end|>\n<|im_start|>assistant\n",
229229
.{system_prompt, prompt}
230230
) catch prompt;
231231
defer if (full_prompt.ptr != prompt.ptr) self.allocator.free(full_prompt);
@@ -325,10 +325,10 @@ pub const HttpServer = struct {
325325
"Connection: keep-alive\r\n\r\n";
326326
try connection.stream.writeAll(sse_header);
327327

328-
// Build prompt with system instruction
328+
// Build prompt with ChatML format
329329
const system_prompt = "You are TRINITY, a helpful AI assistant. Be concise and direct.";
330330
const full_prompt = std.fmt.allocPrint(self.allocator,
331-
"<|system|>\n{s}<|end|>\n<|user|>\n{s}<|end|>\n<|assistant|>\n",
331+
"<|im_start|>system\n{s}<|im_end|>\n<|im_start|>user\n{s}<|im_end|>\n<|im_start|>assistant\n",
332332
.{system_prompt, prompt}
333333
) catch prompt;
334334
defer if (full_prompt.ptr != prompt.ptr) self.allocator.free(full_prompt);

0 commit comments

Comments
 (0)