Skip to content

Commit 97ee5e2

Browse files
gHashTag and ona-agent committed
Add SmolLM 135M support (12-14 tok/s)
- Tied embeddings fallback (output = token_embd)
- GPT-2 tokenizer support (Ġ → space, Ċ → newline)
- SmolLM 135M: 12-14 tok/s, 0.27s load, 139MB
- TinyLlama 1.1B: 1.5 tok/s, 2.76s load, 1.1GB
- 9x speedup with smaller model

Co-authored-by: Ona <no-reply@ona.com>
1 parent e8265c9 commit 97ee5e2

3 files changed

Lines changed: 25 additions & 3 deletions

File tree

bin/vibee

9.43 KB
Binary file not shown.

src/vibeec/gguf_model.zig

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,17 @@ pub const FullModel = struct {
118118

119119
// Load embeddings
120120
self.token_embedding = try self.loadTensor("token_embd.weight");
121-
self.output_weight = try self.loadTensor("output.weight");
121+
122+
// Try to load output.weight, fallback to tied embeddings (token_embd)
123+
self.output_weight = self.loadTensor("output.weight") catch |err| blk: {
124+
if (err == error.TensorNotFound) {
125+
// Tied embeddings: output = token_embd (common in smaller models)
126+
std.debug.print(" Using tied embeddings (output = token_embd)\n", .{});
127+
break :blk self.token_embedding;
128+
}
129+
return err;
130+
};
131+
122132
self.output_norm = try self.loadTensor("output_norm.weight");
123133

124134
// Initialize RoPE

src/vibeec/gguf_tokenizer.zig

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,25 @@ pub const Tokenizer = struct {
139139
for (tokens) |token| {
140140
if (token < self.vocab_size) {
141141
const text = self.vocab[token];
142-
// Replace special space character with regular space
142+
// Replace special space characters with regular space
143143
var i: usize = 0;
144144
while (i < text.len) {
145+
// Llama-style space: ▁ (U+2581) = 0xE2 0x96 0x81
145146
if (i + 2 < text.len and text[i] == 0xE2 and text[i + 1] == 0x96 and text[i + 2] == 0x81) {
146147
try result.append(' ');
147148
i += 3;
148-
} else {
149+
}
150+
// GPT-2 style space: Ġ (U+0120) = 0xC4 0xA0
151+
else if (i + 1 < text.len and text[i] == 0xC4 and text[i + 1] == 0xA0) {
152+
try result.append(' ');
153+
i += 2;
154+
}
155+
// Newline token: Ċ (U+010A) = 0xC4 0x8A
156+
else if (i + 1 < text.len and text[i] == 0xC4 and text[i + 1] == 0x8A) {
157+
try result.append('\n');
158+
i += 2;
159+
}
160+
else {
149161
try result.append(text[i]);
150162
i += 1;
151163
}

0 commit comments

Comments
 (0)