Skip to content

Commit bdd2f93

Browse files
aldehir authored and OsamaMazhar committed
vocab : add byte token handling to BPE detokenizer for Gemma4 (ggml-org#21488)
1 parent 64c41bf commit bdd2f93

1 file changed

Lines changed: 7 additions & 1 deletion

File tree

src/llama-vocab.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2804,7 +2804,9 @@ uint8_t llama_vocab::impl::token_to_byte(llama_token id) const {
28042804
return strtol(buf.c_str(), NULL, 16);
28052805
}
28062806
case LLAMA_VOCAB_TYPE_BPE: {
2807-
GGML_ABORT("fatal error");
2807+
// Gemma4 uses BPE with SPM-style byte fallback tokens (<0xXX>)
2808+
auto buf = token_data.text.substr(3, 2);
2809+
return strtol(buf.c_str(), NULL, 16);
28082810
}
28092811
case LLAMA_VOCAB_TYPE_WPM: {
28102812
GGML_ABORT("fatal error");
@@ -3285,6 +3287,10 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t
32853287
std::string result = llama_decode_text(token_text);
32863288
return _try_copy(result.data(), result.size());
32873289
}
3290+
if (attr & LLAMA_TOKEN_ATTR_BYTE) {
3291+
char byte = (char) token_to_byte(token);
3292+
return _try_copy((char*) &byte, 1);
3293+
}
32883294
break;
32893295
}
32903296
case LLAMA_VOCAB_TYPE_RWKV: {

0 commit comments

Comments (0)