@@ -266,12 +266,13 @@ static LayerConfig LayerConfigGemma3_4B_LM(size_t model_dim) {
266266 return config;
267267}
268268
269- // Until we have the SigLIP checkpoints included, we use the LM config directly.
269+ // Shared LM-only config for Gemma3 4B: used directly for text-only checkpoints
270+ // (e.g. TranslateGemma) and as the base for the VLM build.
270271static ModelConfig ConfigGemma3_4B_LM () {
271272 ModelConfig config = ConfigBaseGemmaV3 ();
272- config.display_name = " Gemma3_4B " ;
273- config.model = Model::GEMMA3_4B ;
274- config.wrapping = PromptWrapping::GEMMA_VLM ;
273+ config.display_name = " Gemma3_4B_LM " ;
274+ config.model = Model::GEMMA3_4B_LM ;
275+ config.wrapping = PromptWrapping::GEMMA_IT ;
275276 config.model_dim = 2560 ;
276277 config.vocab_size = kGemmaV3VocabSize ; // new vocab size / tokenizer
277278 config.max_seq_len = 32 * 1024 ;
@@ -319,9 +320,9 @@ static LayerConfig LayerConfigGemma3_12B_LM(size_t model_dim) {
319320
320321static ModelConfig ConfigGemma3_12B_LM () {
321322 ModelConfig config = ConfigBaseGemmaV3 ();
322- config.display_name = " Gemma3_12B " ;
323- config.model = Model::GEMMA3_12B ;
324- config.wrapping = PromptWrapping::GEMMA_VLM ;
323+ config.display_name = " Gemma3_12B_LM " ;
324+ config.model = Model::GEMMA3_12B_LM ;
325+ config.wrapping = PromptWrapping::GEMMA_IT ;
325326 config.model_dim = 3840 ;
326327 config.vocab_size = kGemmaV3VocabSize ; // new vocab size / tokenizer
327328 config.max_seq_len = 32 * 1024 ;
@@ -369,9 +370,9 @@ static LayerConfig LayerConfigGemma3_27B_LM(size_t model_dim) {
369370
370371static ModelConfig ConfigGemma3_27B_LM () {
371372 ModelConfig config = ConfigBaseGemmaV3 ();
372- config.display_name = " Gemma3_27B " ;
373- config.model = Model::GEMMA3_27B ;
374- config.wrapping = PromptWrapping::GEMMA_VLM ;
373+ config.display_name = " Gemma3_27B_LM " ;
374+ config.model = Model::GEMMA3_27B_LM ;
375+ config.wrapping = PromptWrapping::GEMMA_IT ;
375376 config.model_dim = 5376 ;
376377 config.vocab_size = kGemmaV3VocabSize ; // new vocab size / tokenizer
377378 config.max_seq_len = 32 * 1024 ;
@@ -461,6 +462,12 @@ static ModelConfig ConfigFromModel(Model model) {
461462 return ConfigGemma3_27B ();
462463 case Model::GEMMA3_270M:
463464 return ConfigGemma3_270M ();
465+ case Model::GEMMA3_4B_LM:
466+ return ConfigGemma3_4B_LM ();
467+ case Model::GEMMA3_12B_LM:
468+ return ConfigGemma3_12B_LM ();
469+ case Model::GEMMA3_27B_LM:
470+ return ConfigGemma3_27B_LM ();
464471 default :
465472 HWY_ABORT (" Model type %d unknown." , static_cast <int >(model));
466473 }
@@ -494,6 +501,12 @@ const char* ModelPrefix(Model model) {
494501 return " gemma3-27b" ;
495502 case Model::GEMMA3_270M:
496503 return " gemma3-270m" ;
504+ case Model::GEMMA3_4B_LM:
505+ return " gemma3-4b-lm" ;
506+ case Model::GEMMA3_12B_LM:
507+ return " gemma3-12b-lm" ;
508+ case Model::GEMMA3_27B_LM:
509+ return " gemma3-27b-lm" ;
497510 default :
498511 HWY_ABORT (" Model type %d unknown." , static_cast <int >(model));
499512 }
@@ -529,14 +542,16 @@ ModelConfig::ModelConfig(const Model model, Type weight,
529542}
530543
531544static Model FindModel (const std::string& specifier) {
545+ // A specifier can match several models because some prefixes extend others
546+ // (e.g. `gemma3-4b-` is a prefix of `gemma3-4b-lm-`). Keep the longest
547+ // matching prefix so the more specific model wins.
532548 Model found_model = Model::UNKNOWN;
549+ size_t longest_match = 0 ;
533550 ForEachModel ([&](Model model) {
534- // Some model names are prefixes of other model names
535551 const std::string prefix = std::string (ModelPrefix (model)) + " -" ;
536- if (specifier.rfind (prefix, 0 ) == 0 ) { // Starts with prefix.
537- // We only expect one match.
538- HWY_ASSERT_M (found_model == Model::UNKNOWN, specifier.c_str ());
552+ if (specifier.rfind (prefix, 0 ) == 0 && prefix.size () > longest_match) {
539553 found_model = model;
554+ longest_match = prefix.size ();
540555 }
541556 });
542557 HWY_ASSERT_M (found_model != Model::UNKNOWN, specifier.c_str ());
@@ -687,7 +702,8 @@ Model DeduceModel(const Path& blob_path, size_t layers, int layer_types) {
687702 return (layer_types & kDeduced448 ) ? Model::PALIGEMMA2_3B_448
688703 : Model::PALIGEMMA2_3B_224;
689704 case 34 :
690- return Model::GEMMA3_4B;
705+ return (layer_types & kDeducedViT ) ? Model::GEMMA3_4B
706+ : Model::GEMMA3_4B_LM;
691707 case 42 :
692708 if (layer_types & kDeducedViT ) {
693709 return (layer_types & kDeduced448 ) ? Model::PALIGEMMA2_10B_448
@@ -697,9 +713,11 @@ Model DeduceModel(const Path& blob_path, size_t layers, int layer_types) {
697713 case 46 :
698714 return Model::GEMMA2_27B;
699715 case 48 :
700- return Model::GEMMA3_12B;
716+ return (layer_types & kDeducedViT ) ? Model::GEMMA3_12B
717+ : Model::GEMMA3_12B_LM;
701718 case 62 :
702- return Model::GEMMA3_27B;
719+ return (layer_types & kDeducedViT ) ? Model::GEMMA3_27B
720+ : Model::GEMMA3_27B_LM;
703721
704722 // TODO: detect these.
705723 /*
0 commit comments