@@ -1125,7 +1125,7 @@ def set_gguf_parameters(self):
11251125 if (n_experts := self.find_hparam(["num_local_experts", "num_experts"], optional=True)) is not None:
11261126 self.gguf_writer.add_expert_count(n_experts)
11271127 logger.info(f"gguf: expert count = {n_experts}")
1128- if (n_experts_used := self.find_hparam(["num_experts_per_tok", "num_experts_per_token"], optional=True)) is not None:
1128+ if (n_experts_used := self.find_hparam(["num_experts_per_tok", "num_experts_per_token", "top_k_experts" ], optional=True)) is not None:
11291129 self.gguf_writer.add_expert_used_count(n_experts_used)
11301130 logger.info(f"gguf: experts used count = {n_experts_used}")
11311131 if (n_expert_groups := self.hparams.get("n_group")) is not None:
@@ -6686,7 +6686,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
66866686@ModelBase.register("Gemma3ForCausalLM", "Gemma3ForConditionalGeneration")
66876687class Gemma3Model(TextModel):
66886688 model_arch = gguf.MODEL_ARCH.GEMMA3
6689- norm_shift = 1.0 # Gemma3RMSNorm adds 1.0 to the norm value
6689+
6690+ def norm_shift(self, name: str) -> float:
6691+ return 1.0 if name.endswith("norm.weight") else 0.0 # Gemma3RMSNorm adds 1.0 to the norm value
66906692
66916693 def set_vocab(self):
66926694 if (self.dir_model / "tokenizer.model").is_file():
@@ -6724,17 +6726,22 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
67246726
67256727 # remove OOV (out-of-vocabulary) rows in token_embd
67266728 if "embed_tokens.weight" in name:
6729+ n_vocab_real = -1
67276730 if (self.dir_model / "tokenizer.model").is_file():
67286731 tokens = self._create_vocab_sentencepiece()[0]
6732+ n_vocab_real = len(tokens)
67296733 else:
6730- tokens = self.get_vocab_base()[0]
6731- data_torch = data_torch[:len(tokens)]
6734+ with open(self.dir_model / "tokenizer.json", "r", encoding="utf-8") as f:
6735+ tokenizer_json = json.load(f)
6736+ n_vocab_real = len(tokenizer_json["model"]["vocab"]) + len(tokenizer_json["added_tokens"])
6737+ data_torch = data_torch[:n_vocab_real]
67326738
67336739 # ref code in Gemma3RMSNorm
67346740 # output = output * (1.0 + self.weight.float())
67356741 # note: this is not the case on gemma3n
6736- if name.endswith("norm.weight"):
6737- data_torch = data_torch + self.norm_shift
6742+ f_shift = self.norm_shift(name)
6743+ if f_shift != 0.0:
6744+ data_torch = data_torch + f_shift
67386745
67396746 yield from super().modify_tensors(data_torch, name, bid)
67406747
@@ -6908,7 +6915,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
69086915 assert data_torch.shape[2] == 1
69096916 data_torch = data_torch.reshape(data_torch.shape[0], data_torch.shape[1])
69106917
6911- yield from super().modify_tensors(data_torch, name, bid)
6918+ mapped_name = self.map_tensor_name(name, (".weight", ".bias", ".input_max", ".input_min", ".output_max", ".output_min"))
6919+ yield (mapped_name, data_torch)
69126920
69136921
69146922@ModelBase.register("Gemma3nForConditionalGeneration")
@@ -7033,7 +7041,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
70337041@ModelBase.register("Gemma3nForCausalLM", "Gemma3nForConditionalGeneration")
70347042class Gemma3NModel(Gemma3Model):
70357043 model_arch = gguf.MODEL_ARCH.GEMMA3N
7036- norm_shift = 0.0 # same value with Gemma3p5RMSNorm scale_shift on python code
70377044
70387045 _altup_proj: list[Tensor] = []
70397046 _altup_unembd: list[Tensor] = []
@@ -7052,6 +7059,10 @@ def __init__(self, *args, **kwargs):
70527059 torch.Tensor(), # to be replaced
70537060 ]
70547061
7062+ def norm_shift(self, name: str) -> float:
7063+ del name
7064+ return 0.0 # same value with Gemma3p5RMSNorm scale_shift on python code
7065+
70557066 def set_vocab(self):
70567067 # For Gemma3n multimodal models, we need the FULL vocab_size (262400)
70577068 # which includes special tokens from 262144-262399 for vision/audio.
@@ -7197,6 +7208,9 @@ def set_vocab(self):
71977208
71987209 assert len(tokens) == vocab.vocab_size
71997210
7211+ # TODO @ngxson : there are some known (rare) issues with the tokenizer during development
7212+ # but I don't have time to dive into them right now;
7213+ # using a dedicated tokenizer name so that we can fix later without re-converting GGUF
72007214 self.gguf_writer.add_tokenizer_model("gemma4")
72017215 self.gguf_writer.add_token_list(tokens)
72027216 self.gguf_writer.add_token_scores(scores)
0 commit comments