We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 26a00cb commit a6cdb69 (Copy full SHA for a6cdb69)
1 file changed
examples/qualcomm/oss_scripts/llama/wrappers/llm_wrappers.py
@@ -181,9 +181,9 @@ def _prepare_model(self): # noqa: C901
181
k.replace("_orig_mod.", ""): v for k, v in state_dict.items()
182
}
183
184
- # Gemma RMSNorm computes (1 + w) * x but ExecuTorch's RMSNorm computes
185
- # w * x, so add the +1 offset to norm weights regardless of load path.
186
- # See https://github.com/huggingface/transformers/pull/29402
+ # Gemma checkpoints store RMSNorm weights as deviations from 1 and use (1 + w) * x.
+ # This model/export path uses standard RMSNorm semantics (w * x), so add +1 to
+ # norm weights regardless of load path. See https://github.com/huggingface/transformers/pull/29402
187
if self.control_args.decoder_model in {
188
"gemma-2b",
189
"gemma2-2b",
0 commit comments