Skip to content

Commit 672c9d0

Browse files
committed
fix: remove __class__ swap in lora_utils to fix training loop crash, delete redundant sharding code
1 parent f5736a1 commit 672c9d0

1 file changed

Lines changed: 18 additions & 12 deletions

File tree

src/maxtext/utils/lora_utils.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -506,30 +506,36 @@ def apply_lora_to_model(
506506
model_rngs = getattr(model.decoder, "rngs", None)
507507
decoder_input_tokens, decoder_positions = _prepare_dummy_inputs()
508508

509-
lora_model = qwix.apply_lora_to_model(
510-
model,
511-
lora_provider,
512-
decoder_input_tokens=decoder_input_tokens,
513-
decoder_positions=decoder_positions,
514-
rngs=model_rngs,
515-
)
509+
# Trigger materialization with Python loop fallback
510+
model.decoder.disable_quant_stats_update = True
511+
try:
512+
lora_model = qwix.apply_lora_to_model(
513+
model,
514+
lora_provider,
515+
decoder_input_tokens=decoder_input_tokens,
516+
decoder_positions=decoder_positions,
517+
rngs=model_rngs,
518+
)
519+
finally:
520+
model.decoder.disable_quant_stats_update = False
521+
522+
model = lora_model
516523

517524
if mesh is not None:
518525
with mesh, nn_partitioning.axis_rules(mt_config.logical_axis_rules):
519-
graph_def, state = nnx.split(lora_model)
526+
graph_def, state = nnx.split(model)
520527
default_memory_kind = jax.devices()[0].default_memory().kind
521528
dst_shardings = jax.tree.map(
522529
lambda x: jax.sharding.NamedSharding(mesh, x, memory_kind=default_memory_kind) if x is not None else None,
523530
nnx.get_partition_spec(state),
524531
)
525532
from tunix.rl import reshard # pylint: disable=import-outside-toplevel
526-
527533
state = reshard.reshard_pytree(state, dst_shardings)
528-
lora_model = nnx.merge(graph_def, state)
534+
model = nnx.merge(graph_def, state)
529535

530-
_verify_lora_parameters(lora_model, mt_config)
536+
_verify_lora_parameters(model, mt_config)
531537

532-
return lora_model
538+
return model
533539

534540

535541
def restore_lora_from_path(trainer: Any, mt_config: pyconfig.HyperParameters) -> Any:

0 commit comments

Comments (0)