Revert FP8 fixes: restore jax.checkpoint for FP8 layers and drop pure_nnx dummy inputs for qwix.quantize_model

hsuan-lun-chiang · hsuan-lun-chiang · commit f4674bbda5f6 · 2026-05-22T10:24:18.000Z
diff --git a/src/maxtext/layers/nnx_decoders.py b/src/maxtext/layers/nnx_decoders.py
@@ -545,14 +545,8 @@ def pure_layer_fn(state_in, y_in):
       out = merged_layer(y_in, **kwargs)
       return out, nnx.state(merged_layer)
 
-    # Linen FP8 ops keep amax_history in mutable Linen scope; jax.checkpoint
-    # re-traces and hits UnexpectedTracerError. Skip remat for FP8.
-    uses_linen_fp8_mutable_state = self.config.quantization in ("fp8_nanoo", "fp8_gpu")
-    if uses_linen_fp8_mutable_state:
-      out, new_state = pure_layer_fn(state, y)
-    else:
-      checkpointed_fn = jax.checkpoint(pure_layer_fn, policy=policy, prevent_cse=prevent_cse)
-      out, new_state = checkpointed_fn(state, y)
+    checkpointed_fn = jax.checkpoint(pure_layer_fn, policy=policy, prevent_cse=prevent_cse)
+    out, new_state = checkpointed_fn(state, y)
     nnx.update(layer, new_state)
 
     return out
diff --git a/src/maxtext/layers/quantizations.py b/src/maxtext/layers/quantizations.py
@@ -812,15 +812,6 @@ def maybe_quantize_model(model, config):
   if config.use_qwix_quantization and not config.use_batch_split_schedule:
     quantization_provider = get_qt_provider(config)
     if quantization_provider:
-      if config.pure_nnx:
-        # qwix.quantize_model traces NNX modules to locate quant points, so it
-        # requires example model inputs (Linen modules are traced lazily and
-        # take none). Feed dummy decoder tokens/positions of the train shape.
-        input_shape = (config.micro_batch_size_to_train_on, config.max_target_length)
-        dummy_tokens = jnp.ones(input_shape, dtype=jnp.int32)
-        dummy_positions = jnp.ones(input_shape, dtype=jnp.int32)
-        model = qwix.quantize_model(model, quantization_provider, dummy_tokens, dummy_positions)
-      else:
         model = qwix.quantize_model(model, quantization_provider)
   return model