Refactor from_pretrained: add wrap_with_tunix_adapter option and remove get_maxtext_model

A9isha · A9isha · commit 00de2a3c93cb · 2026-04-09T23:32:44.000Z
diff --git a/src/maxtext/utils/model_creation_utils.py b/src/maxtext/utils/model_creation_utils.py
@@ -206,7 +206,9 @@ def setup_configs_and_devices(argv: list[str] | None = None, kwargs: dict | None
 def create_models_and_meshes(trainer_config, sampler_config, trainer_devices, sampler_devices):
   """Create reference and actor models and their respective meshes."""
   max_logging.log("Creating reference model and also meshes for reference and rollout")
-  reference_model, reference_mesh = get_maxtext_model(trainer_config, trainer_devices)
+  reference_model, reference_mesh = from_pretrained(
+      trainer_config, devices=trainer_devices, wrap_with_tunix_adapter=True
+  )
   devices_array = maxtext_utils.create_device_mesh(sampler_config, sampler_devices)
   rollout_mesh = Mesh(devices_array, sampler_config.mesh_axes)
 
@@ -220,35 +222,15 @@ def create_models_and_meshes(trainer_config, sampler_config, trainer_devices, sa
     actor_mesh = reference_mesh
   else:
     max_logging.log("Creating policy model with same config as reference model on trainer mesh")
-    actor_model, actor_mesh = get_maxtext_model(trainer_config, trainer_devices)
+    actor_model, actor_mesh = from_pretrained(
+        trainer_config, devices=trainer_devices, wrap_with_tunix_adapter=True
+    )
 
   return reference_model, reference_mesh, actor_model, actor_mesh, rollout_mesh
 
-def get_maxtext_model(config, devices=None):
-  """
-  Load MaxText model with Tunix adapter.
-  # Note: pass the path to your scanned checkpoint for 'load_parameters_path'.
-  # To create a scanned checkpoint, you can use /maxtext/src/MaxText/checkpoint_conversion/to_maxtext.py and if
-  # using Pathways, please set `USE_PATHWAYS=1` and use `$((1 - USE_PATHWAYS))` for storage flags:
-  # export USE_PATHWAYS=1
-  # python src/MaxText/checkpoint_conversion/to_maxtext.py \
-  #  --model_name="gemma2-2b" \
-  #  --base_output_directory="/path/to/your/output/directory" \
-  #  --scan_layers=True \
-  #  --checkpoint_storage_use_ocdbt=$((1 - USE_PATHWAYS)) \
-  #  --checkpoint_storage_use_zarr3=$((1 - USE_PATHWAYS))
-  # Please ensure that you pass the full path ending in `/0/items` for load_parameters_path to train_rl.py i.e.,
-  # load_parameters_path=/path/to/your/output/directory/0/items
-  """
-  model, mesh = from_pretrained(config, devices=devices)
-  with mesh:
-    use_no_op_mappings = "maxtext_config" in config.vllm_additional_config
-    tunix_model = TunixMaxTextAdapter(base_model=model, use_no_op_mappings=use_no_op_mappings)
-    tunix_model.config = None
-  return tunix_model, mesh
-
-
-def from_pretrained(config, original_mesh=None, devices=None, model_mode=MODEL_MODE_TRAIN, rng_key=None):
+def from_pretrained(
+    config, original_mesh=None, devices=None, model_mode=MODEL_MODE_TRAIN, rng_key=None, wrap_with_tunix_adapter=False
+):
   """Creates a NNX model with sharded parameters, possibly loading from a checkpoint."""
   mesh = original_mesh
   if config.convert_checkpoint_if_possible:
@@ -411,6 +393,12 @@ def create_sharded_state():
       except Exception as e:
         raise ValueError(f"Checkpoint loading failed: {e}") from e
 
+    if wrap_with_tunix_adapter:
+      with mesh:
+        use_no_op_mappings = "maxtext_config" in config.vllm_additional_config
+        model = TunixMaxTextAdapter(base_model=model, use_no_op_mappings=use_no_op_mappings)
+        model.config = None
+
     if original_mesh:
       return model
     else: