AI-Hypercomputer
diff --git a/src/maxtext/layers/pipeline.py b/src/maxtext/layers/pipeline.py
Lines changed: 33 additions & 33 deletions
@@ -328,6 +328,31 @@ def _run_weight_initialization(
         out_sharding=self.output_sharding,
     )
 
+  @staticmethod
+  def _remove_fsdp_from_physical_partition_spec(pps):
+    """Drops 'fsdp'/'fsdp_transpose' from a PartitionSpec `pps`; any non-P input is returned unchanged."""
+    if isinstance(pps, P):
+      new_spec = []
+      # Rebuild the spec entry by entry; an entry is None, a single axis name, or a list/tuple of names.
+      for axis in pps:
+        if axis is None:
+          new_spec.append(None)
+        elif isinstance(axis, str):
+          # A lone 'fsdp' or 'fsdp_transpose' entry is replaced with None; any other name is kept as-is.
+          if axis not in ("fsdp", "fsdp_transpose"):
+            new_spec.append(axis)
+          else:
+            new_spec.append(None)
+        elif isinstance(axis, (list, tuple)):
+          # Filter both names out of the collection; the entry is always stored as a tuple and may end up empty.
+          new_axis = [a for a in axis if a not in ("fsdp", "fsdp_transpose")]
+          new_spec.append(tuple(new_axis))
+        else:
+          raise ValueError(f"Unsupported_axis_type: {type(axis)}")
+      # All entries processed: wrap the rebuilt entries in a new P, same length and order as the input.
+      return P(*new_spec)
+    return pps
+
 
 class Pipeline(PipelineBase):
   """Original Pipeline implementation."""
@@ -754,31 +779,6 @@ def _remove_logically_partition_leaf(v):
 
     return jax.tree.map(_remove_logically_partition_leaf, weights, is_leaf=lambda v: isinstance(v, LogicallyPartitioned))
 
-  @staticmethod
-  def _remove_fsdp_from_physical_partition_spec(pps):
-    """Removes 'fsdp' and 'fsdp_transpose' from physical partition spec."""
-    if isinstance(pps, P):
-      new_spec = []
-      # Iterate through each axis in the original PartitionSpec.
-      for axis in pps:
-        if axis is None:
-          new_spec.append(None)
-        elif isinstance(axis, str):
-          # If the axis is 'fsdp', replace it with None to signify replication.
-          if axis not in ("fsdp", "fsdp_transpose"):
-            new_spec.append(axis)
-          else:
-            new_spec.append(None)
-        elif isinstance(axis, (list, tuple)):
-          # If the axis is a collection, filter out 'fsdp'.
-          new_axis = [a for a in axis if a not in ("fsdp", "fsdp_transpose")]
-          new_spec.append(tuple(new_axis))
-        else:
-          raise ValueError(f"Unsupported_axis_type: {type(axis)}")
-        # Return a new sharding object with the modified spec.
-      return P(*new_spec)
-    return pps
-
   def all_gather_over_fsdp(self, variables, logical_partition_spec):
     """Gathers FSDP partitioned variables to reconstruct them fully."""
     physical_partition_spec = logical_to_mesh(
@@ -1107,7 +1107,7 @@ def get_current_stage_weights(
 
   def get_current_weights_from_bsw(self, bsw, loop_iteration, physical_partition_spec, is_initializing=None):
     """Retrieves current weights out of the sliding buffer window (bsw)."""
-    bsw_pps = jax.tree.map(pipeline_utils.remove_fsdp_from_physical_partition_spec, physical_partition_spec)
+    bsw_pps = jax.tree.map(self._remove_fsdp_from_physical_partition_spec, physical_partition_spec)
     _, repeat_ids = self.get_microbatch_and_repeat_ids(loop_iteration)
     target_repeat_id = repeat_ids[0]
 
@@ -1159,12 +1159,12 @@ def from_repeat_weights_to_bsw(
       axes_to_gather=("fsdp", "fsdp_transpose", "expert"),  # three major FSDP-like axes
   ):
     """Generates the buffer sliding window (bsw) from the gathered repeat weights."""
-    bsw_pps = pipeline_utils.generate_bsw_pps_from_pps(physical_partition_spec)
+    bsw_pps = pipeline_utils.derive_stage_weight_partition_specs(physical_partition_spec)
     repeat_weights_pps = jax.tree.map(lambda p: P(*p[1:]), physical_partition_spec)
 
     # Dynamically gather the index pytrees for all specified axes
     axis_indices_dict = {
-        axis: pipeline_utils.get_fsdp_index_pytree(physical_partition_spec, axis) for axis in axes_to_gather
+        axis: pipeline_utils.get_mesh_axis_dim_indices(physical_partition_spec, axis) for axis in axes_to_gather
     }
 
     axis_names = list(axis_indices_dict.keys())
@@ -1299,7 +1299,7 @@ def __call__(
           example_inputs, example_segmentation, example_position, segment_idx, position_idx, deterministic, model_mode
       )
 
-    logical_partition_spec = pipeline_utils.get_logical_spec_repeats_removed(logical_partition_spec)
+    logical_partition_spec = pipeline_utils.strip_pipeline_repeat_logical_axis(logical_partition_spec)
 
     def run_iteration_scannable(model, loop_state):
       return (
@@ -1318,7 +1318,7 @@ def run_iteration_scannable(model, loop_state):
 
     # base scannable function used twice for real and bubble runs
     base_scannable = functools.partial(
-        pipeline_utils.create_run_scannable,
+        pipeline_utils.create_rematerialized_pipeline_stage,
         model=self,
         run_iteration_scannable=run_iteration_scannable,
         deterministic=deterministic,
@@ -1334,10 +1334,10 @@ def run_iteration_scannable(model, loop_state):
 
     def run_all_iterations(model, loop_state):
       if self.config.scan_pipeline_repeats:
-        run_repeats_scanned = pipeline_utils.create_run_repeats_scanned(
-            run_scannable=run_one_repeat_scannable, length=model.config.num_pipeline_repeats
+        run_repeats_scanned = pipeline_utils.create_flax_pipeline_scan(
+            pipeline_stage_fn=run_one_repeat_scannable, length=model.config.num_pipeline_repeats
         )
-        run_bubbles_scanned = pipeline_utils.create_run_repeats_scanned(run_scannable=run_bubbles_scannable, length=1)
+        run_bubbles_scanned = pipeline_utils.create_flax_pipeline_scan(pipeline_stage_fn=run_bubbles_scannable, length=1)
         loop_state, _ = run_repeats_scanned(model, loop_state)
         loop_state, _ = run_bubbles_scanned(model, loop_state)
       else: