Clean up code for readability: remove stale FIX/FIXED comments and commented-out code

ajkv-google · ajkv-google · commit ff021b7c468c · 2026-01-15T22:38:20.000Z
diff --git a/recml/core/data/iterator.py b/recml/core/data/iterator.py
@@ -68,21 +68,18 @@ def _maybe_to_numpy(
     ) -> np.ndarray | tf.SparseTensor | tf.RaggedTensor:
       if isinstance(x, (tf.SparseTensor, tf.RaggedTensor, np.ndarray)):
         return x
-      # FIX: Check for attribute existence to avoid crashes on non-Tensor objects
       if hasattr(x, "_numpy"):
         numpy = x._numpy()  # pylint: disable=protected-access
       elif hasattr(x, "numpy"):
         numpy = x.numpy()
       else:
-        return x  # Return as-is if it can't be converted
+        return x
 
       if isinstance(numpy, np.ndarray):
-        # `numpy` shares the same underlying buffer as the `x` Tensor.
         # Tensors are expected to be immutable, so we disable writes.
         numpy.setflags(write=False)
       return numpy
 
-    # FIX: Use jax.tree.map instead of tf.nest.map_structure
     return jax.tree.map(_maybe_to_numpy, batch)
 
   @property
@@ -115,7 +112,6 @@ def _to_element_spec(
         )
       return clu_data.ArraySpec(dtype=x.dtype, shape=tuple(x.shape))
 
-    # element_spec = tf.nest.map_structure(_to_element_spec, batch)
     element_spec = jax.tree.map(_to_element_spec, batch)
     self._element_spec = element_spec
     return element_spec
diff --git a/recml/core/training/partitioning.py b/recml/core/training/partitioning.py
@@ -110,7 +110,6 @@ def _shard(x: np.ndarray) -> jax.Array:
   def partition_init(
       self, init_fn: CreateStateFn, *, abstract_batch: PyTree | None = None
   ) -> CreateStateFn:
-    # FIXED: Use 'with self.mesh'
     with self.mesh:
       if abstract_batch is not None:
         mesh_context.set_global_mesh(self.mesh)
@@ -122,7 +121,6 @@ def partition_init(
       init_fn = jax.jit(init_fn, out_shardings=self.state_sharding)
 
     def _wrapped_init(batch: PyTree) -> State:
-      # FIXED: Use 'with self.mesh'
       with self.mesh:
         state = init_fn(batch)
         state = _maybe_unbox_state(state)
@@ -136,7 +134,6 @@ def partition_step(self, fn: StepFn, *, training: bool = False) -> StepFn:
       jit_kws["out_shardings"] = (self.state_sharding, None)
       jit_kws["donate_argnums"] = (1,)
 
-    # FIXED: Use 'with self.mesh' and legacy bridge
     with self.mesh:
       mesh_context.set_global_mesh(self.mesh)
       step_fn = jax.jit(
@@ -146,7 +143,6 @@ def partition_step(self, fn: StepFn, *, training: bool = False) -> StepFn:
       )
 
     def _wrapped_step(batch: PyTree, state: State) -> Any:
-      # FIXED: Use 'with self.mesh'
       with self.mesh:
         return step_fn(batch, state)
 
@@ -238,9 +234,7 @@ def partition_init(
           " model parallel partitioner."
       )
 
-    # FIXED: Use 'with self.mesh' directly
     with self.mesh:
-      # FIXED: Legacy bridge
       mesh_context.set_global_mesh(self.mesh)
       abstract_state = jax.eval_shape(init_fn, abstract_batch)
       specs = nn.get_partition_spec(abstract_state)
@@ -254,7 +248,6 @@ def partition_init(
       compiled_init_fn = jax.jit(init_fn, out_shardings=state_sharding)
 
     def _init(batch: PyTree) -> State:
-      # FIXED: Use 'with self.mesh' directly
       with self.mesh:
         state = compiled_init_fn(batch)
         state = _maybe_unbox_state(state)
@@ -273,7 +266,7 @@ def partition_step(self, fn: StepFn, *, training: bool = False) -> StepFn:
     else:
       jit_kws["out_shardings"] = None
 
-    # FIXED: Use 'with self.mesh' directly and legacy bridge
+
     with self.mesh:
       mesh_context.set_global_mesh(self.mesh)
       step_fn = jax.jit(
@@ -296,7 +289,6 @@ def partition_step(self, fn: StepFn, *, training: bool = False) -> StepFn:
       )
 
     def _step(batch: PyTree, state: State) -> Any:
-      # FIXED: Use 'with self.mesh' directly
       with self.mesh:
         return step_fn(batch, state)
 
diff --git a/recml/layers/linen/sparsecore.py b/recml/layers/linen/sparsecore.py
@@ -371,17 +371,6 @@ class SparsecoreEmbed(nn.Module):
   sparsecore_config: SparsecoreConfig
   mesh: jax.sharding.Mesh | jax.sharding.AbstractMesh | None = None
 
-  # def get_mesh(self) -> jax.sharding.Mesh | jax.sharding.AbstractMesh:
-  #   if self.mesh is not None:
-  #     return self.mesh
-  #   abstract_mesh = jax.sharding.get_abstract_mesh()
-  #   if not abstract_mesh.shape_tuple:
-  #     raise ValueError(
-  #         'No abstract mesh shape was set with `jax.sharding.use_mesh`. Make'
-  #         ' sure to set the mesh when calling the sparsecore module.'
-  #     )
-  #   return abstract_mesh
-  
   def get_mesh(self) -> jax.sharding.Mesh:
     # Try to get the mesh from our custom global context
     mesh = mesh_context.get_global_mesh()
diff --git a/training.md b/training.md
@@ -68,11 +68,4 @@ RUN pip install "protobuf>=6.31.1" --no-deps
 CMD ["python", "recml/examples/dlrm_experiment_test.py"]
 ```
 
-You can use this dockerfile to run the DLRM model experiment from this repo in your own environment. 
-
-
-
-
-
-
-
+You can use this dockerfile to run the DLRM model experiment from this repo in your own environment.