
Commit 6433043

Author: Sharon Yu
Message: fix format issue
Parent: a8f1147

4 files changed: 25 additions & 40 deletions

src/MaxText/max_utils.py

Lines changed: 1 addition & 4 deletions
@@ -1040,9 +1040,6 @@ def print_mesh_axes_info(mesh: jax.sharding.Mesh):
     max_logging.info("Mesh Axes: (Empty Mesh)")
     return
 
-  axis_info = [
-      f"{axis_name}: {axis_size}"
-      for axis_name, axis_size in mesh.shape.items()
-  ]
+  axis_info = [f"{axis_name}: {axis_size}" for axis_name, axis_size in mesh.shape.items()]
   info_str = "Mesh Axes: (" + ", ".join(axis_info) + ")"
   max_logging.info(info_str)
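
Note: this change is purely cosmetic. jax.sharding.Mesh.shape is an ordered mapping of axis name to size, so the one-line comprehension produces the same strings as the old multi-line form. A minimal sketch (the mesh construction below is illustrative, not from the commit, and assumes at least one available JAX device):

import jax
import numpy as np
from jax.sharding import Mesh

# Build a toy 2-D mesh over whatever devices are available.
devices = np.array(jax.devices()).reshape(-1, 1)
mesh = Mesh(devices, axis_names=("data", "model"))

# Same comprehension as the reformatted line in print_mesh_axes_info.
axis_info = [f"{axis_name}: {axis_size}" for axis_name, axis_size in mesh.shape.items()]
print("Mesh Axes: (" + ", ".join(axis_info) + ")")  # e.g. Mesh Axes: (data: 8, model: 1)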

src/MaxText/maxtext_utils.py

Lines changed: 13 additions & 21 deletions
@@ -1207,40 +1207,32 @@ def schedule(step):
   return optax.join_schedules(pieces, boundaries)
 
 
-def print_state_mesh_shardings_params(
-    state, state_sharding, state_logical_annotations, mesh, logical_axis_rules
-):
+def print_state_mesh_shardings_params(state, state_sharding, state_logical_annotations, mesh, logical_axis_rules):
   """Print state shardings."""
-  if (not hasattr(state, 'params') or
-      not hasattr(state_sharding, 'params') or
-      not hasattr(state_logical_annotations, 'params')):
+  if (
+      not hasattr(state, "params")
+      or not hasattr(state_sharding, "params")
+      or not hasattr(state_logical_annotations, "params")
+  ):
     max_logging.warning(
-        "Warning: 'params' attribute missing in one of the inputs to "
-        "print_state_mesh_shardings_params."
+        "Warning: 'params' attribute missing in one of the inputs to " "print_state_mesh_shardings_params."
     )
     return
 
   leaves_params, _ = jax.tree_util.tree_flatten_with_path(state.params)
   leaves_sharding, _ = jax.tree_util.tree_flatten_with_path(state_sharding.params)
-  leaves_rule_values, _ = jax.tree_util.tree_flatten_with_path(
-      state_logical_annotations.params
-  )
+  leaves_rule_values, _ = jax.tree_util.tree_flatten_with_path(state_logical_annotations.params)
 
   if not len(leaves_params) == len(leaves_sharding) == len(leaves_rule_values):
     max_logging.warning(
-        "Warning: Parameter tree structure mismatch between state, shardings,"
-        " and logical annotations."
+        "Warning: Parameter tree structure mismatch between state, shardings," " and logical annotations."
     )
     return
 
   # Build a reverse map (Potential Physical Axes Tuple -> List of Semantic Names)
   rule_value_to_semantic = defaultdict(list)
   if logical_axis_rules:
-    rules_iter = (
-        logical_axis_rules.items()
-        if isinstance(logical_axis_rules, dict)
-        else logical_axis_rules
-    )
+    rules_iter = logical_axis_rules.items() if isinstance(logical_axis_rules, dict) else logical_axis_rules
     for name, potentials in rules_iter:
       # name: LHS for example 'embed/activation_batch
       # potentials: RHS for example 'data', 'model', None, ['data', 'model']
@@ -1289,7 +1281,7 @@ def get_semantic_names(rule_val_item, rmap):
     names = rmap.get(key)
 
     if names:
-        return "{" + " | ".join(sorted(list(set(names)))) + "}"
+      return "{" + " | ".join(sorted(list(set(names)))) + "}"
     else:
       # Show rule value if unmapped.
       return f"'{str(key)}'"
@@ -1302,8 +1294,8 @@ def get_semantic_names(rule_val_item, rmap):
       semantic_parts.append(str(name_str))
       semantic_str = "Partitionspec(" + ", ".join(semantic_parts) + ")"
     elif leaf_rule_value is None:
-        semantic_str = "Partitionspec(None)"
-    else: # Should not be common
+      semantic_str = "Partitionspec(None)"
+    else:  # Should not be common
       semantic_str = str(leaf_rule_value)
 
     # Multi-line logging for each parameter
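
Note: the joined warning messages rely on Python's implicit concatenation of adjacent string literals, so the logged text is byte-for-byte unchanged; the formatter merely moved both literals onto one line. A quick self-contained check:

# Adjacent string literals are concatenated at parse time,
# whether they sit on one line or across a parenthesized break.
old = ("Warning: 'params' attribute missing in one of the inputs to "
       "print_state_mesh_shardings_params.")
new = "Warning: 'params' attribute missing in one of the inputs to " "print_state_mesh_shardings_params."
assert old == new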

src/MaxText/train_compile.py

Lines changed: 10 additions & 10 deletions
@@ -159,11 +159,11 @@ def is_oom(argv: Sequence[str]) -> bool:
 
   # Get shaped inputs
   (
-    shaped_train_args,
-    shaped_train_kwargs,
-    state_mesh_shardings,
-    _,
-    model,
+      shaped_train_args,
+      shaped_train_kwargs,
+      state_mesh_shardings,
+      _,
+      model,
   ) = get_shaped_inputs(topology_mesh, config)
 
   # Get data sharding
@@ -220,11 +220,11 @@ def main(argv: Sequence[str]) -> None:
 
   # Get shaped inputs
   (
-    shaped_train_args,
-    shaped_train_kwargs,
-    state_mesh_shardings,
-    state_logical_annotations,
-    model,
+      shaped_train_args,
+      shaped_train_kwargs,
+      state_mesh_shardings,
+      state_logical_annotations,
+      model,
   ) = get_shaped_inputs(topology_mesh, config)
 
   # Get data sharding

src/MaxText/train_utils.py

Lines changed: 1 addition & 5 deletions
@@ -219,11 +219,7 @@ def setup_train_loop(config, recorder, devices=None):
   if config.debug_sharding:
     max_utils.print_non_trivial_mesh_axis(model.mesh)
     maxtext_utils.print_state_mesh_shardings_params(
-        state,
-        state_mesh_shardings,
-        state_mesh_annotations,
-        model.mesh,
-        config.logical_axis_rules
+        state, state_mesh_shardings, state_mesh_annotations, model.mesh, config.logical_axis_rules
     )
 
   if config.use_dpo:
