Skip to content

Commit c73595a

Browse files
Merge pull request #3645 from AI-Hypercomputer:chengnuojin-correct-logical
PiperOrigin-RevId: 898309365
2 parents f216197 + 07f408e commit c73595a

2 files changed

Lines changed: 3 additions & 3 deletions

File tree

src/maxtext/configs/base.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,7 @@ logical_axis_rules: [
479479
['activation_mlp_moe', ['tensor', 'tensor_transpose', 'tensor_sequence']],
480480
['activation_kv', ['tensor', 'tensor_transpose', 'tensor_sequence']],
481481
['activation_prefill_kv_batch', ['data', 'fsdp', 'fsdp_transpose', 'expert']],
482-
['activation_kv_batch', ['data', 'fsdp', 'fsdp_transpose']],
482+
['activation_kv_batch', ['data', 'fsdp', 'fsdp_transpose', 'expert']],
483483
['activation_kv_head_dim', ['tensor', 'tensor_transpose', 'tensor_sequence']],
484484
['activation_vocab', ['tensor', 'tensor_transpose', 'tensor_sequence']],
485485
['activation_vocab', ['tensor', 'tensor_transpose']],
@@ -969,7 +969,7 @@ xprof_e2e_enable_fw_power_level_event: False
969969
xprof_e2e_enable_fw_thermal_event: False
970970
profile_power_events: False # Set to True to enable TPU-specific power/thermal profiling events. Defaults to False to avoid breaking GPU xplane tracing.
971971

972-
log_config: False # Prints the config (after defaults have been set by pyconfig logic)
972+
log_config: True # Prints the config (after defaults have been set by pyconfig logic)
973973
debug_sharding: False # Prints model weights sharding info
974974

975975
# Checkpoint Structured logging

src/maxtext/configs/custom_mesh_and_rule/pipeline-large-moe.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ logical_axis_rules: [
4646
['activation_mlp', ['tensor']],
4747
['activation_mlp_moe', ['tensor']],
4848
['activation_kv', ['tensor']],
49-
['activation_kv_batch', ['data', 'fsdp']],
49+
['activation_kv_batch', ['data', 'fsdp', 'expert']],
5050
['activation_kv_head_dim', ['tensor']],
5151
['activation_vocab', ['tensor']],
5252
['activation_stage', 'stage'],

0 commit comments

Comments (0)