Skip to content

Commit 0a1f4e9

Browse files
xibinliuecnal-cienet
authored and committed
NNX: update train/eval step sharding signatures to omit rng for pure_nnx
- get_functional_train_with_signature: use (state, batch) shardings when pure_nnx=True
- get_functional_eval_with_signature: use (state, batch) shardings when pure_nnx=True
1 parent d901179 commit 0a1f4e9

3 files changed

Lines changed: 10 additions & 2 deletions

File tree

src/maxtext/utils/maxtext_utils.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,10 @@ def get_functional_train_with_signature(
9393
"""Get the shardings (both state and data) for `train_step`."""
9494
functional_train = functools.partial(train_step, model, config, state_mesh_shardings, params_shardings)
9595
functional_train.__name__ = "train_step"
96-
in_shardings = (state_mesh_shardings, data_sharding, None) # State, batch, rng
96+
if config.pure_nnx:
97+
in_shardings = (state_mesh_shardings, data_sharding) # State, batch
98+
else:
99+
in_shardings = (state_mesh_shardings, data_sharding, None) # State, batch, rng
97100
out_shardings = (state_mesh_shardings, None) # State, metrics
98101
static_argnums = () # We partial out the static argnums of model and config
99102
donate_argnums = 0 # This is the index of the state - we allow the compiler to make use of this memory.
@@ -104,7 +107,10 @@ def get_functional_eval_with_signature(eval_step, data_sharding, state_mesh_shar
104107
"""Get the shardings (both state and data) for `eval_step`."""
105108
functional_eval = functools.partial(eval_step, model, config)
106109
functional_eval.__name__ = "eval_step"
107-
in_shardings = (state_mesh_shardings, data_sharding, None) # State, batch, rng
110+
if config.pure_nnx:
111+
in_shardings = (state_mesh_shardings, data_sharding) # State, batch
112+
else:
113+
in_shardings = (state_mesh_shardings, data_sharding, None) # State, batch, rng
108114
out_shardings = None # metrics
109115
static_argnums = () # We partial out the static argnums of model, config
110116
donate_argnums = () # state will be kept instead of being donated in eval_step

tests/integration/aot_identical_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ def assert_compile_and_real_match_jaxpr(self, test_name, *extra_args):
179179
"enable_checkpointing=False",
180180
"dump_jaxpr=True",
181181
"dump_jaxpr_delete_local_after=False",
182+
"skip_first_n_steps_for_profiler=0",
182183
]
183184
if extra_args:
184185
shared_args.extend(extra_args)

tests/integration/xaot_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def run_compile_then_load(self, test_name, *extra_args):
8080
"learning_rate=1e-3",
8181
"dataset_type=synthetic",
8282
"enable_checkpointing=False",
83+
"profiler=''",
8384
]
8485

8586
if extra_args:

0 commit comments

Comments (0)