
Commit 26ba03c

Ryan McKenna authored and OptaxDev committed

Update microbatching to support size-0 batches. This change is important for JAX privacy, where the batch size is random and possibly empty.
PiperOrigin-RevId: 889281155
1 parent 36d72ac commit 26ba03c
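
For context, a minimal sketch of the behavior this commit enables, mirroring the tests added below (assuming the module is importable as optax.microbatching):

import jax.numpy as jnp
from optax import microbatching

# Sum per-example outputs in microbatches of size 2.
m_fun = microbatching.microbatch(
    lambda x: jnp.sum(x, axis=0),
    argnums=0,
    microbatch_size=2,
    accumulator=microbatching.AccumulationType.SUM,
)

# An empty batch (leading axis of size 0) is now valid input: the loop
# body never runs, and the accumulator's initial value is finalized.
print(m_fun(jnp.zeros((0, 4))))  # [0. 0. 0. 0.]

Per the tests below, MEAN finalizes an empty batch to NaN (a 0/0 average), and CONCAT returns an output with a zero-length leading axis.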

2 files changed: 87 additions & 11 deletions


optax/microbatching/_microbatching.py

Lines changed: 14 additions & 11 deletions
@@ -183,8 +183,6 @@ def _sum() -> Accumulator:
 
 def _mean(num_microbatches: int) -> Accumulator:
   """An Accumulator that computes the mean of microbatched outputs."""
-  if num_microbatches <= 0:
-    raise ValueError(f'{num_microbatches=} must be positive.')
   return _lift(
       Accumulator(
           init=_with_floating_check(jnp.zeros_like),
@@ -230,8 +228,6 @@ def _get_out_sharding(x):
 
 def _concat(num_microbatches: int) -> Accumulator:
   """An Accumulator that concatenates microbatched outputs along the axis 0."""
-  if num_microbatches <= 0:
-    raise ValueError(f'{num_microbatches=} must be positive.')
 
   def init(value):
     shape = (num_microbatches,) + value.shape
@@ -321,6 +317,15 @@ def _reshape_all_args(
   return tuple(new_args), new_kwargs, tuple(batch_sizes)[0]
 
 
+def _take_fn(index: int, axis: int) -> Callable[[jax.Array], jax.Array]:
+  """Returns a function that takes the `index`-th element along the `axis`."""
+  def fun(x):
+    if x.shape[axis] == 0:  # jnp.take doesn't work with zero axis size.
+      return jnp.empty_like(x, shape=x.shape[:axis] + x.shape[axis + 1:])
+    return jnp.take(x, indices=index, axis=axis)
+  return fun
+
+
 def microbatch(
     fun: Function,
     argnums: int | Sequence[int],
@@ -421,13 +426,9 @@ def f(index):
     input_args = list(reshaped_args)
     input_kwargs = dict(reshaped_kwargs)
     for i, ax in zip(argnums, in_axes):
-      input_args[i] = jax.tree.map(
-          functools.partial(jnp.take, indices=index, axis=ax), input_args[i]
-      )
+      input_args[i] = jax.tree.map(_take_fn(index, ax), input_args[i])
     for i, ax in zip(argnames, in_axes[len(argnums) :]):
-      input_kwargs[i] = jax.tree.map(
-          functools.partial(jnp.take, indices=index, axis=ax), input_kwargs[i]
-      )
+      input_kwargs[i] = jax.tree.map(_take_fn(index, ax), input_kwargs[i])
     return fun(*input_args, **input_kwargs)
 
   def body_fun(index, carry):
@@ -436,8 +437,10 @@ def body_fun(index, carry):
   early_stop = num_real_microbatches is not None
   loop_bound = num_real_microbatches if early_stop else num_microbatches
   init_carry = accumulator_.init(jax.eval_shape(f, 0))
-  answer = jax.lax.fori_loop(0, loop_bound, body_fun, init_carry)
+  if num_microbatches == 0:
+    return accumulator_.finalize(init_carry)
 
+  answer = jax.lax.fori_loop(0, loop_bound, body_fun, init_carry)
   return accumulator_.finalize(answer)
 
  return microbatched_fun
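
The subtle case handled by the new _take_fn is a zero-size batch axis: per the comment above, jnp.take doesn't work when the axis has length 0, so the helper short-circuits and returns an empty array with that axis dropped. A standalone sketch of the same pattern for axis 0, in plain JAX (the helper name here is illustrative):

import jax.numpy as jnp

def take_along_axis0(index):
  # Mirrors the _take_fn pattern for axis=0: slice out microbatch
  # `index`, or return an empty slice when the batch axis is empty.
  def fun(x):
    if x.shape[0] == 0:  # shapes are static, so this check is jit-safe
      return jnp.empty_like(x, shape=x.shape[1:])
    return jnp.take(x, indices=index, axis=0)
  return fun

x = jnp.arange(12.0).reshape(3, 4)
print(take_along_axis0(1)(x))                        # row 1 of x
print(take_along_axis0(0)(jnp.zeros((0, 4))).shape)  # (4,)

Because the empty-batch slice keeps the per-microbatch shape, jax.eval_shape(f, 0) in microbatch can still build a well-formed accumulator even when the fori_loop never executes.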

optax/microbatching/_microbatching_test.py

Lines changed: 73 additions & 0 deletions
@@ -333,6 +333,79 @@ def test_micro_grad_early_stopping(self):
     result, _ = grad_fn(1.0, jnp.ones(16))
     test_utils.assert_trees_all_close(result, 12.0)
 
+  def test_zero_batch_size_microbatch(self):
+    def fun(x):
+      return jnp.sum(x, axis=0)
+
+    m_fun = microbatching.microbatch(
+        fun,
+        argnums=0,
+        microbatch_size=2,
+        accumulator=microbatching.AccumulationType.SUM,
+    )
+    res = m_fun(jnp.zeros((0, 4)))
+    self.assertEqual(res.shape, (4,))
+    test_utils.assert_trees_all_close(res, jnp.zeros(4))
+
+  def test_zero_batch_size_microbatch_mean(self):
+    def fun(x):
+      return jnp.mean(x, axis=0)
+
+    m_fun = microbatching.microbatch(
+        fun,
+        argnums=0,
+        microbatch_size=2,
+        accumulator=microbatching.AccumulationType.MEAN,
+    )
+    res = m_fun(jnp.zeros((0, 4)))
+    self.assertEqual(res.shape, (4,))
+    self.assertTrue(jnp.all(jnp.isnan(res)))
+
+  def test_zero_batch_size_microbatch_concat(self):
+    def fun(x):
+      return x * 2
+
+    m_fun = microbatching.microbatch(
+        fun,
+        argnums=0,
+        microbatch_size=2,
+        accumulator=microbatching.AccumulationType.CONCAT,
+    )
+    res = m_fun(jnp.zeros((0, 4)))
+    self.assertEqual(res.shape, (0, 4))
+
+  def test_zero_batch_size_micro_vmap(self):
+    m_vmap = microbatching.micro_vmap(lambda x: x * 2, microbatch_size=2)
+    res = m_vmap(jnp.zeros((0, 4)))
+    self.assertEqual(res.shape, (0, 4))
+
+  def test_zero_batch_size_micro_grad(self):
+    def mean_squared_loss(params, features, targets):
+      preds = features @ params
+      diff = preds - targets
+      return 0.5 * jnp.mean(diff**2)
+
+    grad_fn = microbatching.micro_grad(
+        mean_squared_loss,
+        argnums=0,
+        batch_argnums=(1, 2),
+        transform_fn=lambda x: (x, x**2),
+        metrics_fn=jnp.linalg.norm,
+        keep_batch_dim=True,
+        microbatch_size=1,
+    )
+    params = jnp.zeros(1)
+    features = jnp.zeros((0, 1))
+    targets = jnp.zeros((0,))
+    (grads, squared_grads), aux = grad_fn(params, features, targets)
+
+    self.assertEqual(grads.shape, (1,))
+    test_utils.assert_trees_all_close(grads, jnp.zeros(1))
+    self.assertEqual(squared_grads.shape, (1,))
+    test_utils.assert_trees_all_close(squared_grads, jnp.zeros(1))
+    self.assertEqual(aux.values.shape, (0,))
+    self.assertEqual(aux.metrics.shape, (0,))
+
 
 if __name__ == '__main__':
   absltest.main()
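
Finally, connecting back to the commit message: in differentially private training with Poisson subsampling, each example joins a step's batch independently, so the realized batch size is random and can be zero. A rough sketch of how such an empty batch arises (hypothetical code, not from this repo; the boolean-mask step runs eagerly because the selected size is data-dependent):

import jax
import jax.numpy as jnp

data = jnp.ones((100, 4))
key = jax.random.key(0)

# Poisson subsampling: keep each example independently with probability q.
mask = jax.random.bernoulli(key, p=0.01, shape=(data.shape[0],))
batch = data[mask]  # shape (k, 4) for random k, possibly (0, 4)

# Any microbatched function applied to `batch` must therefore tolerate
# a leading axis of size 0, which is exactly what this commit adds.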
