@@ -75,13 +75,18 @@ class PerformanceOptions:
7575 throughput by reducing memory pressure and the number of CPU<->GPU
7676 synchronisation points required for memory allocations."""
7777
78- force_recompute_layer : bool = False
78+ force_recompute_layer : bool | int = False
7979 """Enable activation checkpointing (gradient recomputation) for all layers.
8080
8181 When ``True``, intermediate activations are not stored during the forward pass;
8282 instead they are recomputed from scratch during the backward pass. This trades
8383 compute for memory and is useful when training with very large context sizes.
8484 Has no effect during inference (``torch.no_grad`` / ``torch.inference_mode``).
85+
86+ Some models also accept an integer, where ``0`` means no checkpointing and
87+ higher values enable progressively more aggressive checkpointing, giving
88+ finer-grained control over the compute/memory trade-off. Models clip the
89+ value to their maximum supported checkpointing level.
8590 """
8691
8792 use_chunkwise_inference : bool = False