By default use all GPUs available in auto mode (#808)

psinger-prior · web-flow · commit ea9f11e0f332 · 2026-03-06T11:46:31.000Z
diff --git a/changelog/808.changed.md b/changelog/808.changed.md
@@ -0,0 +1 @@
+"auto" device selection now uses all available CUDA GPUs instead of only the first one
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
@@ -1506,8 +1506,8 @@ def load_from_fit_state(
     def to(self, device: DevicesSpecification) -> None:
         """Move the estimator to the given device(s).
 
-        If "auto": a single device is selected based on availability in the
-        following order of priority: "cuda:0", "mps", "cpu".
+        If "auto": devices are selected based on availability in the
+        following order of priority: all available CUDA GPUs, "mps", "cpu".
 
         To manually select a single device: specify a PyTorch device string e.g.
         "cuda:1". See PyTorch's documentation for information about supported
diff --git a/src/tabpfn/inference_tuning.py b/src/tabpfn/inference_tuning.py
@@ -90,23 +90,31 @@ class ClassifierEvalMetrics(str, Enum):
 
 
 METRIC_NAME_TO_OBJECTIVE = {
-    "f1": lambda y_true, y_pred: -f1_score(
-        y_true,
-        y_pred,
-        average="binary",
-        zero_division=0,
+    "f1": lambda y_true, y_pred: (
+        -f1_score(
+            y_true,
+            y_pred,
+            average="binary",
+            zero_division=0,
+        )
     ),
-    "accuracy": lambda y_true, y_pred: -accuracy_score(
-        y_true,
-        y_pred,
+    "accuracy": lambda y_true, y_pred: (
+        -accuracy_score(
+            y_true,
+            y_pred,
+        )
     ),
-    "balanced_accuracy": lambda y_true, y_pred: -balanced_accuracy_score(
-        y_true,
-        y_pred,
+    "balanced_accuracy": lambda y_true, y_pred: (
+        -balanced_accuracy_score(
+            y_true,
+            y_pred,
+        )
     ),
-    "roc_auc": lambda y_true, y_pred: -roc_auc_score(
-        y_true,
-        y_pred,
+    "roc_auc": lambda y_true, y_pred: (
+        -roc_auc_score(
+            y_true,
+            y_pred,
+        )
     ),
     "log_loss": log_loss,
 }
diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py
@@ -1203,8 +1203,8 @@ def load_from_fit_state(
     def to(self, device: DevicesSpecification) -> None:
         """Move the estimator to the given device(s).
 
-        If "auto": a single device is selected based on availability in the
-        following order of priority: "cuda:0", "mps", "cpu".
+        If "auto": devices are selected based on availability in the
+        following order of priority: all available CUDA GPUs, "mps", "cpu".
 
         To manually select a single device: specify a PyTorch device string e.g.
         "cuda:1". See PyTorch's documentation for information about supported
diff --git a/src/tabpfn/utils.py b/src/tabpfn/utils.py
@@ -117,7 +117,7 @@ def infer_devices(devices: DevicesSpecification) -> tuple[torch.device, ...]:
     """Selects the appropriate PyTorch devices for inference.
 
     If `device` is "auto" then the devices are selected as follows:
-    1. If CUDA is available and not excluded, returns the first "cuda" device
+    1. If CUDA is available and not excluded, returns all available "cuda" devices
     2. Otherwise, if MPS is available and not excluded, returns the "mps" device
     3. Otherwise, returns the "cpu" device
 
@@ -145,7 +145,9 @@ def infer_devices(devices: DevicesSpecification) -> tuple[torch.device, ...]:
 
     if devices == "auto":
         if "cuda" not in exclude_devices and torch.cuda.is_available():
-            return (torch.device("cuda:0"),)
+            return tuple(
+                torch.device(f"cuda:{i}") for i in range(torch.cuda.device_count())
+            )
 
         if _is_mps_supported() and "mps" not in exclude_devices:
             return (torch.device("mps"),)
diff --git a/tests/test_config.py b/tests/test_config.py
@@ -47,14 +47,14 @@ def test__parse_config__unused_keys__returns_unused_config(
 
 
 @dataclass
-class FakeConfig(ArchitectureConfig):
-    a: int = 1
-    b: FakeSubConfig = field(default_factory=lambda: FakeSubConfig())
+class FakeSubConfig:
+    c: int = 2
 
 
 @dataclass
-class FakeSubConfig:
-    c: int = 2
+class FakeConfig(ArchitectureConfig):
+    a: int = 1
+    b: FakeSubConfig = field(default_factory=FakeSubConfig)
 
 
 class FakeArchitectureModule(ArchitectureModule):
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -34,7 +34,7 @@ def test__infer_devices__auto__single_cuda_gpu_available__selects_it(
     assert infer_devices(devices="auto") == (torch.device("cuda:0"),)
 
 
-def test__infer_devices__auto__multiple_cuda_gpus_available__selects_first(
+def test__infer_devices__auto__multiple_cuda_gpus_available__selects_all(
     mocker: MagicMock, monkeypatch: pytest.MonkeyPatch
 ) -> None:
     monkeypatch.setenv("TABPFN_EXCLUDE_DEVICES", "")
@@ -43,7 +43,11 @@ def test__infer_devices__auto__multiple_cuda_gpus_available__selects_first(
     mock_cuda.device_count.return_value = 3
     mocker.patch("torch.backends.mps").is_available.return_value = True
 
-    assert infer_devices(devices="auto") == (torch.device("cuda:0"),)
+    assert infer_devices(devices="auto") == (
+        torch.device("cuda:0"),
+        torch.device("cuda:1"),
+        torch.device("cuda:2"),
+    )
 
 
 def test__infer_devices__auto__cuda_and_mps_available_but_excluded__selects_cpu(

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1 @@` |
|  | `1` | `+"auto" device selection now uses all available CUDA GPUs instead of only the first one` |