More informative Memory Error Msg (#805)

klemens-floege · priorphil · web-flow · commit ca64d76cba48 · 2026-03-02T20:30:26.000Z
Co-authored-by: Phil <phil@priorlabs.ai>
diff --git a/changelog/805.added.md b/changelog/805.added.md
@@ -0,0 +1 @@
+More informative Out-Of-Memory error message.
diff --git a/src/tabpfn/classifier.py b/src/tabpfn/classifier.py
@@ -148,6 +148,9 @@ class TabPFNClassifier(ClassifierMixin, BaseEstimator):
     n_features_in_: int
     """The number of features in the input data used during `fit()`."""
 
+    n_train_samples_: int
+    """The number of training samples used during `fit()`."""
+
     inferred_feature_schema_: FeatureSchema
     """The inferred feature schema. This contains the feature modalities per column,
     using heuristics and user-provided indices for categorical features."""
@@ -650,6 +653,7 @@ def _initialize_dataset_preprocessing(
         self.ordinal_encoder_ = ordinal_encoder
         self.feature_names_in_ = feature_names
         self.n_features_in_ = n_features
+        self.n_train_samples_ = len(X)
 
         # Label encoding
         self.label_encoder_ = TabPFNLabelEncoder(original_target_name=original_y_name)
@@ -1062,7 +1066,13 @@ def _raw_predict(
                 ord_encoder=getattr(self, "ordinal_encoder_", None),
             )
 
-        with handle_oom_errors(self.devices_, X, model_type="classifier"):
+        with handle_oom_errors(
+            self.devices_,
+            X,
+            model_type="classifier",
+            n_train_samples=getattr(self, "n_train_samples_", None),
+            n_features=getattr(self, "n_features_in_", None),
+        ):
             return self.forward(
                 X,
                 use_inference_mode=True,
diff --git a/src/tabpfn/errors.py b/src/tabpfn/errors.py
@@ -60,22 +60,43 @@ def __init__(
         self,
         original_error: Exception | None = None,
         *,
+        n_train_samples: int | None = None,
         n_test_samples: int | None = None,
+        n_features: int | None = None,
         model_type: str = "classifier",
     ):
         predict_method = "predict_proba" if model_type == "classifier" else "predict"
 
         size_info = f" with {n_test_samples:,} test samples" if n_test_samples else ""
 
+        size_line = ""
+        if n_train_samples is not None and n_test_samples is not None:
+            size_line = (
+                f"Your sizes: {n_train_samples:,} train / "
+                f"{n_test_samples:,} test samples"
+            )
+            if n_features is not None:
+                size_line += f", {n_features} features"
+            size_line += ".\n"
+
         message = (
             f"{self.device_name} out of memory{size_info}.\n\n"
-            f"Solution: Split your test data into smaller batches:\n\n"
-            f"    batch_size = 1000  # depends on hardware\n"
+            f"This issue is usually caused by one of the following two reasons:\n\n"
+            f"1) Large test set — split into batches:\n\n"
             f"    predictions = []\n"
-            f"    for i in range(0, len(X_test), batch_size):\n"
-            f"        batch = model.{predict_method}(X_test[i:i + batch_size])\n"
-            f"        predictions.append(batch)\n"
-            f"    predictions = np.vstack(predictions)"
+            f"    for i in range(0, len(X_test), 100):\n"
+            f"        pred = model.{predict_method}("
+            f"X_test[i:i + 100])\n"
+            f"        predictions.append(pred)\n"
+            f"    predictions = np.vstack(predictions)\n\n"
+            f"2) Large training set — batching won't help.\n"
+            f"   You need subsampling or ensembling, see:\n"
+            f"   https://github.com/PriorLabs/tabpfn-extensions/"
+            f"blob/main/examples/large_datasets/"
+            f"large_datasets_example.py\n\n"
+            f"{size_line}"
+            f"Not sure which? If model.{predict_method}(X_test[:1]) "
+            f"also fails, it's (2)."
         )
         if original_error is not None:
             message += f"\n\nOriginal error: {original_error}"
@@ -100,13 +121,17 @@ def handle_oom_errors(
     devices: tuple[torch.device, ...],
     X: XType,
     model_type: str,
+    n_train_samples: int | None = None,
+    n_features: int | None = None,
 ) -> Generator[None, None, None]:
     """Context manager to catch OOM errors and raise helpful TabPFN exceptions.
 
     Args:
         devices: The devices the model is running on.
         X: The input data (used to get n_samples for the error message).
         model_type: Either "classifier" or "regressor".
+        n_train_samples: Number of training samples (for the error message).
+        n_features: Number of features (for the error message).
 
     Raises:
         TabPFNCUDAOutOfMemoryError: If a CUDA OOM error occurs.
@@ -115,16 +140,24 @@ def handle_oom_errors(
     try:
         yield
     except torch.OutOfMemoryError as e:
-        n_samples = X.shape[0] if hasattr(X, "shape") else len(X)
+        n_test_samples = X.shape[0] if hasattr(X, "shape") else len(X)
         raise TabPFNCUDAOutOfMemoryError(
-            e, n_test_samples=n_samples, model_type=model_type
+            e,
+            n_train_samples=n_train_samples,
+            n_test_samples=n_test_samples,
+            n_features=n_features,
+            model_type=model_type,
         ) from None
     except RuntimeError as e:
         is_mps = any(d.type == "mps" for d in devices)
         is_oom = "out of memory" in str(e).lower()
         if is_mps and is_oom:
-            n_samples = X.shape[0] if hasattr(X, "shape") else len(X)
+            n_test_samples = X.shape[0] if hasattr(X, "shape") else len(X)
             raise TabPFNMPSOutOfMemoryError(
-                e, n_test_samples=n_samples, model_type=model_type
+                e,
+                n_train_samples=n_train_samples,
+                n_test_samples=n_test_samples,
+                n_features=n_features,
+                model_type=model_type,
             ) from None
         raise
diff --git a/src/tabpfn/regressor.py b/src/tabpfn/regressor.py
@@ -174,6 +174,9 @@ class TabPFNRegressor(RegressorMixin, BaseEstimator):
     n_features_in_: int
     """The number of features in the input data used during `fit()`."""
 
+    n_train_samples_: int
+    """The number of training samples used during `fit()`."""
+
     inferred_feature_schema_: FeatureSchema
     """The inferred feature schema. This contains the feature modalities per column,
     using heuristics and user-provided indices for categorical features."""
@@ -619,6 +622,7 @@ def _initialize_dataset_preprocessing(
         # Set class variables for sklearn compatibility
         self.feature_names_in_ = feature_names
         self.n_features_in_ = n_features
+        self.n_train_samples_ = len(X)
 
         feature_schema = detect_feature_modalities(
             X=X,
@@ -926,7 +930,13 @@ def predict(
         )
 
         # Runs over iteration engine
-        with handle_oom_errors(self.devices_, X, model_type="regressor"):
+        with handle_oom_errors(
+            self.devices_,
+            X,
+            model_type="regressor",
+            n_train_samples=getattr(self, "n_train_samples_", None),
+            n_features=getattr(self, "n_features_in_", None),
+        ):
             (
                 _,
                 # list of tensors [N_est, N_samples, N_borders] (after forward)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+More informative Out-Of-Memory error message.`