fix(pt): Treat cuBLAS allocation failures as PyTorch OOM during auto batch sizing

OutisLi · OutisLi · commit 7ccb11ded531 · 2026-05-11T09:23:33.000+08:00
PyTorch inference can raise `CUDA error: CUBLAS_STATUS_ALLOC_FAILED` after an
oversized batch attempt, especially during automatic batch sizing. Previously
this was treated as a generic RuntimeError, so `AutoBatchSize` stopped after the
first batch size reduction instead of continuing to shrink the inference batch.

Add this cuBLAS allocation failure to the PyTorch auto-batch OOM markers and
cover it with a unit test, allowing `AutoBatchSize` to continue retrying with
smaller batch sizes.
diff --git a/deepmd/pt/utils/auto_batch_size.py b/deepmd/pt/utils/auto_batch_size.py
@@ -78,6 +78,7 @@ def is_oom_error(self, e: Exception) -> bool:
             "CUDA out of memory.",
             "CUDA driver error: out of memory",
             "CUDA error: out of memory",
+            "CUBLAS_STATUS_ALLOC_FAILED",
             "cusolver error: CUSOLVER_STATUS_INTERNAL_ERROR",
         )
         if any(m in msg for msg in msgs for m in plain_oom_markers):
diff --git a/source/tests/pt/test_auto_batch_size.py b/source/tests/pt/test_auto_batch_size.py
@@ -21,6 +21,20 @@ def test_is_oom_error_cuda_message(self, empty_cache) -> None:
         )
         empty_cache.assert_called_once()
 
+    @mock.patch("deepmd.pt.utils.auto_batch_size.torch.cuda.empty_cache")
+    def test_is_oom_error_cublas_alloc_failed(self, empty_cache) -> None:
+        auto_batch_size = AutoBatchSize(256, 2.0)
+
+        self.assertTrue(
+            auto_batch_size.is_oom_error(
+                RuntimeError(
+                    "CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling "
+                    "`cublasCreate(handle)`"
+                )
+            )
+        )
+        empty_cache.assert_called_once()
+
     @mock.patch("deepmd.pt.utils.auto_batch_size.torch.cuda.empty_cache")
     def test_is_oom_error_empty_runtime_error_from_cuda_oom(self, empty_cache) -> None:
         auto_batch_size = AutoBatchSize(256, 2.0)

Original file line number	Diff line number	Diff line change
`@@ -78,6 +78,7 @@ def is_oom_error(self, e: Exception) -> bool:`
`78`	`78`	`"CUDA out of memory.",`
`79`	`79`	`"CUDA driver error: out of memory",`
`80`	`80`	`"CUDA error: out of memory",`
	`81`	`+ "CUBLAS_STATUS_ALLOC_FAILED",`
`81`	`82`	`"cusolver error: CUSOLVER_STATUS_INTERNAL_ERROR",`
`82`	`83`	`)`
`83`	`84`	`if any(m in msg for msg in msgs for m in plain_oom_markers):`