Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions deepmd/pd/entrypoints/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def get_trainer(
# Initialize DDP
world_size = dist.get_world_size()
if world_size > 1:
assert paddle.version.nccl() != "0"
assert not paddle.core.is_compiled_with_nccl() or paddle.version.nccl() != "0"
fleet.init(is_collective=True)

def prepare_trainer_input_single(
Expand Down Expand Up @@ -214,7 +214,7 @@ def get_compute_device(self) -> str:

def get_ngpus(self) -> int:
"""Get the number of GPUs."""
return paddle.device.cuda.device_count()
return paddle.device.device_count()

def get_backend_info(self) -> dict:
"""Get backend information."""
Expand Down
4 changes: 2 additions & 2 deletions deepmd/pd/utils/auto_batch_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def is_gpu_available(self) -> bool:
bool
True if GPU is available
"""
return paddle.device.cuda.device_count() > 0
return paddle.device.device_count() > 0

def is_oom_error(self, e: Exception) -> bool:
"""Check if the exception is an OOM error.
Expand All @@ -51,6 +51,6 @@ def is_oom_error(self, e: Exception) -> bool:
# (the meaningless error message should be considered as a bug in cusolver)
if isinstance(e, MemoryError) and ("ResourceExhaustedError" in e.args[0]):
# Release all unoccupied cached memory
paddle.device.cuda.empty_cache()
paddle.device.empty_cache()
return True
return False
4 changes: 2 additions & 2 deletions deepmd/pd/utils/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
# Make sure DDP uses correct device if applicable
LOCAL_RANK = int(os.environ.get("PADDLE_LOCAL_RANK", 0))

if os.environ.get("DEVICE") == "cpu" or paddle.device.cuda.device_count() <= 0:
if os.environ.get("DEVICE") == "cpu" or paddle.device.device_count() <= 0:
DEVICE = "cpu"
else:
DEVICE = f"gpu:{LOCAL_RANK}"
DEVICE = paddle.device.get_device()
Comment thread
HydrogenSulfate marked this conversation as resolved.

paddle.device.set_device(DEVICE)

Expand Down
15 changes: 14 additions & 1 deletion deepmd/pd/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
annotations,
)

import warnings
from contextlib import (
contextmanager,
)
Expand Down Expand Up @@ -345,8 +346,20 @@ def get_generator(
generator = paddle.framework.core.default_cuda_generator(
int(DEVICE.split("gpu:")[1])
)
elif DEVICE == "xpu":
generator = paddle.framework.core.default_xpu_generator(0)
elif DEVICE.startswith("xpu:"):
generator = paddle.framework.core.default_xpu_generator(
int(DEVICE.split("xpu:")[1])
)
else:
raise ValueError("DEVICE should be cpu or gpu or gpu:x")
# Return None for compatibility across different devices
Comment thread
HydrogenSulfate marked this conversation as resolved.
warnings.warn(
f"DEVICE is {DEVICE}, which is not supported. Returning None.",
category=UserWarning,
)
return None
# raise ValueError("DEVICE should be cpu or gpu or gpu:x or xpu or xpu:x")
Comment thread
HydrogenSulfate marked this conversation as resolved.
generator.manual_seed(seed)
return generator
else:
Expand Down
2 changes: 1 addition & 1 deletion source/tests/pd/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
@pytest.fixture(scope="package", autouse=True)
def clear_cuda_memory(request):
yield
paddle.device.cuda.empty_cache()
paddle.device.empty_cache()
Loading