Skip to content

Commit 5c63acc

Browse files
committed
move device name detection to the backends' implementations
1 parent 8b8b2e7 commit 5c63acc

4 files changed

Lines changed: 55 additions & 47 deletions

File tree

deepmd/pd/entrypoints/main.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,23 @@ def get_backend_info(self) -> dict:
224224
**op_info,
225225
}
226226

227+
def get_device_name(self) -> str | None:
228+
"""Use Paddle's CUDA device properties to get the underlying GPU name.
229+
230+
Returns
231+
-------
232+
str or None
233+
The device name if available, otherwise None.
234+
"""
235+
if paddle.device.is_compiled_with_cuda():
236+
cuda_mod = getattr(paddle.device, "cuda", None)
237+
if cuda_mod is not None and cuda_mod.device_count() > 0:
238+
get_props = getattr(cuda_mod, "get_device_properties", None)
239+
if callable(get_props):
240+
props = get_props(0)
241+
return getattr(props, "name", None)
242+
return None
243+
227244

228245
def train(
229246
input_file: str,

deepmd/pt/entrypoints/main.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,18 @@ def get_backend_info(self) -> dict:
252252
**op_info,
253253
}
254254

255+
def get_device_name(self) -> str | None:
256+
"""Use PyTorch's current device name as the device identifier.
257+
258+
Returns
259+
-------
260+
str or None
261+
The device name if available, otherwise None.
262+
"""
263+
if torch.cuda.is_available():
264+
return torch.cuda.get_device_name(torch.cuda.current_device())
265+
return None
266+
255267

256268
def train(
257269
input_file: str,

deepmd/tf/train/run_options.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,21 @@ def get_backend_info(self) -> dict:
7373
"build with TF lib": GLOBAL_CONFIG["tf_libs"].replace(";", "\n"),
7474
}
7575

76+
def get_device_name(self) -> str | None:
77+
"""Prefer the hardware device name if available, fall back to identifier.
78+
79+
Returns
80+
-------
81+
str or None
82+
The device name if available, otherwise None.
83+
"""
84+
gpus = tf.config.list_physical_devices("GPU")
85+
if gpus:
86+
# Query the first physical GPU; prefer its reported hardware name, fall back to the device identifier
87+
details = tf.config.experimental.get_device_details(gpus[0])
88+
return details.get("device_name") or gpus[0].name
89+
return None
90+
7691

7792
class RunOptions:
7893
"""Class with info on how to run training (cluster, MPI and GPU config).

deepmd/utils/summary.py

Lines changed: 11 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -74,53 +74,7 @@ def __call__(self) -> None:
7474
"computing device": self.get_compute_device(),
7575
}
7676
)
77-
backend = build_info.get("Backend")
78-
device_name = None
79-
try:
80-
if backend == "PyTorch":
81-
import torch
82-
83-
if torch.cuda.is_available():
84-
# Use PyTorch's current device name as the device identifier
85-
device_name = torch.cuda.get_device_name(
86-
torch.cuda.current_device()
87-
)
88-
elif backend == "TensorFlow":
89-
import tensorflow as tf
90-
91-
gpus = tf.config.list_physical_devices("GPU")
92-
if gpus:
93-
# Use the first physical GPU device identifier as the device name
94-
details = tf.config.experimental.get_device_details(gpus[0])
95-
# Prefer the hardware device name if available, fall back to identifier
96-
device_name = details.get("device_name") or gpus[0].name
97-
elif backend == "Paddle":
98-
import paddle
99-
100-
# Use Paddle's CUDA device properties to get the underlying GPU name
101-
if hasattr(paddle, "device") and hasattr(
102-
paddle.device, "is_compiled_with_cuda"
103-
):
104-
if paddle.device.is_compiled_with_cuda():
105-
cuda_mod = getattr(paddle.device, "cuda", None)
106-
if cuda_mod is not None and callable(
107-
getattr(cuda_mod, "device_count", None)
108-
):
109-
if cuda_mod.device_count() > 0 and callable(
110-
getattr(cuda_mod, "get_device_properties", None)
111-
):
112-
props = cuda_mod.get_device_properties(0)
113-
device_name = getattr(props, "name", None)
114-
elif backend == "JAX":
115-
import jax
116-
117-
if jax.devices():
118-
# Use the first JAX device's kind as the device name
119-
jax_device = jax.devices()[0]
120-
device_name = f"{jax_device.device_kind}"
121-
except Exception:
122-
# Best-effort device name detection; ignore failures silently
123-
pass
77+
device_name = self.get_device_name()
12478
if device_name:
12579
build_info["Device Name"] = device_name
12680
if self.is_built_with_cuda():
@@ -175,3 +129,13 @@ def get_ngpus(self) -> int:
175129
def get_backend_info(self) -> dict:
176130
"""Get backend information."""
177131
return {}
132+
133+
def get_device_name(self) -> str | None:
134+
"""Get the device name (e.g., NVIDIA A800-SXM4-80GB).
135+
136+
Returns
137+
-------
138+
str or None
139+
The device name if available, otherwise None.
140+
"""
141+
return None

0 commit comments

Comments
 (0)