Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/download_libtorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ set -ev
SCRIPT_PATH=$(dirname $(realpath -s $0))
cd ${SCRIPT_PATH}/..

wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcpu.zip -O ~/libtorch.zip
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcpu.zip -O ~/libtorch.zip
unzip ~/libtorch.zip
2 changes: 1 addition & 1 deletion .github/workflows/build_cc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
env:
DEBIAN_FRONTEND: noninteractive
- run: |
echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | sudo tee /etc/apt/sources.list.d/rocm.list \
echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/6.3/ jammy main' | sudo tee /etc/apt/sources.list.d/rocm.list \
&& printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 \
&& curl -s https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - \
&& sudo apt-get update \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_cc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
run: source/tests/infer/convert-models.sh
- name: Download libtorch
run: |
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcpu.zip -O libtorch.zip
unzip libtorch.zip
# https://github.com/actions/runner-images/issues/9491
- name: Fix kernel mmap rnd bits
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
&& sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
if: false # skip as we use nvidia image
- run: python -m pip install -U uv
- run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.6.0" "jax[cuda12]==0.5.0"
- run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.7.0" "jax[cuda12]==0.5.0"
- run: |
export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
Expand All @@ -67,7 +67,7 @@ jobs:
run: source/tests/infer/convert-models.sh
- name: Download libtorch
run: |
wget https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.6.0%2Bcu124.zip -O libtorch.zip
wget https://download.pytorch.org/libtorch/cu126/libtorch-cxx11-abi-shared-with-deps-2.7.0%2Bcu126.zip -O libtorch.zip
unzip libtorch.zip
- run: |
export CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/libtorch
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/test_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
export TENSORFLOW_ROOT=$(python -c 'import tensorflow;print(tensorflow.__path__[0])')
export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py "jax==0.5.0;python_version>='3.10'"
source/install/uv_with_retry.sh pip install --system -U setuptools
source/install/uv_with_retry.sh pip install --system horovod --no-build-isolation
env:
# Please note that uv has some issues with finding
Expand All @@ -42,6 +43,8 @@ jobs:
HOROVOD_WITH_TENSORFLOW: 1
HOROVOD_WITHOUT_PYTORCH: 1
HOROVOD_WITH_MPI: 1
# https://cmake.org/cmake/help/latest/variable/CMAKE_POLICY_VERSION_MINIMUM.html
CMAKE_POLICY_VERSION_MINIMUM: 3.5
- run: dp --version
- name: Get durations from cache
uses: actions/cache@v4
Expand Down
2 changes: 1 addition & 1 deletion backend/find_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def get_pt_requirement(pt_version: str = "") -> dict:
cuda_version = os.environ.get("CUDA_VERSION", "12.2")
if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
# CUDA 12.2, cudnn 9
pt_version = "2.6.0"
pt_version = "2.7.0"
elif cuda_version in SpecifierSet(">=11,<12"):
# CUDA 11.8, cudnn 8
pt_version = "2.3.1"
Expand Down
19 changes: 10 additions & 9 deletions deepmd/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,13 @@
self.results["virial"] = v[0].reshape(3, 3)

# convert virial into stress for lattice relaxation
if "stress" in properties:
if sum(atoms.get_pbc()) > 0:
# the usual convention (tensile stress is positive)
# stress = -virial / volume
stress = -0.5 * (v[0].copy() + v[0].copy().T) / atoms.get_volume()
# Voigt notation
self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]]
else:
raise PropertyNotImplementedError
if cell is not None:
# the usual convention (tensile stress is positive)
# stress = -virial / volume
stress = -0.5 * (v[0].copy() + v[0].copy().T) / atoms.get_volume()
# Voigt notation
self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]]
elif "stress" in properties:
raise PropertyNotImplementedError

Check warning on line 148 in deepmd/calculator.py

View check run for this annotation

Codecov / codecov/patch

deepmd/calculator.py#L148

Added line #L148 was not covered by tests
else:
pass
16 changes: 14 additions & 2 deletions deepmd/dpmodel/utils/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@
)


def sigmoid_t(x: np.ndarray) -> np.ndarray:
    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` to an array.

    Parameters
    ----------
    x : np.ndarray
        Input array. JAX arrays are dispatched to ``jax.nn.sigmoid``;
        any other array is handled through the namespace returned by
        ``array_api_compat.array_namespace``.

    Returns
    -------
    np.ndarray
        The sigmoid of ``x``, computed by the backend selected above.
    """
    if not array_api_compat.is_jax_array(x):
        # Generic path: evaluate the closed-form expression with the
        # array namespace that matches the input.
        namespace = array_api_compat.array_namespace(x)
        denominator = 1 + namespace.exp(-x)
        return 1 / denominator

    # JAX path: the import lives inside this branch, so it is only
    # executed when a JAX array is actually passed in.
    from deepmd.jax.env import (
        jax,
    )

    # see https://github.com/jax-ml/jax/discussions/15617
    return jax.nn.sigmoid(x)


class Identity(NativeOP):
def __init__(self) -> None:
super().__init__()
Expand Down Expand Up @@ -313,9 +326,8 @@ def fn(x):
elif activation_function == "sigmoid":

def fn(x):
xp = array_api_compat.array_namespace(x)
# generated by GitHub Copilot
return 1 / (1 + xp.exp(-x))
return sigmoid_t(x)

return fn
elif activation_function.lower() in ("none", "linear"):
Expand Down
2 changes: 2 additions & 0 deletions deepmd/jax/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

jax.config.update("jax_enable_x64", True)
# jax.config.update("jax_debug_nans", True)
# https://github.com/jax-ml/jax/issues/24909
jax.config.update("jax_default_matmul_precision", "tensorfloat32")

if os.environ.get("DP_DTYPE_PROMOTION_STRICT") == "1":
jax.config.update("jax_numpy_dtype_promotion", "strict")
Expand Down
2 changes: 1 addition & 1 deletion deepmd/jax/model/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def forward_common_atomic(
kk_redu = get_reduce_name(kk)
model_predict[kk_redu] = jnp.sum(vv, axis=atom_axis)
kk_derv_r, kk_derv_c = get_deriv_name(kk)
if vdef.c_differentiable:
if vdef.r_differentiable:

def eval_output(
cc_ext,
Expand Down
8 changes: 4 additions & 4 deletions deepmd/pt/model/descriptor/repformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,13 +491,13 @@ def forward(
torch.tensor(
real_nloc,
dtype=torch.int32,
device=env.DEVICE,
), # should be int of c++
device=torch.device("cpu"),
), # should be int of c++, placed on cpu
torch.tensor(
real_nall - real_nloc,
dtype=torch.int32,
device=env.DEVICE,
), # should be int of c++
device=torch.device("cpu"),
), # should be int of c++, placed on cpu
)
g1_ext = ret[0].unsqueeze(0)
if has_spin:
Expand Down
15 changes: 10 additions & 5 deletions deepmd/pt/utils/auto_batch_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,16 @@
# several sources think CUSOLVER_STATUS_INTERNAL_ERROR is another out-of-memory error,
# such as https://github.com/JuliaGPU/CUDA.jl/issues/1924
# (the meaningless error message should be considered as a bug in cusolver)
if isinstance(e, RuntimeError) and (
"CUDA out of memory." in e.args[0]
or "CUDA driver error: out of memory" in e.args[0]
or "cusolver error: CUSOLVER_STATUS_INTERNAL_ERROR" in e.args[0]
):
if (

Check warning on line 52 in deepmd/pt/utils/auto_batch_size.py

View check run for this annotation

Codecov / codecov/patch

deepmd/pt/utils/auto_batch_size.py#L52

Added line #L52 was not covered by tests
isinstance(e, RuntimeError)
and (
"CUDA out of memory." in e.args[0]
or "CUDA driver error: out of memory" in e.args[0]
or "cusolver error: CUSOLVER_STATUS_INTERNAL_ERROR" in e.args[0]
# https://github.com/deepmodeling/deepmd-kit/issues/4594
or "CUDA error: out of memory" in e.args[0]
)
) or isinstance(e, torch.cuda.OutOfMemoryError):
# Release all unoccupied cached memory
torch.cuda.empty_cache()
return True
Expand Down
20 changes: 13 additions & 7 deletions deepmd/tf/infer/deep_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,9 @@ def _init_attr(self) -> None:
self.numb_dos = 0
self.tmap = tmap.decode("utf-8").split()
if self.tensors["modifier_type"] is not None:
self.modifier_type = run_sess(self.sess, [self.tensors["modifier_type"]])[0]
self.modifier_type = run_sess(self.sess, [self.tensors["modifier_type"]])[
0
].decode()
else:
self.modifier_type = None

Expand Down Expand Up @@ -761,15 +763,17 @@ def eval(
odef.name: oo for oo, odef in zip(output, self.output_def.var_defs.values())
}
# ugly!!
if self.modifier_type is not None and isinstance(self.model_type, DeepPot):
if self.modifier_type is not None and issubclass(self.model_type, DeepPot):
if atomic:
raise RuntimeError("modifier does not support atomic modification")
me, mf, mv = self.dm.eval(coords, cells, atom_types)
output = list(output) # tuple to list
e, f, v = output[:3]
output_dict["energy_redu"] += me.reshape(e.shape)
output_dict["energy_deri_r"] += mf.reshape(f.shape)
output_dict["energy_deri_c_redu"] += mv.reshape(v.shape)
output_dict["energy_redu"] += me.reshape(output_dict["energy_redu"].shape)
output_dict["energy_derv_r"] += mf.reshape(
output_dict["energy_derv_r"].shape
)
output_dict["energy_derv_c_redu"] += mv.reshape(
output_dict["energy_derv_c_redu"].shape
)
return output_dict

def _prepare_feed_dict(
Expand Down Expand Up @@ -1350,6 +1354,8 @@ def sort_input(
natoms = atom_type[0].size
idx_map = np.arange(natoms) # pylint: disable=no-explicit-dtype
return coord, atom_type, idx_map
if atom_type.ndim > 1:
atom_type = atom_type[0]
if sel_atoms is not None:
selection = [False] * np.size(atom_type)
for ii in sel_atoms:
Expand Down
3 changes: 2 additions & 1 deletion deepmd/tf/model/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from deepmd.tf.env import (
MODEL_VERSION,
global_cvt_2_ener_float,
tf,
)
from deepmd.tf.utils.type_embed import (
Expand Down Expand Up @@ -173,7 +174,7 @@ def build(
if "global" not in self.model_type:
gname = "global_" + self.model_type
atom_out = tf.reshape(output, [-1, natomsel, nout])
global_out = tf.reduce_sum(atom_out, axis=1)
global_out = tf.reduce_sum(global_cvt_2_ener_float(atom_out), axis=1)
global_out = tf.reshape(global_out, [-1, nout], name="o_" + gname + suffix)

out_cpnts = tf.split(atom_out, nout, axis=-1)
Expand Down
5 changes: 5 additions & 0 deletions deepmd/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ def __init__(
# enforce type_map if necessary
self.enforce_type_map = False
if type_map is not None and self.type_map is not None and len(type_map):
missing_elements = [elem for elem in self.type_map if elem not in type_map]
if missing_elements:
raise ValueError(
f"Elements {missing_elements} are not present in the provided `type_map`."
)
if not self.mixed_type:
atom_type_ = [
type_map.index(self.type_map[ii]) for ii in self.atom_type
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ CMAKE_ARGS = "-DCMAKE_DISABLE_FIND_PACKAGE_OpenMP=1"
# error: 'value' is unavailable: introduced in macOS 10.13
select = "*-macosx_x86_64"
inherit.environment = "append"
environment.MACOSX_DEPLOYMENT_TARGET = "10.13"
environment.MACOSX_DEPLOYMENT_TARGET = "11.0"

[tool.cibuildwheel.linux]
repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 --exclude libc10.so --exclude libtorch.so --exclude libtorch_cpu.so -w {dest_dir} {wheel}"
Expand Down
2 changes: 1 addition & 1 deletion source/cmake/googletest.cmake.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.2)
cmake_minimum_required(VERSION 3.5)

project(googletest-download NONE)

Expand Down
Loading