diff --git a/.clang-format b/.clang-format index 727520b46b..1da466d870 100644 --- a/.clang-format +++ b/.clang-format @@ -1,4 +1,5 @@ --- BasedOnStyle: Google BinPackParameters: false +InsertBraces: true ... diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000000..18a2acda7f --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,3 @@ +FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-24.04 + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ diff --git a/.devcontainer/README.md b/.devcontainer/README.md new file mode 100644 index 0000000000..e829988adf --- /dev/null +++ b/.devcontainer/README.md @@ -0,0 +1,40 @@ +# DeePMD-kit devcontainer environment + +This [devcontainer](https://vscode.js.cn/docs/devcontainers/devcontainer-cli) environment setups Python and C++ environment to develop DeePMD-kit. +One can setup locally or use [GitHub Codespaces](https://docs.github.com/en/codespaces) by clicking the Code button on the DeePMD-kit repository page. +The whole setup process requires about 10 minutes, so one needs to be patient. + +## Python environment + +The following packages are installed into the Python environment `.venv`: + +- DeePMD-kit (in edit mode) +- Backends including TensorFlow, PyTorch, JAX +- LAMMPS +- MPICH +- CMake +- prek (including hooks) +- Test packages including pytest +- Doc packages including sphinx + +## C++ interface + +The C++ interface with TensorFlow and PyTorch support is installed into `dp` directory. + +When calling and debuging LAMMPS with DeePMD-kit, use the following scripts instead of the regular `lmp`: + +- `.devcontainer/lmp` +- `.devcontainer/gdb_lmp` + +Use the following scripts for `pytest` with LAMMPS: + +- `.devcontainer/pytest_lmp` +- `.devcontainer/gdb_pytest_lmp` + +## Rebuild + +Usually the Python package does not need to reinstall. +But when one wants to recompile the C++ code, the following scripts can be executed. 
+ +- `.devcontainer/build_cxx.sh` +- `.devcontainer/build_py.sh` diff --git a/.devcontainer/build_cxx.sh b/.devcontainer/build_cxx.sh new file mode 100755 index 0000000000..33a9f890ef --- /dev/null +++ b/.devcontainer/build_cxx.sh @@ -0,0 +1,22 @@ +#!/bin/bash +set -ev + +NPROC=$(nproc --all) +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +mkdir -p ${SCRIPT_PATH}/../buildcxx/ +cd ${SCRIPT_PATH}/../buildcxx/ +cmake -D ENABLE_TENSORFLOW=ON \ + -D ENABLE_PYTORCH=ON \ + -D ENABLE_PADDLE=ON \ + -D CMAKE_INSTALL_PREFIX=${SCRIPT_PATH}/../dp/ \ + -D LAMMPS_VERSION=stable_22Jul2025_update2 \ + -D CMAKE_BUILD_TYPE=Debug \ + -D BUILD_TESTING:BOOL=TRUE \ + -D TENSORFLOW_ROOT=${TENSORFLOW_ROOT} \ + ${SCRIPT_PATH}/../source +cmake --build . -j${NPROC} +cmake --install . diff --git a/.devcontainer/build_py.sh b/.devcontainer/build_py.sh new file mode 100755 index 0000000000..e0539d8cec --- /dev/null +++ b/.devcontainer/build_py.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ev + +SCRIPT_PATH=$(dirname $(realpath -s $0)) +cd ${SCRIPT_PATH}/.. 
+ +uv sync --dev --python 3.12 --extra cpu --extra torch --extra jax --extra lmp --extra test --extra docs +prek install diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000000..557b5a3bf6 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,16 @@ +{ + "name": "DeePMD-kit", + "build": { + "dockerfile": "Dockerfile" + }, + "features": { + "ghcr.io/devcontainers/features/github-cli:1": {} + }, + "postCreateCommand": ".devcontainer/build_py.sh && .devcontainer/download_libtorch.sh && .devcontainer/build_cxx.sh && prek install-hooks", + "remoteEnv": { + "PATH": "${containerEnv:PATH}:${containerWorkspaceFolder}/.venv/bin", + "DP_ENABLE_PYTORCH": "1", + "DP_VARIANT": "cpu", + "UV_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/cpu" + } +} diff --git a/.devcontainer/download_libtorch.sh b/.devcontainer/download_libtorch.sh new file mode 100755 index 0000000000..8c1e480b7c --- /dev/null +++ b/.devcontainer/download_libtorch.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ev + +SCRIPT_PATH=$(dirname $(realpath -s $0)) +cd ${SCRIPT_PATH}/.. 
+ +wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.8.0%2Bcpu.zip -O ~/libtorch.zip +unzip ~/libtorch.zip diff --git a/.devcontainer/gdb_lmp b/.devcontainer/gdb_lmp new file mode 100755 index 0000000000..fc1c8b90fe --- /dev/null +++ b/.devcontainer/gdb_lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + gdb ${SCRIPT_PATH}/../.venv/lib/python3.12/site-packages/lammps/lmp "$@" diff --git a/.devcontainer/gdb_pytest_lmp b/.devcontainer/gdb_pytest_lmp new file mode 100755 index 0000000000..d27587ec43 --- /dev/null +++ b/.devcontainer/gdb_pytest_lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0))/../.. 
+ +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + gdb --args python -m pytest -s "$@" diff --git a/.devcontainer/lmp b/.devcontainer/lmp new file mode 100755 index 0000000000..524f99b326 --- /dev/null +++ b/.devcontainer/lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0)) + +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + ${SCRIPT_PATH}/../.venv/bin/lmp "$@" diff --git a/.devcontainer/pytest_lmp b/.devcontainer/pytest_lmp new file mode 100755 index 0000000000..bb88da883f --- /dev/null +++ b/.devcontainer/pytest_lmp @@ -0,0 +1,9 @@ +#!/bin/bash +SCRIPT_PATH=$(dirname $(realpath -s $0))/../.. 
+ +export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + +env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ + LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ + python -m pytest "$@" diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 0000000000..7c5100942a --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,3 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..776405a339 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +# do not show up detailed difference on GitHub +source/3rdparty/* linguist-generated=true +source/3rdparty/README.md linguist-generated=false +.git_archival.txt export-subst diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index f13b187dfb..75e2aa50ae 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -21,10 +21,10 @@ body: validations: required: true - type: input - id: tf-version + id: backend-version attributes: - label: TensorFlow Version - description: "The version will be printed when running DeePMD-kit." + label: Backend and its version + description: "The backend and its version will be printed when running DeePMD-kit, e.g. TensorFlow v2.15.0." 
validations: required: true - type: dropdown @@ -37,6 +37,7 @@ body: - docker - pip - Built from source + - dp1s - Others (write below) validations: required: true diff --git a/.github/ISSUE_TEMPLATE/generic-issue.yml b/.github/ISSUE_TEMPLATE/generic-issue.yml index af9f01c64d..f84097580e 100644 --- a/.github/ISSUE_TEMPLATE/generic-issue.yml +++ b/.github/ISSUE_TEMPLATE/generic-issue.yml @@ -21,10 +21,10 @@ body: validations: required: true - type: input - id: tf-version + id: backend-version attributes: - label: TensorFlow Version - description: "The version will be printed when running DeePMD-kit." + label: Backend and its version + description: "The backend and its version will be printed when running DeePMD-kit, e.g. TensorFlow v2.15.0." validations: required: true - type: textarea diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 5855aef6c5..cbd920f6b3 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,4 +4,7 @@ updates: directory: "/" schedule: interval: "weekly" - target-branch: "devel" + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/labeler.yml b/.github/labeler.yml index 8a741fe9da..0183a144ba 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,14 +1,38 @@ -Python: -- deepmd/**/* -- source/tests/**/* -Docs: doc/**/* -Examples: examples/**/* -Core: source/lib/**/* -CUDA: source/lib/src/cuda/**/* -ROCM: source/lib/src/rocm/**/* -OP: source/op/**/* -C++: source/api_cc/**/* -C: source/api_c/**/* -LAMMPS: source/lmp/**/* -Gromacs: source/gmx/**/* -i-Pi: source/ipi/**/* +Python: + - changed-files: + - any-glob-to-any-file: + - deepmd/**/* + - source/tests/**/* +Docs: + - changed-files: + - any-glob-to-any-file: doc/**/* +Examples: + - changed-files: + - any-glob-to-any-file: examples/**/* +Core: + - changed-files: + - any-glob-to-any-file: source/lib/**/* +CUDA: + - changed-files: + - any-glob-to-any-file: source/lib/src/gpu/**/* +ROCM: + - changed-files: + - 
any-glob-to-any-file: source/lib/src/gpu/**/* +OP: + - changed-files: + - any-glob-to-any-file: source/op/**/* +C++: + - changed-files: + - any-glob-to-any-file: source/api_cc/**/* +C: + - changed-files: + - any-glob-to-any-file: source/api_c/**/* +LAMMPS: + - changed-files: + - any-glob-to-any-file: source/lmp/**/* +Gromacs: + - changed-files: + - any-glob-to-any-file: source/gmx/**/* +i-PI: + - changed-files: + - any-glob-to-any-file: source/ipi/**/* diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 0000000000..382e5db00e --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,34 @@ +changelog: + exclude: + authors: + - app/pre-commit-ci + - app/dependabot + categories: + - title: Breaking Changes + labels: + - "breaking change" + - title: New Features + labels: + - "new feature" + - title: Enhancement + labels: + - enhancement + - title: Documentation + labels: + # automatically added + - Docs + # for docs outside the doc directory + - "other docs" + exclude: + labels: + - build + - bug + - title: Build and release + labels: + - build + - title: Bug fixings + labels: + - bug + - title: Other Changes + labels: + - "*" diff --git a/.github/skills/add-descriptor/SKILL.md b/.github/skills/add-descriptor/SKILL.md new file mode 100644 index 0000000000..8e5b66088a --- /dev/null +++ b/.github/skills/add-descriptor/SKILL.md @@ -0,0 +1,311 @@ +--- +name: add-descriptor +description: Guides through adding a new descriptor type to deepmd-kit. Covers implementing in dpmodel (array-API-compatible), wrapping for JAX/pt_expt backends, hard-coding for PT/PD, registering arguments, and writing all required tests. +license: LGPL-3.0-or-later +compatibility: Requires Python 3.10+, numpy, pytest. Optional backends for full testing (torch, jax, paddle). +metadata: + author: deepmd-kit + version: '2.0' +--- + +# Adding a New Descriptor to deepmd-kit + +Follow these steps in order. Each step lists files to create/modify and patterns to follow. 
+ +## Step 1: Implement in dpmodel + +**Create** `deepmd/dpmodel/descriptor/.py` + +Inherit from `NativeOP` and `BaseDescriptor`. Register with decorators: + +```python +from deepmd.dpmodel import NativeOP +from .base_descriptor import BaseDescriptor + + +@BaseDescriptor.register("your_name") +@BaseDescriptor.register("alias_name") # optional aliases +class DescrptYourName(NativeOP, BaseDescriptor): ... +``` + +Key requirements: + +- `__init__`: initialize cutoff, sel, networks, davg/dstd statistics +- `call(coord_ext, atype_ext, nlist, mapping=None)`: forward pass returning `(descriptor, rot_mat, g2, h2, sw)` +- `serialize() -> dict`: save with `@class`, `type`, `@version`, `@variables` keys +- `deserialize(cls, data)`: reconstruct from dict +- Property/getter methods: `get_rcut`, `get_sel`, `get_dim_out`, `mixed_types`, etc. +- `__getitem__`/`__setitem__` for `davg`/`dstd` access via multiple key aliases + +All dpmodel code **must** use `array_api_compat` for cross-backend compatibility (numpy/torch/jax/paddle). See [references/dpmodel-implementation.md](references/dpmodel-implementation.md) for full method table, array API pitfalls, and utilities. + +**Reference implementations**: + +- Simple: `deepmd/dpmodel/descriptor/se_e2_a.py` +- Three-body: `deepmd/dpmodel/descriptor/se_t.py` +- Attention-based: `deepmd/dpmodel/descriptor/dpa1.py` + +## Step 2: Register + +**Edit** `deepmd/dpmodel/descriptor/__init__.py` — add import and `__all__` entry. 
+ +**Edit** `deepmd/utils/argcheck.py` — register descriptor arguments: + +```python +@descrpt_args_plugin.register("your_name", alias=["alias"], doc="Description") +def descrpt_your_name_args() -> list[Argument]: + return [ + Argument("sel", [list[int], str], optional=True, default="auto", doc=doc_sel), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), + Argument( + "neuron", list[int], optional=True, default=[10, 20, 40], doc=doc_neuron + ), + # ... add all constructor parameters + ] +``` + +## Step 3: Wrap for JAX backend + +**Create** `deepmd/jax/descriptor/.py` + +Pattern: `@flax_module` decorator + custom `__setattr__` for attribute conversion. + +```python +from deepmd.dpmodel.descriptor.your_name import DescrptYourName as DescrptYourNameDP +from deepmd.jax.common import ArrayAPIVariable, flax_module, to_jax_array +from deepmd.jax.descriptor.base_descriptor import BaseDescriptor +from deepmd.jax.utils.exclude_mask import PairExcludeMask +from deepmd.jax.utils.network import NetworkCollection + + +@BaseDescriptor.register("your_name") +@flax_module +class DescrptYourName(DescrptYourNameDP): + def __setattr__(self, name, value): + if name in {"davg", "dstd"}: + value = to_jax_array(value) + if value is not None: + value = ArrayAPIVariable(value) + elif name in {"embeddings"}: + if value is not None: + value = NetworkCollection.deserialize(value.serialize()) + elif name == "env_mat": + pass # stateless + elif name == "emask": + value = PairExcludeMask(value.ntypes, value.exclude_types) + return super().__setattr__(name, value) +``` + +For nested sub-components, define wrapper classes bottom-up. See `deepmd/jax/descriptor/dpa1.py` for example. + +**Edit** `deepmd/jax/descriptor/__init__.py` — add import and `__all__` entry. 
+ +## Step 4: Wrap for pt_expt backend + +**Create** `deepmd/pt_expt/descriptor/.py` + +The `@torch_module` decorator handles everything automatically: + +- Auto-generates `forward()` delegating to `call()` (and `forward_lower()` from `call_lower()`) +- Auto-generates `__setattr__` that converts numpy arrays to torch buffers and dpmodel objects to pt_expt modules via a converter registry +- Any unregistered `NativeOP` assigned as an attribute will raise `TypeError` — register it first + +Simple descriptors (no custom sub-components) need only an empty body: + +```python +from deepmd.dpmodel.descriptor.your_name import DescrptYourName as DescrptYourNameDP +from deepmd.pt_expt.common import torch_module +from deepmd.pt_expt.descriptor.base_descriptor import BaseDescriptor + + +@BaseDescriptor.register("your_name") +@torch_module +class DescrptYourName(DescrptYourNameDP): + pass +``` + +Standard dpmodel sub-components (`NetworkCollection`, `EmbeddingNet`, `PairExcludeMask`, `EnvMat`, `TypeEmbedNet`) are pre-registered in `deepmd/pt_expt/utils/` and converted automatically. No `__setattr__` override needed. + +For **custom sub-components** (e.g., a new block class inheriting `NativeOP`), create a separate wrapper file and register bottom-up with `register_dpmodel_mapping`: + +```python +# deepmd/pt_expt/descriptor/your_block.py +from deepmd.dpmodel.descriptor.your_block import YourBlock as YourBlockDP +from deepmd.pt_expt.common import register_dpmodel_mapping, torch_module + + +@torch_module +class YourBlock(YourBlockDP): + pass + + +register_dpmodel_mapping( + YourBlockDP, + lambda v: YourBlock.deserialize(v.serialize()), +) +``` + +Then import this module in `deepmd/pt_expt/descriptor/__init__.py` for its side effect (the registration must happen before the parent descriptor is instantiated). + +Reference: `deepmd/pt_expt/descriptor/se_t_tebd.py` + `se_t_tebd_block.py` + +**Edit** `deepmd/pt_expt/descriptor/__init__.py` — add import and `__all__` entry. 
+ +## Step 5: Hard-code for PT backend (if needed) + +**Create** `deepmd/pt/model/descriptor/.py` + +PT descriptors are fully reimplemented in PyTorch (not wrapping dpmodel). They inherit from `BaseDescriptor` and `torch.nn.Module`. Must implement `forward()`, `serialize()`, `deserialize()`. + +**Edit** `deepmd/pt/model/descriptor/__init__.py` — add import. + +Reference: `deepmd/pt/model/descriptor/se_a.py` + +## Step 6: Hard-code for TF backend (if needed) + +**Create** `deepmd/tf/descriptor/.py` + +TF descriptors are fully reimplemented in TensorFlow. They inherit from `BaseDescriptor` and implement the TF computation graph. + +**Edit** `deepmd/tf/descriptor/__init__.py` — add import. + +Reference: `deepmd/tf/descriptor/se_a.py` + +## Step 7: Hard-code for PD backend (if needed) + +Same as PT but using Paddle. Inherit from `BaseDescriptor` and `paddle.nn.Layer`. + +**Edit** `deepmd/pd/model/descriptor/__init__.py` — add import. + +Reference: `deepmd/pd/model/descriptor/se_a.py` + +## Step 8: Write tests + +Eight test categories. See [references/test-patterns.md](references/test-patterns.md) for full code templates. + +pt_expt tests use `pytest.mark.parametrize` (not `itertools.product`), do not inherit from `unittest.TestCase`, and use `setup_method` (not `setUp`). + +| Test | File | Purpose | +| --------------------- | -------------------------------------------------------------- | ------------------------------------------------- | +| 8a. dpmodel | `source/tests/common/dpmodel/test_descriptor_.py` | Serialize/deserialize round-trip | +| 8b. pt_expt | `source/tests/pt_expt/descriptor/test_.py` | Consistency + exportable + make_fx (float64 only) | +| 8c. PT | `source/tests/pt/model/test_descriptor_.py` | PT hard-coded tests (if applicable) | +| 8d. PD | `source/tests/pd/model/test_descriptor_.py` | PD hard-coded tests (if applicable) | +| 8e. array_api_strict | `source/tests/array_api_strict/descriptor/.py` | Wrapper for consistency tests | +| 8f. 
Universal dpmodel | `source/tests/universal/dpmodel/descriptor/test_descriptor.py` | Add parametrized entry | +| 8g. Universal PT | `source/tests/universal/pt/descriptor/test_descriptor.py` | Add parametrized entry | +| 8h. Consistency | `source/tests/consistent/descriptor/test_.py` | Cross-backend + API consistency | + +## Step 9: Write documentation + +**Create** `doc/model/.md` + +Each descriptor needs a documentation page in `doc/model/`. Use MyST Markdown format with Sphinx extensions. List supported backends using icon substitutions. + +Template: + +````markdown +# Descriptor `"your_name"` {{ pytorch_icon }} {{ dpmodel_icon }} + +:::{note} +**Supported backends**: PyTorch {{ pytorch_icon }}, DP {{ dpmodel_icon }} +::: + +Brief description of what the descriptor is and its theoretical motivation. + +## Theory + +Mathematical formulation using LaTeX: + +```math + \mathcal{D}^i = ... +``` + +## Instructions + +Example JSON configuration: + +```json +"descriptor": { + "type": "your_name", + "sel": [46, 92], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [10, 20, 40], + "resnet_dt": false, + "seed": 1 +} +``` + +Explain key parameters and link to the argument schema using `{ref}` directives, +e.g. `{ref}rcut `. +```` + +Available backend icons: `{{ tensorflow_icon }}`, `{{ pytorch_icon }}`, `{{ jax_icon }}`, `{{ paddle_icon }}`, `{{ dpmodel_icon }}`. Only list backends that actually support this descriptor. + +**Edit** `doc/model/index.rst` — add the new page to the `toctree`: + +```rst +.. toctree:: + :maxdepth: 1 + + ... 
+ +``` + +**Reference docs**: `doc/model/train-se-e2-r.md` (simple), `doc/model/dpa2.md` (modern) + +## Verification + +```bash +# dpmodel self-consistency +python -m pytest source/tests/common/dpmodel/test_descriptor_.py -v + +# pt_expt unit tests +python -m pytest source/tests/pt_expt/descriptor/test_.py -v + +# Cross-backend consistency +python -m pytest source/tests/consistent/descriptor/test_.py -v + +# PT/PD unit tests (if hard-coded) +python -m pytest source/tests/pt/model/test_descriptor_.py -v +python -m pytest source/tests/pd/model/test_descriptor_.py -v + +# Quick smoke test +python -c " +from deepmd.dpmodel.descriptor import DescrptYourName +d = DescrptYourName(rcut=6.0, rcut_smth=1.8, sel=[20, 20]) +d2 = DescrptYourName.deserialize(d.serialize()) +print('Round-trip OK:', d.get_dim_out() == d2.get_dim_out()) +" +``` + +## Files summary + +| Step | Action | File | +| ---- | ------ | -------------------------------------------------------------- | +| 1 | Create | `deepmd/dpmodel/descriptor/.py` | +| 2 | Edit | `deepmd/dpmodel/descriptor/__init__.py` | +| 2 | Edit | `deepmd/utils/argcheck.py` | +| 3 | Create | `deepmd/jax/descriptor/.py` | +| 3 | Edit | `deepmd/jax/descriptor/__init__.py` | +| 4 | Create | `deepmd/pt_expt/descriptor/.py` | +| 4 | Edit | `deepmd/pt_expt/descriptor/__init__.py` | +| 5 | Create | `deepmd/pt/model/descriptor/.py` (if needed) | +| 5 | Edit | `deepmd/pt/model/descriptor/__init__.py` (if needed) | +| 6 | Create | `deepmd/tf/descriptor/.py` (if needed) | +| 6 | Edit | `deepmd/tf/descriptor/__init__.py` (if needed) | +| 7 | Create | `deepmd/pd/model/descriptor/.py` (if needed) | +| 7 | Edit | `deepmd/pd/model/descriptor/__init__.py` (if needed) | +| 8a | Create | `source/tests/common/dpmodel/test_descriptor_.py` | +| 8b | Create | `source/tests/pt_expt/descriptor/test_.py` | +| 8c | Create | `source/tests/pt/model/test_descriptor_.py` (if PT) | +| 8d | Create | `source/tests/pd/model/test_descriptor_.py` (if PD) | +| 8e | Create | 
`source/tests/array_api_strict/descriptor/.py` | +| 8e | Edit | `source/tests/array_api_strict/descriptor/__init__.py` | +| 8f | Edit | `source/tests/universal/dpmodel/descriptor/test_descriptor.py` | +| 8g | Edit | `source/tests/universal/pt/descriptor/test_descriptor.py` | +| 8h | Create | `source/tests/consistent/descriptor/test_.py` | +| 9 | Create | `doc/model/.md` | +| 9 | Edit | `doc/model/index.rst` | diff --git a/.github/skills/add-descriptor/references/dpmodel-implementation.md b/.github/skills/add-descriptor/references/dpmodel-implementation.md new file mode 100644 index 0000000000..301dc7c752 --- /dev/null +++ b/.github/skills/add-descriptor/references/dpmodel-implementation.md @@ -0,0 +1,119 @@ +# dpmodel Implementation Details + +## Required methods + +| Method | Purpose | +| ------------------------------------------------------- | ------------------------------------------------------------ | +| `__init__(self, rcut, rcut_smth, sel, ...)` | Initialize cutoff, sel, networks, statistics | +| `call(self, coord_ext, atype_ext, nlist, mapping=None)` | Forward pass, returns `(descriptor, rot_mat, g2, h2, sw)` | +| `serialize(self) -> dict` | Save to dict with `@class`, `type`, `@version`, `@variables` | +| `deserialize(cls, data) -> Self` | Reconstruct from dict | +| `get_rcut() -> float` | Cutoff radius | +| `get_rcut_smth() -> float` | Smooth cutoff | +| `get_sel() -> list[int]` | Neighbor selection per type | +| `get_ntypes() -> int` | Number of atom types | +| `get_type_map() -> list[str]` | Type map | +| `get_dim_out() -> int` | Output descriptor dimension | +| `get_dim_emb() -> int` | Embedding dimension | +| `get_env_protection() -> float` | Environment protection value | +| `mixed_types() -> bool` | Whether descriptor mixes types | +| `has_message_passing() -> bool` | Whether it uses message passing | +| `need_sorted_nlist_for_lower() -> bool` | Whether nlist must be sorted | +| `compute_input_stats(merged, path)` | Compute davg/dstd from data | 
+| `set_stat_mean_and_stddev(mean, stddev)` | Set statistics | +| `get_stat_mean_and_stddev()` | Get statistics | +| `change_type_map(type_map, ...)` | Handle type map changes | +| `share_params(base_class, shared_level, resume)` | Parameter sharing | +| `update_sel(cls, train_data, type_map, local_jdata)` | Auto-update sel | + +## Statistics handling + +Support both naming conventions via `__getitem__`/`__setitem__`: + +```python +def __setitem__(self, key, value): + if key in ("avg", "data_avg", "davg"): + self.davg = value + elif key in ("std", "data_std", "dstd"): + self.dstd = value + else: + raise KeyError(key) + + +def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.davg + elif key in ("std", "data_std", "dstd"): + return self.dstd + else: + raise KeyError(key) +``` + +## Key utilities + +| Utility | Import from | Purpose | +| ------------------- | ----------------------------------- | ------------------------------ | +| `EnvMat` | `deepmd.dpmodel.utils.env_mat` | Environment matrix computation | +| `EmbeddingNet` | `deepmd.dpmodel.utils.network` | Embedding neural network | +| `NetworkCollection` | `deepmd.dpmodel.utils.network` | Manages type-indexed networks | +| `PairExcludeMask` | `deepmd.dpmodel.utils.exclude_mask` | Type exclusion pairs | +| `EnvMatStatSe` | `deepmd.dpmodel.utils.env_mat_stat` | Statistics computation | + +## Array API compatibility (CRITICAL) + +All dpmodel code must use `array_api_compat` to work across numpy/torch/jax/paddle: + +```python +import array_api_compat + +xp = array_api_compat.array_namespace(coord_ext) +device = array_api_compat.device(coord_ext) +``` + +To check whether a method is within the [array API standard](https://data-apis.org/array-api/), use the following command (query `zeros_like` for example): + +```sh +uvx --from array-api-strict python -c "import array_api_strict,pydoc;print(pydoc.render_doc(array_api_strict.zeros_like))" +``` + +If the method exists, its doc will be 
printed; otherwise, `AttributeError` is thrown. + +For methods of an `Array` class, call (query `Array.shape` for example): + +```sh +uvx --from array-api-strict python -c "import array_api_strict,pydoc;print(pydoc.render_doc(array_api_strict._array_object.Array.shape))" +``` + +Rules: + +1. **Never use `np.einsum` on arrays that might be torch tensors** — torch disables `__array_function__` so `np.einsum` fails on tensors with `requires_grad=True`. Use `xp.sum` with broadcasting: + + ```python + # BAD: np.einsum("lni,lnj->lij", gg, tr) + # GOOD: xp.sum(gg[:, :, :, None] * tr[:, :, None, :], axis=1) + ``` + +1. **`xp.zeros`/`xp.ones` must include `device=`** — omitting device can trigger CUDA init or create tensors on wrong device: + + ```python + # BAD: xp.zeros([2, 1], dtype=nlist.dtype) + # GOOD: xp.zeros([2, 1], dtype=nlist.dtype, device=array_api_compat.device(nlist)) + ``` + +1. **`xp.split` with `axis=` keyword doesn't work for torch** — use slicing: + + ```python + # BAD: g2, h2 = xp.split(dmatrix, [1], axis=-1) + # GOOD: g2, h2 = dmatrix[..., :1], dmatrix[..., 1:] + ``` + +1. **`xp_take_along_axis` indices must be int64 for torch**. + +1. **Don't maintain separate ArrayAPI subclasses** — dpmodel classes should be array_api compatible directly. + +1. **Boolean fancy indexing (`arr[mask]`) is not array-API compatible** — use mask multiplication: + + ```python + # BAD: gr[ti_mask] += gr_tmp + # GOOD: gr += gr_tmp * xp.astype(mask[:, None, None], gr_tmp.dtype) + ``` diff --git a/.github/skills/add-descriptor/references/test-patterns.md b/.github/skills/add-descriptor/references/test-patterns.md new file mode 100644 index 0000000000..cc1145b820 --- /dev/null +++ b/.github/skills/add-descriptor/references/test-patterns.md @@ -0,0 +1,462 @@ +# Test Patterns for Descriptors + +## 8a. 
dpmodel self-consistency test + +**Create** `source/tests/common/dpmodel/test_descriptor_.py` + +```python +import unittest + +import numpy as np + +from deepmd.dpmodel.descriptor import DescrptYourName + +from ...seed import GLOBAL_SEED +from .case_single_frame_with_nlist import TestCaseSingleFrameWithNlist + + +class TestDescrptYourName(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self) -> None: + TestCaseSingleFrameWithNlist.setUp(self) + + def test_self_consistency(self) -> None: + rng = np.random.default_rng(GLOBAL_SEED) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal( + size=(self.nt, nnei, 4) + ) # 4 for full env mat, 1 for radial-only + dstd = 0.1 + np.abs( + rng.normal(size=(self.nt, nnei, 4)) + ) # 4 for full env mat, 1 for radial-only + + em0 = DescrptYourName(self.rcut, self.rcut_smth, self.sel) + em0.davg = davg + em0.dstd = dstd + em1 = DescrptYourName.deserialize(em0.serialize()) + mm0 = em0.call(self.coord_ext, self.atype_ext, self.nlist) + mm1 = em1.call(self.coord_ext, self.atype_ext, self.nlist) + for ii in [0, 4]: # descriptor and sw + np.testing.assert_allclose(mm0[ii], mm1[ii]) +``` + +Reference: `source/tests/common/dpmodel/test_descriptor_se_t.py` + +## 8b. pt_expt unit tests + +**Create** `source/tests/pt_expt/descriptor/test_.py` + +Three test types: consistency, exportable, make_fx. Use `pytest.mark.parametrize` with trailing comments explaining each parameter. Do **not** inherit from `unittest.TestCase`. Use `setup_method` instead of `setUp`. 
+ +```python +import numpy as np +import pytest +import torch +from torch.fx.experimental.proxy_tensor import make_fx + +from deepmd.dpmodel.descriptor import DescrptYourName as DPDescrptYourName +from deepmd.pt_expt.descriptor.your_name import DescrptYourName +from deepmd.pt_expt.utils import env +from deepmd.pt_expt.utils.env import PRECISION_DICT + +from ...pt.model.test_env_mat import TestCaseSingleFrameWithNlist +from ...pt.model.test_mlp import get_tols +from ...seed import GLOBAL_SEED + + +class TestDescrptYourName(TestCaseSingleFrameWithNlist): + def setup_method(self) -> None: + TestCaseSingleFrameWithNlist.setUp(self) + self.device = env.DEVICE + + @pytest.mark.parametrize("idt", [False, True]) # resnet_dt + @pytest.mark.parametrize("prec", ["float64", "float32"]) # precision + def test_consistency(self, idt, prec) -> None: + rng = np.random.default_rng(GLOBAL_SEED) + _, _, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + err_msg = f"idt={idt} prec={prec}" + dd0 = DescrptYourName( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + seed=GLOBAL_SEED, + ).to(self.device) + dd0.davg = torch.tensor(davg, dtype=dtype, device=self.device) + dd0.dstd = torch.tensor(dstd, dtype=dtype, device=self.device) + rd0, _, _, _, sw0 = dd0( + torch.tensor(self.coord_ext, dtype=dtype, device=self.device), + torch.tensor(self.atype_ext, dtype=int, device=self.device), + torch.tensor(self.nlist, dtype=int, device=self.device), + ) + # Serialize/deserialize round-trip + dd1 = DescrptYourName.deserialize(dd0.serialize()) + rd1, _, _, _, sw1 = dd1( + torch.tensor(self.coord_ext, dtype=dtype, device=self.device), + torch.tensor(self.atype_ext, dtype=int, device=self.device), + torch.tensor(self.nlist, dtype=int, device=self.device), + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + 
rd1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + np.testing.assert_allclose( + sw0.detach().cpu().numpy(), + sw1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + # Permutation equivariance + np.testing.assert_allclose( + rd0.detach().cpu().numpy()[0][self.perm[: self.nloc]], + rd0.detach().cpu().numpy()[1], + rtol=rtol, + atol=atol, + err_msg=err_msg, + ) + # Compare with dpmodel + dd2 = DPDescrptYourName.deserialize(dd0.serialize()) + rd2, _, _, _, sw2 = dd2.call( + self.coord_ext, + self.atype_ext, + self.nlist, + ) + np.testing.assert_allclose( + rd1.detach().cpu().numpy(), rd2, rtol=rtol, atol=atol, err_msg=err_msg + ) + np.testing.assert_allclose( + sw1.detach().cpu().numpy(), sw2, rtol=rtol, atol=atol, err_msg=err_msg + ) + + @pytest.mark.parametrize("idt", [False, True]) # resnet_dt + @pytest.mark.parametrize("prec", ["float64", "float32"]) # precision + def test_exportable(self, idt, prec) -> None: + rng = np.random.default_rng(GLOBAL_SEED) + _, _, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + dtype = PRECISION_DICT[prec] + dd0 = DescrptYourName( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + resnet_dt=idt, + seed=GLOBAL_SEED, + ).to(self.device) + dd0.davg = torch.tensor(davg, dtype=dtype, device=self.device) + dd0.dstd = torch.tensor(dstd, dtype=dtype, device=self.device) + dd0 = dd0.eval() + inputs = ( + torch.tensor(self.coord_ext, dtype=dtype, device=self.device), + torch.tensor(self.atype_ext, dtype=int, device=self.device), + torch.tensor(self.nlist, dtype=int, device=self.device), + ) + torch.export.export(dd0, inputs) + + @pytest.mark.parametrize("prec", ["float64"]) # precision — float64 only + def test_make_fx(self, prec) -> None: + """Verify make_fx traces forward + autograd (for forward_lower).""" + rng = np.random.default_rng(GLOBAL_SEED) + _, _, nnei = self.nlist.shape + 
davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + dstd = 0.1 + np.abs(dstd) + + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + dd0 = DescrptYourName( + self.rcut, + self.rcut_smth, + self.sel, + precision=prec, + seed=GLOBAL_SEED, + ).to(self.device) + dd0.davg = torch.tensor(davg, dtype=dtype, device=self.device) + dd0.dstd = torch.tensor(dstd, dtype=dtype, device=self.device) + dd0 = dd0.eval() + + coord_ext = torch.tensor(self.coord_ext, dtype=dtype, device=self.device) + atype_ext = torch.tensor(self.atype_ext, dtype=int, device=self.device) + nlist = torch.tensor(self.nlist, dtype=int, device=self.device) + + def fn(coord_ext, atype_ext, nlist): + coord_ext = coord_ext.detach().requires_grad_(True) + rd = dd0(coord_ext, atype_ext, nlist)[0] + grad = torch.autograd.grad(rd.sum(), coord_ext, create_graph=False)[0] + return rd, grad + + rd_eager, grad_eager = fn(coord_ext, atype_ext, nlist) + traced = make_fx(fn)(coord_ext, atype_ext, nlist) + rd_traced, grad_traced = traced(coord_ext, atype_ext, nlist) + np.testing.assert_allclose( + rd_eager.detach().cpu().numpy(), + rd_traced.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + ) + np.testing.assert_allclose( + grad_eager.detach().cpu().numpy(), + grad_traced.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + ) +``` + +Reference: `source/tests/pt_expt/descriptor/test_se_t.py` + +## 8e. 
array_api_strict wrapper
+
+**Create** `source/tests/array_api_strict/descriptor/your_name.py`
+
+```python
+from typing import Any
+
+from deepmd.dpmodel.descriptor.your_name import DescrptYourName as DescrptYourNameDP
+
+from ..common import to_array_api_strict_array
+from ..utils.exclude_mask import PairExcludeMask
+from ..utils.network import NetworkCollection
+from .base_descriptor import BaseDescriptor
+
+
+@BaseDescriptor.register("your_name")
+class DescrptYourName(DescrptYourNameDP):
+    def __setattr__(self, name: str, value: Any) -> None:
+        if name in {"dstd", "davg"}:
+            value = to_array_api_strict_array(value)
+        elif name in {"embeddings"}:
+            if value is not None:
+                value = NetworkCollection.deserialize(value.serialize())
+        elif name == "env_mat":
+            pass
+        elif name == "emask":
+            value = PairExcludeMask(value.ntypes, value.exclude_types)
+        return super().__setattr__(name, value)
+```
+
+**Edit** `source/tests/array_api_strict/descriptor/__init__.py` — add import and `__all__` entry.
+
+Reference: `source/tests/array_api_strict/descriptor/se_e2_r.py`
+
+## 8h. Cross-backend consistency test
+
+**Create** `source/tests/consistent/descriptor/test_your_name.py`
+
+Two test classes: one for numerical consistency (`CommonTest`), one for API consistency (`DescriptorAPITest`).
+
+```python
+import unittest
+from typing import Any
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor.your_name import DescrptYourName as DescrptYourNameDP
+from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
+from deepmd.utils.argcheck import descrpt_your_name_args
+
+from ..common import (
+    INSTALLED_ARRAY_API_STRICT,
+    INSTALLED_JAX,
+    INSTALLED_PT,
+    INSTALLED_PT_EXPT,
+    INSTALLED_TF,
+    CommonTest,
+    parameterized,
+)
+from .common import DescriptorAPITest, DescriptorTest
+
+# Conditional imports for each backend.
+# Omit any backend that has no implementation for this descriptor.
+if INSTALLED_PT: + from deepmd.pt.model.descriptor.your_name import DescrptYourName as YourNamePT +else: + YourNamePT = None +if INSTALLED_PT_EXPT: + from deepmd.pt_expt.descriptor.your_name import DescrptYourName as YourNamePTExpt +else: + YourNamePTExpt = None +if INSTALLED_TF: + from deepmd.tf.descriptor.your_name import DescrptYourName as YourNameTF +else: + YourNameTF = None +if INSTALLED_JAX: + from deepmd.jax.descriptor.your_name import DescrptYourName as YourNameJAX +else: + YourNameJAX = None +if INSTALLED_ARRAY_API_STRICT: + from ...array_api_strict.descriptor.your_name import ( + DescrptYourName as YourNameStrict, + ) +else: + YourNameStrict = None + + +@parameterized( + (True, False), # resnet_dt + ("float32", "float64"), # precision +) +class TestYourName(CommonTest, DescriptorTest, unittest.TestCase): + @property + def data(self) -> dict: + resnet_dt, precision = self.param + return { + "sel": [9, 10], + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [6, 12, 24], + "resnet_dt": resnet_dt, + "precision": precision, + "seed": 1145141919810, + "activation_function": "relu", + } + + @property + def skip_pt(self) -> bool: + return CommonTest.skip_pt + + @property + def skip_pt_expt(self) -> bool: + # Add parameter-based skips here if needed, e.g.: + # return (not some_supported_param) or CommonTest.skip_pt_expt + return CommonTest.skip_pt_expt + + @property + def skip_dp(self) -> bool: + return CommonTest.skip_dp + + @property + def skip_jax(self) -> bool: + return CommonTest.skip_jax + + @property + def skip_array_api_strict(self) -> bool: + return CommonTest.skip_array_api_strict + + tf_class = YourNameTF + dp_class = DescrptYourNameDP + pt_class = YourNamePT + pt_expt_class = YourNamePTExpt + jax_class = YourNameJAX + array_api_strict_class = YourNameStrict + args = descrpt_your_name_args() + + def setUp(self) -> None: + CommonTest.setUp(self) + self.ntypes = 2 + self.coords = np.array( + [ + 12.83, + 2.56, + 2.18, + 12.09, + 2.87, + 2.74, + 0.25, + 
3.32, + 1.68, + 3.36, + 3.00, + 1.81, + 3.51, + 2.51, + 2.60, + 4.27, + 3.22, + 1.56, + ], + dtype=GLOBAL_NP_FLOAT_PRECISION, + ) + self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=np.int32) + self.box = np.array( + [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0], + dtype=GLOBAL_NP_FLOAT_PRECISION, + ) + self.natoms = np.array([6, 6, 2, 4], dtype=np.int32) + + # Implement eval_* methods using self.eval_*_descriptor() helpers. + # For mixed_types descriptors (dpa1, dpa2, dpa3, se_atten_v2), + # pass mixed_types=True to each eval call. + def eval_dp(self, dp_obj: Any) -> Any: + return self.eval_dp_descriptor( + dp_obj, self.natoms, self.coords, self.atype, self.box + ) + + def eval_pt(self, pt_obj: Any) -> Any: + return self.eval_pt_descriptor( + pt_obj, self.natoms, self.coords, self.atype, self.box + ) + + def eval_pt_expt(self, pt_expt_obj: Any) -> Any: + return self.eval_pt_expt_descriptor( + pt_expt_obj, self.natoms, self.coords, self.atype, self.box + ) + + def eval_jax(self, jax_obj: Any) -> Any: + return self.eval_jax_descriptor( + jax_obj, self.natoms, self.coords, self.atype, self.box + ) + + def eval_array_api_strict(self, obj: Any) -> Any: + return self.eval_array_api_strict_descriptor( + obj, self.natoms, self.coords, self.atype, self.box + ) + + def extract_ret(self, ret: Any, backend: Any) -> tuple: + return (ret[0],) + + @property + def rtol(self) -> float: + _, precision = self.param + return 1e-10 if precision == "float64" else 1e-4 + + @property + def atol(self) -> float: + _, precision = self.param + return 1e-10 if precision == "float64" else 1e-4 + + +@parameterized( + ("float64",), # precision — API test only needs one precision +) +class TestYourNameAPI(DescriptorAPITest, unittest.TestCase): + @property + def data(self) -> dict: + (precision,) = self.param + return { + "sel": [9, 10], + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [6, 12, 24], + "precision": precision, + "seed": 1145141919810, + } + + dp_class = DescrptYourNameDP + pt_class 
= YourNamePT + pt_expt_class = YourNamePTExpt + args = descrpt_your_name_args() + ntypes = 2 + + @property + def skip_pt(self) -> bool: + return not INSTALLED_PT + + @property + def skip_pt_expt(self) -> bool: + return not INSTALLED_PT_EXPT +``` + +Reference: `source/tests/consistent/descriptor/test_se_t.py` diff --git a/.github/skills/debug-gradient-flow/SKILL.md b/.github/skills/debug-gradient-flow/SKILL.md new file mode 100644 index 0000000000..f85be554a8 --- /dev/null +++ b/.github/skills/debug-gradient-flow/SKILL.md @@ -0,0 +1,182 @@ +--- +name: debug-gradient-flow +description: Diagnose gradient flow issues in training, especially for compiled models (torch.compile/make_fx). Systematically isolates which loss components (energy, force, virial) contribute gradients to which parameters, and identifies where the gradient chain breaks. +license: LGPL-3.0-or-later +metadata: + author: deepmd-kit + version: '1.0' +--- + +# Debugging Gradient Flow in Training + +Use this method when a loss component (force, virial, energy) does not decrease during training, or when compiled model training diverges from uncompiled training. + +## When to use + +- A loss term (e.g. `rmse_f`, `rmse_v`) stays flat or NaN during training +- Compiled training (`enable_compile=True`) behaves differently from uncompiled +- After adding a new loss component or model output +- After changes to `make_fx` tracing, `torch.compile`, or `autograd.grad` code paths + +## Method: Per-component gradient isolation + +The core technique: **zero out all loss terms except one**, run `loss.backward()`, and count which model parameters receive non-zero gradients. Compare across uncompiled and compiled paths to pinpoint where gradients are lost. 
+ +### Step 1: Write a gradient probe script + +Create a script that constructs a trainer, injects labels if needed, and reports per-parameter gradient status: + +```python +def check_grad(trainer, label_overrides=None): + trainer.wrapper.train() + trainer.optimizer.zero_grad(set_to_none=True) + inp, lab = trainer.get_data(is_train=True) + lr = trainer.scheduler.get_last_lr()[0] + + # Override labels to isolate a single loss component + if label_overrides: + lab.update(label_overrides) + + _, loss, more_loss = trainer.wrapper(**inp, cur_lr=lr, label=lab) + loss.backward() + + status = {} + for name, p in trainer.wrapper.named_parameters(): + if p.requires_grad: + has_grad = p.grad is not None and p.grad.abs().sum() > 0 + status[name] = has_grad + return status +``` + +### Step 2: Run for each loss component in isolation + +Test each loss component separately by zeroing out the others: + +```python +scenarios = { + "energy only": {"find_force": 0.0, "find_virial": 0.0}, + "force only": {"find_energy": 0.0, "find_virial": 0.0}, + "virial only": { + "find_energy": 0.0, + "find_force": 0.0, + "virial": torch.randn(nframes, 9, ...), # inject if data lacks virial + "find_virial": 1.0, + }, + "all losses": { + "virial": torch.randn(nframes, 9, ...), + "find_virial": 1.0, + }, +} +``` + +If training data lacks virial labels, inject synthetic ones — the numerical values don't matter, only gradient flow matters. + +### Step 3: Compare compiled vs uncompiled + +Run each scenario for both compiled and uncompiled trainers. Present results as a table: + +``` + Uncompiled Compiled +energy only: 22/22 22/22 +force only: 20/22 16/22 <-- problem +virial only: 20/22 16/22 <-- problem +all losses: 22/22 22/22 <-- OK in practice +``` + +Key interpretations: + +- **Same count, both paths**: gradient flow is correct +- **Compiled < Uncompiled**: `make_fx` or `torch.compile` breaks some gradient paths +- **0 grads in compiled**: catastrophic failure (e.g. 
wrong `create_graph`, wrong backend) +- **"all losses" is OK but isolated isn't**: the missing grads are covered by other loss terms; may be acceptable + +### Step 4: Identify affected parameters + +When compiled has fewer grads, print the per-parameter diff: + +```python +print(f"{'Parameter':<60} {'Uncompiled':>10} {'Compiled':>10}") +for name in sorted(status_uncompiled): + uc = "GRAD" if status_uncompiled[name] else "-" + cc = "GRAD" if status_compiled[name] else "-" + marker = " <-- DIFF" if uc != cc else "" + print(f"{name:<60} {uc:>10} {cc:>10}{marker}") +``` + +This tells you exactly which layers lose gradients and helps locate the broken link in the computation graph. + +### Step 5: Bisect the cause + +If compiled has fewer grads, test these layers in order: + +| Layer | What to try | What it tests | +| ------------------------------------------------ | ------------------------------------------------------- | ------------------------------------------------------ | +| `make_fx` only (no `torch.compile`) | Replace `torch.compile(traced, ...)` with just `traced` | Is `make_fx` the problem or `torch.compile`? | +| Different `torch.compile` backends | Try `eager`, `aot_eager`, `inductor` | Which backend breaks gradients? | +| `model.train()` vs `model.eval()` during tracing | Toggle training mode before `make_fx` | Does `create_graph=self.training` get the wrong value? | +| `coord.requires_grad_(True)` placement | Check if coord has grad before entering compiled graph | Is the autograd entry point correct? | + +```python +# Test make_fx only (no torch.compile) +traced = make_fx(fn)(ext_coord, ext_atype, nlist, mapping, fparam, aparam) +# Use traced directly instead of torch.compile(traced) + +# Test different backends +for backend in ["eager", "aot_eager", "inductor"]: + compiled = torch.compile(traced, backend=backend, dynamic=False) + # ... run gradient check +``` + +## Common root causes + +### 1. 
`create_graph=False` during tracing + +**Symptom**: force/virial loss doesn't decrease; 0 params get grad from force/virial loss. + +**Cause**: `model.eval()` before `make_fx` tracing makes `create_graph=self.training` evaluate to `False`. The `autograd.grad` that computes force is traced without graph creation, so the force tensor is detached from model parameters. + +**Fix**: `model.train()` before `make_fx` tracing. + +**Location**: `_trace_and_compile` in `deepmd/pt_expt/train/training.py` + +### 2. `torch.compile` inductor backend kills second-order gradients + +**Symptom**: force/virial loss doesn't decrease; 0 params get grad with inductor, but `eager`/`aot_eager` work fine. + +**Cause**: The inductor backend's graph lowering doesn't support backward through `make_fx`-decomposed `autograd.grad` ops. + +**Fix**: Default to `aot_eager` backend. + +### 3. Ghost force contributions discarded + +**Symptom**: force values differ between compiled and uncompiled models. + +**Cause**: Using `extended_force[:, :nloc, :]` (slice) instead of scatter-summing ghost atom contributions back to local atoms via `mapping`. + +**Fix**: `torch.zeros(...).scatter_add_(1, mapping_idx, extended_force[:, :actual_nall, :])` + +### 4. Virial RMSE normalization mismatch + +**Symptom**: `rmse_v` values differ between backends by a factor of `natoms`. + +**Cause**: dpmodel `rmse_v = sqrt(l2_virial_loss)` missing `* atom_norm` normalization that other backends apply. + +**Fix**: `rmse_v = sqrt(l2_virial_loss) * atom_norm` + +## Verification + +After fixing, always verify: + +1. **Gradient count matches**: uncompiled and compiled should have the same number of params with grad for each isolated loss component +1. **Numerical consistency**: compiled model energy/force/virial should match uncompiled to float precision (`atol=1e-10, rtol=1e-10`) +1. **Loss decreases**: run a few training steps and verify `rmse_f` / `rmse_v` actually decrease +1. 
**Regression test**: add a test that catches the bug by reverting the fix and confirming the test fails + +```bash +# Run compiled consistency test +python -m pytest source/tests/pt_expt/test_training.py::TestCompiledConsistency -v +# Run loss consistency test +python -m pytest source/tests/consistent/loss/test_ener.py -v +# Run full training smoke test +python -m pytest source/tests/pt_expt/test_training.py -v +``` diff --git a/.github/skills/debug-gradient-flow/references/gradient-probe-script.md b/.github/skills/debug-gradient-flow/references/gradient-probe-script.md new file mode 100644 index 0000000000..bd932bd4a5 --- /dev/null +++ b/.github/skills/debug-gradient-flow/references/gradient-probe-script.md @@ -0,0 +1,303 @@ +# Gradient Probe Script + +Complete, copy-pasteable script for diagnosing gradient flow issues. Adapt `make_config` for the model/loss you are testing. + +```python +"""Gradient flow diagnostic for deepmd-kit training. + +Tests each loss component in isolation across compiled/uncompiled paths. +Prints per-parameter gradient status to identify exactly where gradients +are lost. + +Usage: + cd ~/research/deepmodeling/deepmd-kit/source + python /tmp/gradient_probe.py +""" + +import os +import tempfile +from collections import defaultdict + +import torch + +from deepmd.pt_expt.entrypoints.main import get_trainer +from deepmd.utils.argcheck import normalize +from deepmd.utils.compat import update_deepmd_input + +# Adapt this path to your training data +EXAMPLE_DIR = os.path.join( + os.path.expanduser("~"), + "research/deepmodeling/deepmd-kit/source/examples/water", +) + + +def make_config(data_dir, enable_compile=False): + """Build a minimal config for gradient probing. + + Adapt this function for the model architecture and loss you are testing. + Key: enable all loss terms (e, f, v) so we can selectively zero them. 
+ """ + config = { + "model": { + "type_map": ["O", "H"], + "descriptor": { + "type": "se_e2_a", + "sel": [6, 12], + "rcut_smth": 0.50, + "rcut": 3.00, + "neuron": [8, 16], + "resnet_dt": False, + "axis_neuron": 4, + "type_one_side": True, + "seed": 1, + }, + "fitting_net": { + "neuron": [16, 16], + "resnet_dt": True, + "seed": 1, + }, + "data_stat_nbatch": 1, + }, + "learning_rate": { + "type": "exp", + "decay_steps": 500, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + }, + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 1.0, + "limit_pref_v": 1.0, + }, + "training": { + "training_data": { + "systems": [os.path.join(data_dir, "data_0")], + "batch_size": 1, + }, + "validation_data": { + "systems": [os.path.join(data_dir, "data_3")], + "batch_size": 1, + "numb_btch": 1, + }, + "numb_steps": 1, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 5, + "save_freq": 1, + }, + } + if enable_compile: + config["training"]["enable_compile"] = True + return config + + +def run_and_get_grads(trainer, label_overrides=None): + """Forward + backward, return per-parameter gradient status.""" + trainer.wrapper.train() + trainer.optimizer.zero_grad(set_to_none=True) + inp, lab = trainer.get_data(is_train=True) + lr = trainer.scheduler.get_last_lr()[0] + + if label_overrides: + for k, v in label_overrides.items(): + if callable(v): + lab[k] = v(inp, lab) + else: + lab[k] = v + + _, loss, more_loss = trainer.wrapper(**inp, cur_lr=lr, label=lab) + loss.backward() + + status = {} + for name, p in trainer.wrapper.named_parameters(): + if p.requires_grad: + has_grad = p.grad is not None and p.grad.abs().sum().item() > 0 + status[name] = has_grad + return status, loss.item() + + +def make_virial_injector(dtype, device): + """Return a callable that creates synthetic virial labels.""" + + def inject(inp, lab): + nframes = inp["atype"].shape[0] + return torch.randn(nframes, 9, dtype=dtype, 
device=device) + + return inject + + +def main(): + data_dir = os.path.join(EXAMPLE_DIR, "data") + if not os.path.isdir(data_dir): + print(f"Data not found: {data_dir}") + return + + tmpdir = tempfile.mkdtemp(prefix="grad_probe_") + old_cwd = os.getcwd() + os.chdir(tmpdir) + try: + # --- Phase 1: Summary table --- + print("=" * 80) + print("PHASE 1: Gradient count per loss component") + print("=" * 80) + + # Get dtype/device from a quick trainer + config_tmp = make_config(data_dir) + config_tmp = update_deepmd_input(config_tmp, warning=False) + config_tmp = normalize(config_tmp) + trainer_tmp = get_trainer(config_tmp) + inp_tmp, _ = trainer_tmp.get_data(is_train=True) + dtype = inp_tmp["coord"].dtype + device = inp_tmp["coord"].device + del trainer_tmp + + scenarios = { + "energy only": {"find_force": 0.0, "find_virial": 0.0}, + "force only": {"find_energy": 0.0, "find_virial": 0.0}, + "virial only": { + "find_energy": 0.0, + "find_force": 0.0, + "virial": make_virial_injector(dtype, device), + "find_virial": 1.0, + }, + "all losses": { + "virial": make_virial_injector(dtype, device), + "find_virial": 1.0, + }, + } + + all_results = {} # (compile_mode, scenario) -> (status_dict, count, total) + + for compile_mode in ["uncompiled", "compiled"]: + enable = compile_mode == "compiled" + for scenario, overrides in scenarios.items(): + config = make_config(data_dir, enable_compile=enable) + config = update_deepmd_input(config, warning=False) + config = normalize(config) + trainer = get_trainer(config) + status, loss_val = run_and_get_grads(trainer, overrides) + count = sum(1 for v in status.values() if v) + total = len(status) + key = (compile_mode, scenario) + all_results[key] = (status, count, total) + del trainer + + # Print summary table + print(f"\n{'Scenario':<20} {'Uncompiled':>12} {'Compiled':>12} {'Match':>8}") + print("-" * 56) + for scenario in scenarios: + _, uc_count, uc_total = all_results[("uncompiled", scenario)] + _, cc_count, cc_total = 
all_results[("compiled", scenario)] + match = "OK" if uc_count == cc_count else "DIFF" + print( + f"{scenario:<20} " + f"{uc_count:>5}/{uc_total:<5} " + f"{cc_count:>5}/{cc_total:<5} " + f"{match:>8}" + ) + + # --- Phase 2: Per-parameter diff for mismatches --- + print("\n" + "=" * 80) + print("PHASE 2: Per-parameter diff (only for mismatching scenarios)") + print("=" * 80) + + for scenario in scenarios: + uc_status, uc_count, _ = all_results[("uncompiled", scenario)] + cc_status, cc_count, _ = all_results[("compiled", scenario)] + if uc_count == cc_count: + continue + print(f"\n--- {scenario} ---") + print(f"{'Parameter':<60} {'Uncompiled':>10} {'Compiled':>10}") + print("-" * 84) + for name in sorted(uc_status.keys()): + uc = "GRAD" if uc_status.get(name, False) else "-" + cc = "GRAD" if cc_status.get(name, False) else "-" + marker = " <-- DIFF" if uc != cc else "" + print(f"{name:<60} {uc:>10} {cc:>10}{marker}") + + # --- Phase 3: torch.compile backend comparison --- + print("\n" + "=" * 80) + print("PHASE 3: torch.compile backend comparison (force-only loss)") + print("=" * 80) + + from deepmd.pt_expt.train import training as training_mod + + orig_trace = training_mod._trace_and_compile + + for backend in ["eager", "aot_eager", "inductor"]: + + def patched(model, ec, ea, nl, mp, fp, ap, opts, _b=backend): + opts["backend"] = _b + return orig_trace(model, ec, ea, nl, mp, fp, ap, opts) + + training_mod._trace_and_compile = patched + try: + config = make_config(data_dir, enable_compile=True) + config = update_deepmd_input(config, warning=False) + config = normalize(config) + trainer = get_trainer(config) + status, _ = run_and_get_grads( + trainer, {"find_energy": 0.0, "find_virial": 0.0} + ) + count = sum(1 for v in status.values() if v) + total = len(status) + print(f" {backend:<12}: {count}/{total} params have force grad") + del trainer + except Exception as e: + print(f" {backend:<12}: FAILED ({e})") + + training_mod._trace_and_compile = orig_trace + + finally: 
+ os.chdir(old_cwd) + + +if __name__ == "__main__": + main() +``` + +## Adapting the script + +### Different model architecture + +Change `make_config` to use a different descriptor/fitting. The rest of the script works unchanged. + +### Different loss type + +Change `scenarios` to match the loss component keys. For example, for a dipole model: + +```python +scenarios = { + "dipole only": {"find_energy": 0.0}, + "energy only": {"find_dipole": 0.0}, + "all losses": {}, +} +``` + +### Testing without `get_trainer` + +If you need to test a standalone model without the full training infrastructure: + +```python +model = get_model(config["model"]) +model.train() + +# Build input +coord = torch.randn(1, natoms, 3, requires_grad=True) +atype = torch.tensor([[0, 0, 1, 1, 1, 1]]) +box = torch.eye(3).reshape(1, 9) * 10.0 + +# Forward +pred = model(coord, atype, box) + +# Backward from a specific output +pred["force"].sum().backward() + +for name, p in model.named_parameters(): + has_grad = p.grad is not None and p.grad.abs().sum() > 0 + print(f"{name}: {'GRAD' if has_grad else '-'}") +``` diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index 0ebb80fa98..e54c0619ab 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -1,81 +1,86 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" + - "copilot/**" + - "dependabot/**" + - "pre-commit-ci-update-config" pull_request: + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Build C++ jobs: buildcc: name: Build C++ - runs-on: ubuntu-latest - container: ghcr.io/deepmodeling/deepmd-kit-test-cc:latest + runs-on: ubuntu-22.04 strategy: matrix: include: - - variant: cpu - dp_variant: cpu - - variant: cuda - dp_variant: cuda - - variant: cuda120 - dp_variant: cuda - - variant: rocm - dp_variant: rocm - - variant: clang - dp_variant: clang + - variant: cpu + dp_variant: cpu + - variant: cuda120 + 
dp_variant: cuda + - variant: rocm + dp_variant: rocm + - variant: clang + dp_variant: clang steps: - - name: work around permission issue - run: git config --global --add safe.directory /__w/deepmd-kit/deepmd-kit - - uses: actions/checkout@v3 - with: - submodules: true - - run: apt-get update && apt-get install -y nvidia-cuda-toolkit - if: matrix.variant == 'cuda' - - run: | - apt-get update \ - && apt-get -y install wget \ - && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ - && dpkg -i cuda-keyring_1.0-1_all.deb \ - && apt-get update \ - && apt-get -y install cuda-12-0 - if: matrix.variant == 'cuda120' - env: - DEBIAN_FRONTEND: noninteractive - - run: | - apt-get update && apt-get install -y gnupg2 \ - && echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/5.3/ jammy main' | tee /etc/apt/sources.list.d/rocm.list \ - && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | tee /etc/apt/preferences.d/rocm-pin-600 \ - && curl -s https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - \ - && apt-get update \ - && apt-get install -y rocm-dev hipcub-dev - if: matrix.variant == 'rocm' - - run: apt-get update && apt-get install -y clang - if: matrix.variant == 'clang' - - run: source/install/build_cc.sh - env: - DP_VARIANT: ${{ matrix.dp_variant }} - DOWNLOAD_TENSORFLOW: "FALSE" - if: matrix.variant != 'clang' - - run: source/install/build_cc.sh - env: - DP_VARIANT: cpu - DOWNLOAD_TENSORFLOW: "FALSE" - CC: clang - CXX: clang++ - if: matrix.variant == 'clang' - - name: Test files exist - run: | - test -f dp/bin/dp_ipi && - test -f dp/lib/libdeepmd_cc.so && - test -f dp/lib/libdeepmd_c.so && - test -f dp/lib/libdeepmd_op.so && - test -f dp/lib/libdeepmd_ipi.so && - test -f dp/lib/libdeepmd_lmp.so && - test -f dp/lib/libdeepmd.so + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.11" + - uses: lukka/get-cmake@latest + - run: python -m pip install 
uv + - run: source/install/uv_with_retry.sh pip install --system --group pin_tensorflow_cpu --group pin_pytorch_cpu --torch-backend cpu + - run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ + && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ + && sudo apt-get update \ + && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2 + if: matrix.variant == 'cuda120' + env: + DEBIAN_FRONTEND: noninteractive + - run: | + echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/6.3/ jammy main' | sudo tee /etc/apt/sources.list.d/rocm.list \ + && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 \ + && curl -s https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - \ + && sudo apt-get update \ + && sudo apt-get install -y rocm-dev hipcub-dev + if: matrix.variant == 'rocm' + - run: | + source/install/build_cc.sh + env: + DP_VARIANT: ${{ matrix.dp_variant }} + DOWNLOAD_TENSORFLOW: "FALSE" + CMAKE_GENERATOR: Ninja + if: matrix.variant != 'clang' + - run: | + source/install/build_cc.sh + env: + DP_VARIANT: cpu + DOWNLOAD_TENSORFLOW: "FALSE" + CC: clang + CXX: clang++ + CMAKE_GENERATOR: Ninja + if: matrix.variant == 'clang' + - name: Test files exist + run: | + test -f dp/bin/dp_ipi && + test -f dp/lib/libdeepmd_cc.so && + test -f dp/lib/libdeepmd_c.so && + test -f dp/lib/libdeepmd_op.so && + test -f dp/lib/libdeepmd_ipi.so && + test -f dp/lib/libdeepmd_lmp.so && + test -f dp/lib/libdeepmd.so pass: name: Pass building C++ needs: [buildcc] runs-on: ubuntu-latest if: always() steps: - - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@release/v1 - with: - jobs: ${{ toJSON(needs) }} + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 
c497e86e1d..88a4cb241a 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -2,7 +2,19 @@ name: Build and upload to PyPI on: push: + branches-ignore: + - "gh-readonly-queue/**" + - "copilot/**" + - "dependabot/**" + - "pre-commit-ci-update-config" + tags: + - "v*" pull_request: + merge_group: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true jobs: build_wheels: @@ -14,88 +26,111 @@ jobs: include: # linux-64 - os: ubuntu-latest - python: 310 + python: 311 platform_id: manylinux_x86_64 dp_variant: cuda + cuda_version: 12.8 # macos-x86-64 - - os: macos-latest - python: 310 + - os: macos-15-intel + python: 311 platform_id: macosx_x86_64 dp_variant: cpu + # macos-arm64 + - os: macos-14 + python: 311 + platform_id: macosx_arm64 + dp_variant: cpu # win-64 - - os: windows-2019 - python: 310 + - os: windows-2022 + python: 311 platform_id: win_amd64 dp_variant: cpu # linux-aarch64 - - os: ubuntu-latest + - os: ubuntu-24.04-arm python: 310 platform_id: manylinux_aarch64 dp_variant: cpu steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v6 with: - submodules: true # https://github.com/pypa/setuptools_scm/issues/480 fetch-depth: 0 - - uses: docker/setup-qemu-action@v2 - name: Setup QEMU - if: matrix.platform_id == 'manylinux_aarch64' + - name: Install uv + run: curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/uv/releases/download/0.2.24/uv-installer.sh | sh + if: runner.os != 'Linux' - name: Build wheels - uses: pypa/cibuildwheel@v2.12.1 + uses: pypa/cibuildwheel@v3.4 env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: all CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} DP_VARIANT: ${{ matrix.dp_variant }} - - uses: actions/upload-artifact@v3 + CUDA_VERSION: ${{ matrix.cuda_version }} + DP_PKG_NAME: ${{ matrix.dp_pkg_name }} + CIBW_BUILD_FRONTEND: "build[uv]" + - uses: actions/upload-artifact@v7 with: + name: cibw-cp${{ matrix.python }}-${{ 
matrix.platform_id }}-cu${{ matrix.cuda_version }}-${{ strategy.job-index }} path: ./wheelhouse/*.whl build_sdist: name: Build source distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - with: - submodules: true - - uses: actions/setup-python@v4 - name: Install Python + - uses: actions/checkout@v6 with: - python-version: '3.10' - - run: python -m pip install build + fetch-depth: 0 - name: Build sdist - run: python -m build --sdist + run: pipx run uv tool run --with build[uv] --from build python -m build --installer uv --sdist - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v7 with: + name: cibw-sdist path: dist/*.tar.gz upload_pypi: needs: [build_wheels, build_sdist] runs-on: ubuntu-latest + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v8 with: - name: artifact + pattern: cibw-* path: dist + merge-multiple: true - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.pypi_password }} build_docker: # use the already built wheels to build docker needs: [build_wheels] runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - variant: "" + cuda_version: "12" steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 + - name: Free Disk Space (Ubuntu) + uses: insightsengineering/disk-space-reclaimer@v1 + with: + tools-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + docker-images: true + - uses: actions/checkout@v6 + - uses: actions/download-artifact@v8 with: - name: artifact path: source/install/docker/dist + pattern: cibw-*-manylinux_x86_64-cu${{ matrix.cuda_version }}* + merge-multiple: true - name: Log in to the Container registry - uses: 
docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ github.actor }} @@ -103,25 +138,64 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@507c2f2dc502c992ad446e3d7a5dfbe311567a96 + uses: docker/metadata-action@v6 with: images: ghcr.io/deepmodeling/deepmd-kit - name: Build and push Docker image - uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 + uses: docker/build-push-action@v7 with: context: source/install/docker - push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }} - tags: ${{ steps.meta.outputs.tags }} + push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' && github.actor != 'dependabot[bot]' }} + tags: ${{ steps.meta.outputs.tags }}${{ matrix.variant }} labels: ${{ steps.meta.outputs.labels }} + build-args: | + VARIANT=${{ matrix.variant }} + CUDA_VERSION=${{ matrix.cuda_version }} + + build_pypi_index: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v8 + with: + path: dist/packages + pattern: cibw-* + merge-multiple: true + - uses: actions/setup-python@v6 + name: Install Python + with: + python-version: "3.11" + - run: pip install dumb-pypi + - run: | + ls dist/packages > package_list.txt + dumb-pypi --output-dir dist --packages-url ../../packages --package-list package_list.txt --title "DeePMD-kit Developed Packages" + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v4 + with: + path: dist + deploy_pypi_index: + needs: build_pypi_index + permissions: + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + if: github.event_name == 'push' && github.ref == 'refs/heads/master' && github.repository_owner == 'deepmodeling' + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: 
actions/deploy-pages@v4 pass: name: Pass testing build wheels - needs: [build_wheels, build_sdist] + needs: [build_wheels, build_sdist, build_docker, build_pypi_index] runs-on: ubuntu-latest if: always() steps: - - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@release/v1 - with: - jobs: ${{ toJSON(needs) }} + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000..fcf0d7ea17 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,67 @@ +name: "CodeQL" + +on: + push: + branches-ignore: + - "gh-readonly-queue/**" + - "copilot/**" + - "dependabot/**" + - "pre-commit-ci-update-config" + pull_request: + schedule: + - cron: "45 2 * * 2" +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true +jobs: + analyze: + name: Analyze + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-22.04' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: ["c-cpp", "javascript-typescript", "python"] + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.11" + cache: "pip" + if: matrix.language == 'c-cpp' + - name: "Setup dependencies" + if: matrix.language == 'c-cpp' + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ + && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ + && sudo apt-get update \ + && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2 + python -m pip install uv + uv pip install --system --group pin_tensorflow_cpu --group pin_pytorch_cpu --torch-backend cpu + env: + DEBIAN_FRONTEND: 
noninteractive + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: ${{ matrix.language }} + queries: security-extended,security-and-quality + - name: "Run, Build Application using script" + run: | + source/install/build_cc.sh + env: + DP_VARIANT: cuda + DOWNLOAD_TENSORFLOW: "FALSE" + if: matrix.language == 'c-cpp' + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml new file mode 100644 index 0000000000..55b456877f --- /dev/null +++ b/.github/workflows/copilot-setup-steps.yml @@ -0,0 +1,66 @@ +name: "Copilot Setup Steps" + +# Automatically run the setup steps when they are changed to allow for easy validation, and +# allow manual testing through the repository's "Actions" tab +on: + workflow_dispatch: + push: + branches-ignore: + - "copilot/**" + - "dependabot/**" + - "pre-commit-ci-update-config" + paths: + - .github/workflows/copilot-setup-steps.yml + pull_request: + paths: + - .github/workflows/copilot-setup-steps.yml + +jobs: + # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot. + copilot-setup-steps: + runs-on: ubuntu-latest + + # Set the permissions to the lowest permissions possible needed for your steps. + # Copilot will be given its own token for its operations. + permissions: + # If you want to clone the repository as part of your setup steps, for example to install dependencies, you'll need the `contents: read` permission. If you don't clone the repository in your setup steps, Copilot will do this for you automatically after the steps complete. + contents: read + + # You can define any steps you want, and they will run before the agent starts. + # If you do not check out your code, Copilot will do this for you. 
+ steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.10" + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + + - name: Create virtual environment + run: uv venv venv + + - name: Activate virtual environment + run: echo "VIRTUAL_ENV=$PWD/venv" >> $GITHUB_ENV && echo "$PWD/venv/bin" >> $GITHUB_PATH + + - name: Install base dependencies + run: uv pip install --group pin_tensorflow_cpu --group pin_pytorch_cpu --torch-backend cpu + + - name: Build Python package + run: uv pip install -e .[cpu,test] + + - name: Install prek tools + run: uv tool install prek + + - name: Install prek hooks + run: prek install --install-hooks + + - name: Verify installation + run: | + dp --version + python -c "import deepmd; import deepmd.tf; print('DeePMD-kit installation verified')" diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 2c8ba30ba1..334fe72426 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -1,14 +1,14 @@ -name: "Pull Request Labeler" -on: -- pull_request_target - -jobs: - triage: - permissions: - contents: read - pull-requests: write - runs-on: ubuntu-latest - steps: - - uses: actions/labeler@v4 - with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" +name: "Pull Request Labeler" +on: + - pull_request_target + +jobs: + triage: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + steps: + - uses: actions/labeler@v6 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/mirror_gitee.yml b/.github/workflows/mirror_gitee.yml index 2d090c0539..79d1ce11a5 100644 --- a/.github/workflows/mirror_gitee.yml +++ b/.github/workflows/mirror_gitee.yml @@ -1,6 +1,13 @@ name: Mirror to Gitee Repo -on: [ push, delete, create ] +on: + push: + branches-ignore: + - "copilot/**" + - "dependabot/**" + - "pre-commit-ci-update-config" + delete: + create: # 
Ensures that only one mirror task will run at a time. concurrency: diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index 6cc5ed8dc8..4257e36b08 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -2,25 +2,85 @@ name: Build C library on: push: + branches-ignore: + - "gh-readonly-queue/**" + - "copilot/**" + - "dependabot/**" + - "pre-commit-ci-update-config" + tags: + - "v*" pull_request: - + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true jobs: build_c: name: Build C library runs-on: ubuntu-22.04 + strategy: + matrix: + include: + - tensorflow_build_version: "2.20" + tensorflow_version: "==2.20.*" + filename: libdeepmd_c.tar.gz steps: - - uses: actions/checkout@v3 + - name: Free Disk Space (Ubuntu) + uses: insightsengineering/disk-space-reclaimer@v1 + with: + tools-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: true + docker-images: true + - uses: actions/checkout@v6 + with: + fetch-depth: 0 - name: Package C library run: ./source/install/docker_package_c.sh - - name: Test C library - run: ./source/install/docker_test_package_c.sh + env: + TENSORFLOW_VERSION: ${{ matrix.tensorflow_version }} + TENSORFLOW_BUILD_VERSION: ${{ matrix.tensorflow_build_version }} + - run: cp libdeepmd_c.tar.gz ${{ matrix.filename }} + if: matrix.filename != 'libdeepmd_c.tar.gz' # for download and debug - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v7 with: - path: ./libdeepmd_c.tar.gz + name: libdeepmd_c-${{ strategy.job-index }}-${{ matrix.filename }} + path: ${{ matrix.filename }} + - name: Test C library + run: ./source/install/docker_test_package_c.sh - name: Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: - files: libdeepmd_c.tar.gz + files: ${{ matrix.filename }} + 
test_c: + name: Test building from C library + needs: [build_c] + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v6 + - name: Download artifact + uses: actions/download-artifact@v8 + with: + pattern: libdeepmd_c-* + merge-multiple: true + - run: tar -vxzf ./libdeepmd_c.tar.gz + - name: Test C library + run: ./source/install/build_from_c.sh + env: + DEEPMD_C_ROOT: ${{ github.workspace }}/libdeepmd_c + pass: + name: Pass building c library + needs: [build_c, test_c] + runs-on: ubuntu-latest + if: always() + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/remove_test_cuda_label.yml b/.github/workflows/remove_test_cuda_label.yml new file mode 100644 index 0000000000..aa6012b790 --- /dev/null +++ b/.github/workflows/remove_test_cuda_label.yml @@ -0,0 +1,18 @@ +on: + pull_request_target: + types: + - "labeled" +name: Test CUDA +jobs: + remove_label: + permissions: + contents: read + pull-requests: write + # so one can re-trigger the workflow without manually removing the label + runs-on: ubuntu-latest + if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' + steps: + - uses: actions-ecosystem/action-remove-labels@v1 + with: + labels: Test CUDA + number: ${{ github.event.pull_request.number }} diff --git a/.github/workflows/suppr.txt b/.github/workflows/suppr.txt new file mode 100644 index 0000000000..2a43b31eb3 --- /dev/null +++ b/.github/workflows/suppr.txt @@ -0,0 +1,5 @@ +leak:libpaddle_inference +# TensorFlow 2.20 +leak:xla:: +leak:mlir:: +leak:llvm:: diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index a6eb4727f1..151c60e9de 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -1,47 +1,115 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" + - "copilot/**" + - "dependabot/**" + - "pre-commit-ci-update-config" pull_request: + 
merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true name: Test C++ jobs: testcc: name: Test C++ - runs-on: ubuntu-latest - container: ghcr.io/deepmodeling/deepmd-kit-test-cc:latest + runs-on: ubuntu-22.04 + strategy: + matrix: + include: + - check_memleak: true + enable_tensorflow: true + enable_pytorch: true + enable_paddle: false + - check_memleak: true + enable_tensorflow: false + enable_pytorch: false + enable_paddle: true + - check_memleak: false + enable_tensorflow: true + enable_pytorch: true + enable_paddle: false + - check_memleak: false + enable_tensorflow: false + enable_pytorch: false + enable_paddle: true steps: - - name: work around permission issue - run: git config --global --add safe.directory /__w/deepmd-kit/deepmd-kit - - uses: actions/checkout@v3 - - run: source/install/test_cc_local.sh - env: - OMP_NUM_THREADS: 1 - TF_INTRA_OP_PARALLELISM_THREADS: 1 - TF_INTER_OP_PARALLELISM_THREADS: 1 - tensorflow_root: /usr/local - # test lammps - - run: apt-get update && apt-get install -y python3-pip python3-venv - - run: source/install/build_lammps.sh - - run: | - python -m pip install -U pip - python -m pip install -e .[cpu,test] - env: - DP_BUILD_TESTING: 1 - - run: pytest --cov=deepmd source/lmp/tests - env: - OMP_NUM_THREADS: 1 - TF_INTRA_OP_PARALLELISM_THREADS: 1 - TF_INTER_OP_PARALLELISM_THREADS: 1 - LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib - - uses: codecov/codecov-action@v3 - with: - gcov: true + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.11" + cache: "pip" + - uses: lukka/get-cmake@latest + - run: python -m pip install uv + - name: Install Python dependencies + run: | + source/install/uv_with_retry.sh pip install --system --group pin_tensorflow_cpu --group pin_pytorch_cpu --group pin_jax --torch-backend cpu + export TENSORFLOW_ROOT=$(python -c 'import 
importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + source/install/uv_with_retry.sh pip install --system -e .[cpu,test,lmp,jax] mpi4py mpich + - name: Convert models + run: source/tests/infer/convert-models.sh + # https://github.com/actions/runner-images/issues/9491 + - name: Fix kernel mmap rnd bits + run: sudo sysctl vm.mmap_rnd_bits=28 + if: ${{ matrix.check_memleak }} + - run: | + source/install/test_cc_local.sh + env: + OMP_NUM_THREADS: 1 + TF_INTRA_OP_PARALLELISM_THREADS: 1 + TF_INTER_OP_PARALLELISM_THREADS: 1 + CMAKE_GENERATOR: Ninja + CXXFLAGS: ${{ matrix.check_memleak && '-fsanitize=leak' || '' }} + LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/workflows/suppr.txt + ENABLE_TENSORFLOW: ${{ matrix.enable_tensorflow && 'TRUE' || 'FALSE' }} + ENABLE_PYTORCH: ${{ matrix.enable_pytorch && 'TRUE' || 'FALSE' }} + ENABLE_PADDLE: ${{ matrix.enable_paddle && 'TRUE' || 'FALSE' }} + # test lammps + - run: | + cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/paddle/lib/*.so ${{ github.workspace }}/dp_test/lib/ + cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/onednn/lib/* ${{ github.workspace }}/dp_test/lib/ + cp ${{ github.workspace }}/source/build_tests/paddle_inference_install_dir/third_party/install/mklml/lib/* ${{ github.workspace }}/dp_test/lib/ + if: matrix.enable_paddle + - run: | + export LD_LIBRARY_PATH=${{ github.workspace }}/dp_test/lib:$LD_LIBRARY_PATH + pytest --cov=deepmd source/lmp/tests + env: + OMP_NUM_THREADS: 1 + TF_INTRA_OP_PARALLELISM_THREADS: 1 + TF_INTER_OP_PARALLELISM_THREADS: 1 + LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp + ENABLE_TENSORFLOW: ${{ matrix.enable_tensorflow && '1' || '0' }} + ENABLE_PYTORCH: ${{ matrix.enable_pytorch && '1' || '0' }} + ENABLE_JAX: ${{ matrix.enable_tensorflow && '1' || '0' }} + ENABLE_PADDLE: ${{ matrix.enable_paddle && '1' || '0' }} + if: ${{ 
!matrix.check_memleak }} + # test ipi + - run: | + export PATH=${{ github.workspace }}/dp_test/bin:$PATH + pytest --cov=deepmd source/ipi/tests + env: + OMP_NUM_THREADS: 1 + TF_INTRA_OP_PARALLELISM_THREADS: 1 + TF_INTER_OP_PARALLELISM_THREADS: 1 + LD_LIBRARY_PATH: ${{ github.workspace }}/dp_test/lib + ENABLE_TENSORFLOW: ${{ matrix.enable_tensorflow && '1' || '0' }} + ENABLE_PYTORCH: ${{ matrix.enable_pytorch && '1' || '0' }} + ENABLE_JAX: ${{ matrix.enable_tensorflow && '1' || '0' }} + ENABLE_PADDLE: ${{ matrix.enable_paddle && '1' || '0' }} + if: ${{ !matrix.check_memleak }} + - uses: codecov/codecov-action@v5 + with: + use_oidc: true + permissions: + id-token: write pass: name: Pass testing C++ needs: [testcc] runs-on: ubuntu-latest if: always() steps: - - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@release/v1 - with: - jobs: ${{ toJSON(needs) }} + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml new file mode 100644 index 0000000000..65773ccbfe --- /dev/null +++ b/.github/workflows/test_cuda.yml @@ -0,0 +1,103 @@ +on: + # manually trigger + workflow_dispatch: + pull_request: + types: + - "labeled" + # to let the PR pass the test + - "opened" + - "reopened" + - "synchronize" + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id }} + cancel-in-progress: true +name: Test CUDA +jobs: + test_cuda: + name: Test Python and C++ on CUDA + runs-on: nvidia + # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 + container: + image: nvidia/cuda:12.9.1-cudnn-devel-ubuntu22.04 + options: --gpus all + if: github.repository_owner == 'deepmodeling' && (github.event_name == 'pull_request' && github.event.label && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' || 
github.event_name == 'merge_group') + steps: + - name: Make sudo and git work + run: apt-get update && apt-get install -y sudo git + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.11" + # cache: 'pip' + - name: Install wget and unzip + run: apt-get update && apt-get install -y wget unzip + - uses: lukka/get-cmake@latest + with: + useLocalCache: true + useCloudCache: false + - run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ + && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ + && sudo apt-get update \ + && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3 + if: false # skip as we use nvidia image + - run: python -m pip install -U uv + - run: source/install/uv_with_retry.sh pip install --system --group pin_tensorflow_gpu --group pin_pytorch_gpu --group pin_jax "jax[cuda12]" + - run: | + export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') + export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + pip install --find-links "https://www.paddlepaddle.org.cn/packages/nightly/cu126/paddlepaddle-gpu/" --index-url https://pypi.org/simple --trusted-host www.paddlepaddle.org.cn --trusted-host paddlepaddle.org.cn "paddlepaddle-gpu==3.4.0.dev20260310" + source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py --reinstall-package deepmd-kit + # See https://github.com/jax-ml/jax/issues/29042 + source/install/uv_with_retry.sh pip install --system -U 'nvidia-cublas-cu12>=12.9.0.13' + env: + DP_VARIANT: cuda + DP_ENABLE_NATIVE_OPTIMIZATION: 1 + DP_ENABLE_PYTORCH: 1 + - run: dp --version + - run: python -m pytest source/tests + env: + NUM_WORKERS: 0 + CUDA_VISIBLE_DEVICES: 0 + # See https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html + XLA_PYTHON_CLIENT_PREALLOCATE: false + XLA_PYTHON_CLIENT_ALLOCATOR: 
platform + FLAGS_use_stride_compute_kernel: 0 + - name: Convert models + run: source/tests/infer/convert-models.sh + - run: | + export LD_LIBRARY_PATH=$CUDA_PATH/lib64:/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH + source/install/test_cc_local.sh + env: + OMP_NUM_THREADS: 1 + TF_INTRA_OP_PARALLELISM_THREADS: 1 + TF_INTER_OP_PARALLELISM_THREADS: 1 + CMAKE_GENERATOR: Ninja + DP_VARIANT: cuda + DP_USE_MPICH2: 1 + - run: | + export LD_LIBRARY_PATH=$CUDA_PATH/lib64:/usr/lib/x86_64-linux-gnu/:$GITHUB_WORKSPACE/dp_test/lib:$LD_LIBRARY_PATH + export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH + cp $GITHUB_WORKSPACE/source/build_tests/paddle_inference_install_dir/paddle/lib/* $GITHUB_WORKSPACE/dp_test/lib/ + cp $GITHUB_WORKSPACE/source/build_tests/paddle_inference_install_dir/third_party/install/onednn/lib/* $GITHUB_WORKSPACE/dp_test/lib/ + cp $GITHUB_WORKSPACE/source/build_tests/paddle_inference_install_dir/third_party/install/mklml/lib/* $GITHUB_WORKSPACE/dp_test/lib/ + python -m pytest -s source/lmp/tests || (cat log.lammps && exit 1) + python -m pytest source/ipi/tests + env: + OMP_NUM_THREADS: 1 + TF_INTRA_OP_PARALLELISM_THREADS: 1 + TF_INTER_OP_PARALLELISM_THREADS: 1 + LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp + CUDA_VISIBLE_DEVICES: 0 + pass: + name: Pass testing on CUDA + needs: [test_cuda] + runs-on: ubuntu-latest + if: always() + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} + allowed-skips: test_cuda diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index c690e16328..a8bd8375f9 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -1,64 +1,118 @@ on: push: + branches-ignore: + - "gh-readonly-queue/**" + - "copilot/**" + - "dependabot/**" + - "pre-commit-ci-update-config" pull_request: + merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.ref || github.run_id 
}} + cancel-in-progress: true name: Test Python jobs: testpython: name: Test Python - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: + fail-fast: false matrix: - include: - - python: 3.7 - gcc: 5 - tf: 1.14 - - python: 3.7 - gcc: 6 - tf: 1.14 - - python: 3.7 - gcc: 7 - tf: 1.14 - - python: 3.7 - gcc: 8 - tf: 1.14 - - python: 3.7 - gcc: 5 - tf: - - python: 3.7 - gcc: 8 - tf: - - python: "3.10" - gcc: 5 - tf: - - python: "3.10" - gcc: 8 - tf: + group: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + python: ["3.10", "3.13"] - container: ghcr.io/deepmodeling/deepmd-kit-test-environment:py${{ matrix.python }}-gcc${{ matrix.gcc }}-tf${{ matrix.tf }} steps: - - name: work around permission issue - run: git config --global --add safe.directory /__w/deepmd-kit/deepmd-kit - - uses: actions/checkout@v3 - # https://github.com/pypa/pip/issues/11770 - - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: pip install -e .[cpu,test] - env: - CC: gcc-${{ matrix.gcc }} - CXX: g++-${{ matrix.gcc }} - TENSORFLOW_VERSION: ${{ matrix.tf }} - DP_BUILD_TESTING: 1 - - run: dp --version - - run: pytest --cov=deepmd source/tests --durations=0 - - uses: codecov/codecov-action@v3 - with: - gcov: true + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python }} + - run: python -m pip install -U uv + - run: | + source/install/uv_with_retry.sh pip install --system openmpi --group pin_tensorflow_cpu --group pin_pytorch_cpu --torch-backend cpu + export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') + source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py --group pin_jax + source/install/uv_with_retry.sh pip install --system --find-links "https://www.paddlepaddle.org.cn/packages/nightly/cpu/paddlepaddle/" --index-url https://pypi.org/simple --trusted-host 
www.paddlepaddle.org.cn --trusted-host paddlepaddle.org.cn paddlepaddle==3.4.0.dev20260310 + env: + # Please note that uv has some issues with finding + # an existing TensorFlow package. Currently, it uses + # the TensorFlow in the build dependency, but if that + # changes, set `TENSORFLOW_ROOT` explicitly. + DP_ENABLE_PYTORCH: 1 + DP_BUILD_TESTING: 1 + HOROVOD_WITH_TENSORFLOW: 1 + HOROVOD_WITHOUT_PYTORCH: 1 + HOROVOD_WITH_MPI: 1 + # https://cmake.org/cmake/help/latest/variable/CMAKE_POLICY_VERSION_MINIMUM.html + CMAKE_POLICY_VERSION_MINIMUM: 3.5 + - run: dp --version + - name: Get durations from cache + uses: actions/cache@v5 + with: + path: .test_durations + # the key must never match, even when restarting workflows, as that + # will cause durations to get out of sync between groups, the + # combined durations will be loaded if available + key: test2-durations-split-${{ github.run_id }}-${{ github.run_number}}-${{ matrix.python }}-${{ matrix.group }} + restore-keys: | + test2-durations-combined-${{ matrix.python }}-${{ github.sha }} + test2-durations-combined-${{ matrix.python }} + - run: pytest --cov=deepmd source/tests --splits 12 --group ${{ matrix.group }} --store-durations --clean-durations --durations-path=.test_durations --splitting-algorithm least_duration + env: + NUM_WORKERS: 0 + DP_CI_IMPORT_PADDLE_BEFORE_TF: 1 + FLAGS_use_stride_compute_kernel: 0 + - name: Test TF2 eager mode + run: pytest --cov=deepmd --cov-append source/tests/consistent/io/test_io.py source/jax2tf_tests + env: + NUM_WORKERS: 0 + DP_TEST_TF2_ONLY: 1 + DP_DTYPE_PROMOTION_STRICT: 1 + if: matrix.group == 1 + - run: mv .test_durations .test_durations_${{ matrix.group }} + - name: Upload partial durations + uses: actions/upload-artifact@v7 + with: + name: split-${{ matrix.python }}-${{ matrix.group }} + path: .test_durations_${{ matrix.group }} + include-hidden-files: true + - uses: codecov/codecov-action@v5 + with: + use_oidc: true + permissions: + id-token: write + update_durations: + name: Combine and 
update integration test durations + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + python: ["3.10", "3.13"] + needs: testpython + steps: + - name: Get durations from cache + uses: actions/cache@v5 + with: + path: .test_durations + # key won't match during the first run for the given commit, but + # restore-key will if there's a previous stored durations file, + # so cache will both be loaded and stored + key: test2-durations-combined-${{ matrix.python }}-${{ github.sha }} + restore-keys: test2-durations-combined-${{ matrix.python }} + - name: Download artifacts + uses: actions/download-artifact@v8 + with: + pattern: split-${{ matrix.python }}-* + merge-multiple: true + - name: Combine test durations + run: jq -s add .test_durations_* > .test_durations pass: name: Pass testing Python - needs: [testpython] + needs: [testpython, update_durations] runs-on: ubuntu-latest if: always() steps: - - name: Decide whether the needed jobs succeeded or failed - uses: re-actors/alls-green@release/v1 - with: - jobs: ${{ toJSON(needs) }} + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/todo.yml b/.github/workflows/todo.yml new file mode 100644 index 0000000000..8ae70380fc --- /dev/null +++ b/.github/workflows/todo.yml @@ -0,0 +1,20 @@ +name: TODO workflow +on: + push: + branches: + - master +jobs: + build: + if: github.repository_owner == 'deepmodeling' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - name: Run tdg-github-action + uses: ribtoks/tdg-github-action@master + with: + TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + SHA: ${{ github.sha }} + REF: ${{ github.ref }} + EXCLUDE_PATTERN: "(source/3rdparty|.git)/.*" + COMMENT_ON_ISSUES: 1 diff --git a/.gitignore b/.gitignore index 7401566afd..6382ecedd2 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ *.bz2 *.pyc *.pb +*.DS_Store tmp* 
CMakeCache.txt CMakeFiles @@ -22,12 +23,12 @@ _skbuild deepmd_kit.egg-info/ dist .eggs -_version.py +/deepmd/_version.py venv* .vscode/** _build _templates -API_CC +doc/API_CC/ doc/api_py/ doc/api_core/ doc/api_c/ @@ -43,3 +44,30 @@ build_cc_tests build_c_tests build_c/ libdeepmd_c/ +.uv/ +libtorch/ +uv.lock +buildcxx/ +node_modules/ +*.bib.original + +# Coverage files +.coverage +.coverage.* + +# Test output files (temporary) +test_dp_test/ +test_dp_test_*.out +*_detail.out + +# Training and model output files +*.pth +*.ckpt* +checkpoint +lcurve.out +out.json +input_v2_compat.json +frozen_model.* + +# Test system directories +system/ diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 7f3510b9d6..0000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "source/lib/src/cuda/cub"] - path = source/lib/src/cuda/cub - url = https://github.com/NVIDIA/cub.git diff --git a/.license-header.txt b/.license-header.txt new file mode 100644 index 0000000000..4352084d27 --- /dev/null +++ b/.license-header.txt @@ -0,0 +1 @@ +SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 017c54c544..303b789e13 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,67 +1,163 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks +exclude: ^source/3rdparty/.+/ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 hooks: - - id: trailing-whitespace + - id: trailing-whitespace exclude: "^.+\\.pbtxt$" - - id: end-of-file-fixer - exclude: "^.+\\.pbtxt$" - - id: check-yaml - #- id: check-json - - id: check-added-large-files - - id: check-merge-conflict - - id: check-symlinks - - id: check-toml -# Python -- repo: https://github.com/psf/black - rev: 22.12.0 - hooks: - - id: black-jupyter -- repo: https://github.com/PyCQA/isort - rev: 5.12.0 + - id: 
end-of-file-fixer + exclude: "^.+\\.pbtxt$|deeppot_sea.*\\.json$" + - id: check-yaml + - id: check-json + - id: check-added-large-files + args: ["--maxkb=1024", "--enforce-all"] + exclude: | + (?x)^( + source/tests/infer/dipolecharge_e.pbtxt| + source/tests/infer/deeppolar_new.pbtxt + )$ + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml + # Python + - repo: https://github.com/PyCQA/isort + rev: 8.0.1 hooks: - - id: isort - files: \.py$ -- repo: https://github.com/charliermarsh/ruff-pre-commit + - id: isort + files: \.py$ + exclude: ^source/3rdparty + - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.0.243 + rev: v0.15.6 hooks: - - id: ruff - args: ["--fix"] -# numpydoc -- repo: https://github.com/Carreau/velin + - id: ruff + args: ["--fix"] + exclude: ^source/3rdparty + types_or: [python, pyi, jupyter] + - id: ruff-format + exclude: ^source/3rdparty + types_or: [python, pyi, jupyter] + # numpydoc + - repo: https://github.com/Carreau/velin rev: 0.0.12 hooks: - - id: velin - args: ["--write"] -# Python inside docs -- repo: https://github.com/asottile/blacken-docs - rev: 1.13.0 + - id: velin + args: ["--write"] + exclude: ^source/3rdparty + # markdown + - repo: https://github.com/hukkin/mdformat + rev: 1.0.0 + hooks: + - id: mdformat + additional_dependencies: + # - mdformat-myst==0.3.0 + # See https://github.com/executablebooks/mdformat-myst/issues/13 + - "git+https://github.com/njzjz-bothub/mdformat-myst@d9c414e#egg=mdformat-myst" + - mdformat-ruff==0.1.3 + - mdformat-web==0.2.0 + - mdformat-config==0.2.1 + - mdformat-beautysh==1.0.0 + - mdformat-gfm-alerts==2.0.0 + # C++ + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v22.1.1 + hooks: + - id: clang-format + exclude: ^(source/3rdparty|source/lib/src/gpu/cudart/.+\.inc|.+\.ipynb$|source/tests/infer/.+\.json$) + # yaml, CSS, javascript + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 + hooks: + - id: prettier + 
types_or: [yaml, css] + # workflow files cannot be modified by pre-commit.ci + exclude: ^(source/3rdparty|\.clang-format) + # Shell + - repo: https://github.com/scop/pre-commit-shfmt + rev: v3.12.0-2 + hooks: + - id: shfmt + # CMake + - repo: https://github.com/cheshirekow/cmake-format-precommit + rev: v0.6.13 + hooks: + - id: cmake-format + #- id: cmake-lint + - repo: https://github.com/njzjz/mirrors-bibtex-tidy + rev: v1.14.0 + hooks: + - id: bibtex-tidy + args: + - --curly + - --numeric + - --align=13 + - --blank-lines + # disable sort: the order of keys and fields has explict meanings + #- --sort=key + - --duplicates=key,doi,citation,abstract + - --merge=combine + #- --sort-fields + #- --strip-comments + - --trailing-commas + - --encode-urls + - --remove-empty-fields + - --wrap=80 + # license header + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.6 hooks: - - id: blacken-docs -# C++ -- repo: https://github.com/pre-commit/mirrors-clang-format - rev: v15.0.7 + # C++, js + - id: insert-license + files: \.(c|cc|cpp|js|ts|h|hpp)$ + args: + - --license-filepath + - .license-header.txt + - --comment-style + - // + - --no-extra-eol + exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc + # CSS + - id: insert-license + files: \.(css|scss)$ + args: + - --license-filepath + - .license-header.txt + - --comment-style + - /*| *| */ + - --no-extra-eol + # Python + - id: insert-license + files: \.(py|pyx)$ + args: + - --license-filepath + - .license-header.txt + - --comment-style + - "#" + - --no-extra-eol + exclude: ^source/3rdparty + # HTML + - id: insert-license + files: \.(html|vue|xml)$ + args: + - --license-filepath + - .license-header.txt + - --comment-style + - + - --no-extra-eol + - repo: local hooks: - - id: clang-format - exclude: ^source/3rdparty|source/lib/src/cuda/cudart/.+\.inc -# CSS -- repo: https://github.com/pre-commit/mirrors-csslint - rev: v1.0.5 + - id: disallow-caps + name: Disallow improper capitalization + language: pygrep + 
entry: DeepMD|DeepMd|Pytorch|Tensorflow|Numpy|Github|Lammps|I-Pi|I-PI|i-Pi + # unclear why PairDeepMD is used instead of PairDeePMD + exclude: .pre-commit-config.yaml|source/lmp + # customized pylint rules + - repo: https://github.com/pylint-dev/pylint/ + rev: v4.0.5 hooks: - - id: csslint -# Shell -- repo: https://github.com/scop/pre-commit-shfmt - rev: v3.6.0-1 - hooks: - - id: shfmt -# CMake -- repo: https://github.com/cheshirekow/cmake-format-precommit - rev: v0.6.13 - hooks: - - id: cmake-format - #- id: cmake-lint -ci: - autoupdate_branch: devel + - id: pylint + entry: env PYTHONPATH=source/checker pylint + files: ^deepmd/ diff --git a/.readthedocs.yml b/.readthedocs.yml index 6f3ff6be3f..7cda715627 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -2,7 +2,15 @@ version: 2 build: os: ubuntu-20.04 tools: - python: mambaforge-4.10 -conda: - environment: doc/environment.yml -formats: all + python: "3.11" + jobs: + post_create_environment: + - pip install uv + post_install: + - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH uv pip install -r doc/requirements.txt + apt_packages: + - inkscape +sphinx: + configuration: doc/conf.py +formats: + - pdf diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..bcac9f1514 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,191 @@ +# DeePMD-kit + +DeePMD-kit is a deep learning package for many-body potential energy representation and molecular dynamics. It supports multiple backends (TensorFlow, PyTorch, JAX, Paddle) and integrates with MD packages like LAMMPS, GROMACS, and i-PI. 
+ +**Always reference these instructions first and fall back to search or bash commands only when you encounter unexpected information that does not match the info here.** + +## Working Effectively + +### Bootstrap and Build Repository + +- Create virtual environment: `uv venv venv && source venv/bin/activate` +- Install base dependencies: `uv pip install tensorflow-cpu` (takes ~8 seconds) +- Install PyTorch: `uv pip install torch --index-url https://download.pytorch.org/whl/cpu` (takes ~5 seconds) +- Build Python package: `uv pip install -e .[cpu,test]` -- takes 67 seconds. **NEVER CANCEL. Set timeout to 120+ seconds.** +- Build C++ components: `export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')` then `export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')` then `./source/install/build_cc.sh` -- takes 164 seconds. **NEVER CANCEL. Set timeout to 300+ seconds.** + +### Test Repository + +- Run single test: `pytest source/tests/tf/test_dp_test.py::TestDPTestEner::test_1frame -v` -- takes 8-13 seconds +- Run test subset: `pytest source/tests/tf/test_dp_test.py -v` -- takes 15 seconds. **NEVER CANCEL.
Set timeout to 60+ seconds.** +- **Recommended: Use single test cases for validation instead of full test suite** -- full suite has 314 test files and takes 60+ minutes + +### Lint and Format Code + +- Install linter: `uv pip install ruff` +- Run linting: `ruff check .` -- takes \<1 second +- Format code: `ruff format .` -- takes \<1 second +- **Always run `ruff check .` and `ruff format .` before committing changes or the CI will fail.** + +### Training and Validation + +- Test TensorFlow training: `cd examples/water/se_e2_a && dp train input.json --skip-neighbor-stat` -- training proceeds but is slow on CPU +- Test PyTorch training: `cd examples/water/se_e2_a && dp --pt train input_torch.json --skip-neighbor-stat` -- training proceeds but is slow on CPU +- **Training examples are for validation only. Real training takes hours/days. Timeout training tests after 60 seconds for validation.** + +## Validation Scenarios + +**ALWAYS manually validate any new code through at least one complete scenario:** + +### Basic Functionality Validation + +1. **CLI Interface**: Run `dp --version` and `dp -h` to verify installation +1. **Python Interface**: Run `python -c "import deepmd; import deepmd.tf; print('Both interfaces work')"` +1. **Backend Selection**: Test `dp --tf -h`, `dp --pt -h`, `dp --jax -h`, `dp --pd -h` + +### Training Workflow Validation + +1. **TensorFlow Training**: `cd examples/water/se_e2_a && timeout 60 dp train input.json --skip-neighbor-stat` -- should start training and show decreasing loss +1. **PyTorch Training**: `cd examples/water/se_e2_a && timeout 60 dp --pt train input_torch.json --skip-neighbor-stat` -- should start training and show decreasing loss +1. **Verify training output**: Look for "batch X: trn: rmse" messages showing decreasing error values + +### Test-Based Validation + +1. **Core Tests**: `pytest source/tests/tf/test_dp_test.py::TestDPTestEner::test_1frame -v` -- should pass in ~10 seconds +1. 
**Multi-backend**: Test both TensorFlow and PyTorch components work + +## Common Commands and Timing + +### Repository Structure + +``` +ls -la [repo-root] +.github/ # GitHub workflows and templates +CONTRIBUTING.md # Contributing guide +README.md # Project overview +deepmd/ # Python package source +doc/ # Documentation +examples/ # Training examples and configurations +pyproject.toml # Python build configuration +source/ # C++ source code and tests +``` + +### Key Directories and Files + +- `deepmd/` - Main Python package with backend implementations +- `source/lib/` - Core C++ library +- `source/op/` - Backend-specific operators (TF, PyTorch, etc.) +- `source/api_cc/` - C++ API +- `source/api_c/` - C API +- `source/tests/` - Test suite (314 test files) +- `examples/water/se_e2_a/` - Basic water training example +- `examples/` - Various model examples for different scenarios + +### Common CLI Commands + +- `dp --version` - Show version information +- `dp -h` - Show help and available commands +- `dp train input.json` - Train a model (TensorFlow backend) +- `dp --pt train input.json` - Train with PyTorch backend +- `dp --jax train input.json` - Train with JAX backend +- `dp --pd train input.json` - Train with Paddle backend +- `dp test -m model.pb -s system/` - Test a trained model +- `dp freeze -o model.pb` - Freeze/save a model + +### Build Dependencies and Setup + +- **Python 3.10+** required +- **Virtual environment** strongly recommended: `uv venv venv && source venv/bin/activate` +- **Backend dependencies**: TensorFlow, PyTorch, JAX, or Paddle (install before building) +- **Build tools**: CMake, C++ compiler, scikit-build-core +- **C++ build requires**: Both TensorFlow and PyTorch installed, set TENSORFLOW_ROOT and PYTORCH_ROOT environment variables + +### Key Configuration Files + +- `pyproject.toml` - Python build configuration and dependencies +- `source/CMakeLists.txt` - C++ build configuration +- `examples/water/se_e2_a/input.json` - Basic TensorFlow 
training config +- `examples/water/se_e2_a/input_torch.json` - Basic PyTorch training config + +## Frequent Patterns and Time Expectations + +### Installation and Build Times + +- **Virtual environment setup**: ~5 seconds +- **TensorFlow CPU install**: ~8 seconds +- **PyTorch CPU install**: ~5 seconds +- **Python package build**: ~67 seconds. **NEVER CANCEL.** +- **C++ components build**: ~164 seconds. **NEVER CANCEL.** +- **Full fresh setup**: ~3-4 minutes total + +### Testing Times + +- **Single test**: 8-13 seconds +- **Test file (~5 tests)**: ~15 seconds +- **Backend-specific test subset**: 15-30 minutes. **Use sparingly.** +- **Full test suite (314 files)**: 60+ minutes. **Avoid in development - use single tests instead.** + +### Linting and Formatting + +- **Ruff check**: \<1 second +- **Ruff format**: \<1 second +- **Pre-commit hooks**: May have network issues, use individual tools + +### Commit Messages and PR Titles + +**All commit messages and PR titles must follow [conventional commit specification](https://www.conventionalcommits.org/):** + +- **Format**: `type(scope): description` +- **Common types**: `feat`, `fix`, `docs`, `style`, `refactor`, `test`, `chore`, `ci` +- **Examples**: + - `feat(core): add new descriptor type` + - `fix(tf): resolve memory leak in training` + - `docs: update installation guide` + - `ci: add workflow for testing` + +### Training and Model Operations + +- **Training initialization**: 10-30 seconds +- **Training per batch**: 0.1-1 second (CPU), much faster on GPU +- **Model freezing**: 5-15 seconds +- **Model testing**: 10-30 seconds + +## Backend-Specific Notes + +### TensorFlow Backend + +- **Default backend** when no flag specified +- **Configuration**: Use `input.json` format +- **Training**: `dp train input.json` +- **Requirements**: `tensorflow` or `tensorflow-cpu` package + +### PyTorch Backend + +- **Activation**: Use `--pt` flag or `export DP_BACKEND=pytorch` +- **Configuration**: Use `input_torch.json` format 
typically +- **Training**: `dp --pt train input_torch.json` +- **Requirements**: `torch` package + +### JAX Backend + +- **Activation**: Use `--jax` flag +- **Training**: `dp --jax train input.json` +- **Requirements**: `jax` and related packages +- **Note**: Experimental backend, may have limitations + +### Paddle Backend + +- **Activation**: Use `--pd` flag +- **Training**: `dp --pd train input.json` +- **Requirements**: `paddlepaddle` package +- **Note**: Less commonly used + +## Critical Warnings + +- **NEVER CANCEL BUILD OPERATIONS**: Python build takes 67 seconds, C++ build takes 164 seconds +- **USE SINGLE TESTS FOR VALIDATION**: Run individual tests instead of full test suite for faster feedback +- **ALWAYS activate virtual environment**: Build and runtime failures occur without proper environment +- **ALWAYS install backend dependencies first**: TensorFlow/PyTorch required before building C++ components +- **ALWAYS run linting before commits**: `ruff check . && ruff format .` or CI will fail +- **ALWAYS test both Python and C++ components**: Some features require both to be built +- **ALWAYS follow conventional commit format**: All commit messages and PR titles must use conventional commit specification (`type(scope): description`) diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100644 index f946fed778..0000000000 --- a/CITATION.cff +++ /dev/null @@ -1,19 +0,0 @@ -preferred-citation: - type: article - authors: - - family-names: "Wang" - given-names: "Han" - - family-names: "Zhang" - given-names: "Linfeng" - - family-names: "Han" - given-names: "Jiequn" - - family-names: "E" - given-names: "Weinan" - doi: "10.1016/j.cpc.2018.03.016" - journal: "Computer Physics Communications" - month: 7 - start: 178 # First page number - end: 184 # Last page number - title: "DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics" - volume: 228 - year: 2018 diff --git a/CITATIONS.bib b/CITATIONS.bib new file mode 
100644 index 0000000000..0fd28323dd --- /dev/null +++ b/CITATIONS.bib @@ -0,0 +1,427 @@ +The proposed feature of each article is described in the "annote" field. +Please cite an article if any feature is used +@article{Wang_ComputPhysCommun_2018_v228_p178, + annote = {general purpose}, + author = {Wang, Han and Zhang, Linfeng and Han, Jiequn and E, Weinan}, + doi = {10.1016/j.cpc.2018.03.016}, + year = 2018, + month = {jul}, + publisher = {Elsevier {BV}}, + volume = 228, + journal = {Comput. Phys. Comm.}, + title = { + {DeePMD-kit: A deep learning package for many-body potential energy + representation and molecular dynamics} + }, + pages = {178--184}, +} + +@article{Zeng_JChemPhys_2023_v159_p054801, + annote = {general purpose}, + title = {{DeePMD-kit v2: A software package for deep potential models}}, + author = { + Jinzhe Zeng and Duo Zhang and Denghui Lu and Pinghui Mo and Zeyu Li and + Yixiao Chen and Mari{\'a}n Rynik and Li'ang Huang and Ziyao Li and Shaochen + Shi and Yingze Wang and Haotian Ye and Ping Tuo and Jiabin Yang and Ye Ding + and Yifan Li and Davide Tisi and Qiyu Zeng and Han Bao and Yu Xia and + Jiameng Huang and Koki Muraoka and Yibo Wang and Junhan Chang and Fengbo + Yuan and Sigbj{\o}rn L{\o}land Bore and Chun Cai and Yinnian Lin and Bo + Wang and Jiayan Xu and Jia-Xin Zhu and Chenxing Luo and Yuzhi Zhang and + Rhys E A Goodall and Wenshuo Liang and Anurag Kumar Singh and Sikai Yao and + Jingchao Zhang and Renata Wentzcovitch and Jiequn Han and Jie Liu and Weile + Jia and Darrin M York and Weinan E and Roberto Car and Linfeng Zhang and + Han Wang + }, + journal = {J. Chem.
Phys.}, + volume = 159, + issue = 5, + year = 2023, + pages = 054801, + doi = {10.1063/5.0155600}, +} + +@article{Zeng_JChemTheoryComput_2025_v21_p4375, + annote = {general purpose}, + author = { + Jinzhe Zeng and Duo Zhang and Anyang Peng and Xiangyu Zhang and Sensen He + and Yan Wang and Xinzijian Liu and Hangrui Bi and Yifan Li and Chun Cai and + Chengqian Zhang and Yiming Du and Jia-Xin Zhu and Pinghui Mo and Zhengtao + Huang and Qiyu Zeng and Shaochen Shi and Xuejian Qin and Zhaoxi Yu and + Chenxing Luo and Ye Ding and Yun-Pei Liu and Ruosong Shi and Zhenyu Wang + and Sigbj{\o}rn L{\o}land Bore and Junhan Chang and Zhe Deng and Zhaohan + Ding and Siyuan Han and Wanrun Jiang and Guolin Ke and Zhaoqing Liu and + Denghui Lu and Koki Muraoka and Hananeh Oliaei and Anurag Kumar Singh and + Haohui Que and Weihong Xu and Zhangmancang Xu and Yong-Bin Zhuang and Jiayu + Dai and Timothy J. Giese and Weile Jia and Ben Xu and Darrin M. York and + Linfeng Zhang and Han Wang + }, + title = { + {DeePMD-kit v3: A Multiple-Backend Framework for Machine Learning + Potentials} + }, + journal = {J. Chem. Theory Comput.}, + year = 2025, + volume = 21, + number = 9, + pages = {4375--4385}, + doi = {10.1021/acs.jctc.5c00340}, + abstract = { + In recent years, machine learning potentials (MLPs) have become + indispensable tools in physics, chemistry, and materials science, driving + the development of software packages for molecular dynamics (MD) + simulations and related applications. These packages, typically built on + specific machine learning frameworks, such as TensorFlow, PyTorch, or JAX, + face integration challenges when advanced applications demand communication + across different frameworks. The previous TensorFlow-based implementation + of the DeePMD-kit exemplified these limitations. 
In this work, we introduce + DeePMD-kit version 3, a significant update featuring a multibackend + framework that supports TensorFlow, PyTorch, JAX, and PaddlePaddle + backends, and demonstrate the versatility of this architecture through the + integration of other MLP packages and of differentiable molecular force + fields. This architecture allows seamless back-end switching with minimal + modifications, enabling users and developers to integrate DeePMD-kit with + other packages using different machine learning frameworks. This innovation + facilitates the development of more complex and interoperable workflows, + paving the way for broader applications of MLPs in scientific research. + }, +} + +@article{Lu_CompPhysCommun_2021_v259_p107624, + annote = {GPU support}, + title = { + {86 PFLOPS Deep Potential Molecular Dynamics simulation of 100 million + atoms with ab initio accuracy} + }, + author = { + Lu, Denghui and Wang, Han and Chen, Mohan and Lin, Lin and Car, Roberto and + E, Weinan and Jia, Weile and Zhang, Linfeng + }, + journal = {Comput. Phys. Comm.}, + volume = 259, + pages = 107624, + year = 2021, + publisher = {Elsevier}, + doi = {10.1016/j.cpc.2020.107624}, +} + +@article{Zhang_PhysRevLett_2018_v120_p143001, + annote = {local frame (loc\_frame)}, + author = {Linfeng Zhang and Jiequn Han and Han Wang and Roberto Car and Weinan E}, + journal = {Phys. Rev. 
Lett.}, + number = 14, + pages = 143001, + publisher = {APS}, + title = { + {Deep potential molecular dynamics: a scalable model with the accuracy of + quantum mechanics} + }, + volume = 120, + year = 2018, + doi = {10.1103/PhysRevLett.120.143001}, +} + +@incollection{Zhang_BookChap_NIPS_2018_v31_p4436, + annote = {DeepPot-SE (se\_e2\_a, se\_e2\_r, se\_e3, se\_atten)}, + title = { + {End-to-end Symmetry Preserving Inter-atomic Potential Energy Model for + Finite and Extended Systems} + }, + author = { + Zhang, Linfeng and Han, Jiequn and Wang, Han and Saidi, Wissam and Car, + Roberto and E, Weinan + }, + booktitle = {Advances in Neural Information Processing Systems 31}, + editor = { + S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. + Cesa-Bianchi and R. Garnett + }, + pages = {4436--4446}, + year = 2018, + publisher = {Curran Associates, Inc.}, + url = {https://dl.acm.org/doi/10.5555/3327345.3327356}, +} + +@article{Wang_NuclFusion_2022_v62_p126013, + annote = {three-body embedding DeepPot-SE (se\_e3)}, + author = {Xiaoyang Wang and Yinan Wang and Linfeng Zhang and Fuzhi Dai and Han Wang}, + title = { + {A tungsten deep neural-network potential for simulating mechanical + property degradation under fusion service environment} + }, + journal = {Nucl. Fusion}, + year = 2022, + volume = 62, + issue = 12, + pages = 126013, + doi = {10.1088/1741-4326/ac888b}, +} + +@article{Zhang_NpjComputMater_2024_v10_p94, + annote = {DPA-1, attention-based descriptor}, + author = { + Duo Zhang and Hangrui Bi and Fu-Zhi Dai and Wanrun Jiang and Xinzijian Liu + and Linfeng Zhang and Han Wang + }, + title = { + {Pretraining of attention-based deep learning potential model for molecular + simulation} + }, + journal = {Npj Comput. 
Mater}, + year = 2024, + volume = 10, + issue = 1, + pages = 94, + doi = {10.1038/s41524-024-01278-7}, +} + +@article{Zhang_npjComputMater_2024_v10_p293, + annote = {DPA-2}, + author = { + Duo Zhang and Xinzijian Liu and Xiangyu Zhang and Chengqian Zhang and Chun + Cai and Hangrui Bi and Yiming Du and Xuejian Qin and Anyang Peng and + Jiameng Huang and Bowen Li and Yifan Shan and Jinzhe Zeng and Yuzhi Zhang + and Siyuan Liu and Yifan Li and Junhan Chang and Xinyan Wang and Shuo Zhou + and Jianchuan Liu and Xiaoshan Luo and Zhenyu Wang and Wanrun Jiang and + Jing Wu and Yudi Yang and Jiyuan Yang and Manyi Yang and Fu-Qiang Gong and + Linshuang Zhang and Mengchao Shi and Fu-Zhi Dai and Darrin M. York and Shi + Liu and Tong Zhu and Zhicheng Zhong and Jian Lv and Jun Cheng and Weile Jia + and Mohan Chen and Guolin Ke and Weinan E and Linfeng Zhang and Han Wang + }, + title = {{DPA-2: a large atomic model as a multi-task learner}}, + journal = {npj Comput. Mater}, + year = 2024, + volume = 10, + number = 1, + pages = 293, + doi = {10.1038/s41524-024-01493-2}, +} + +@article{Zhang_PhysPlasmas_2020_v27_p122704, + annote = {frame-specific parameters (e.g. electronic temperature)}, + author = { + Zhang, Yuzhi and Gao, Chang and Liu, Qianrui and Zhang, Linfeng and Wang, + Han and Chen, Mohan + }, + title = { + {Warm dense matter simulation via electron temperature dependent deep + potential molecular dynamics} + }, + journal = {Phys. Plasmas}, + volume = 27, + number = 12, + pages = 122704, + year = 2020, + month = 12, + doi = {10.1063/5.0023265}, +} + +@misc{Zeng_2023_TTMDPMD, + annote = {atom-specific parameter (e.g. 
electron temperature)}, + author = { + Zeng, Qiyu and Chen, Bo and Zhang, Shen and Kang, Dongdong and Wang, Han + and Yu, Xiaoxiang and Dai, Jiayu + }, + title = {{Full-scale ab initio simulations of laser-driven atomistic dynamics}}, + publisher = {arXiv}, + year = 2023, + doi = {10.48550/arXiv.2308.13863}, +} + +@article{Zhang_PhysRevB_2020_v102_p41121, + annote = {fit dipole}, + title = {{Deep neural network for the dielectric response of insulators}}, + author = { + Zhang, Linfeng and Chen, Mohan and Wu, Xifan and Wang, Han and E, Weinan + and Car, Roberto + }, + journal = {Phys. Rev. B}, + volume = 102, + number = 4, + pages = {041121}, + year = 2020, + publisher = {APS}, + doi = {10.1103/PhysRevB.102.041121}, +} + +@article{Sommers_PhysChemChemPhys_2020_v22_p10592, + annote = {fit polarizability}, + title = { + {Raman spectrum and polarizability of liquid water from deep neural + networks} + }, + author = { + Sommers, Grace M and Andrade, Marcos F Calegari and Zhang, Linfeng and + Wang, Han and Car, Roberto + }, + journal = {Phys. Chem. Chem. Phys.}, + volume = 22, + number = 19, + pages = {10592--10602}, + year = 2020, + publisher = {Royal Society of Chemistry}, + doi = {10.1039/D0CP01893G}, +} + +@article{Zeng_JChemTheoryComput_2023_v19_p1261, + annote = {fit relative energies}, + author = {Jinzhe Zeng and Yujun Tao and Timothy J Giese and Darrin M York}, + title = {{QD{\pi}: A Quantum Deep Potential Interaction Model for Drug Discovery}}, + journal = {J. Chem. Theory Comput.}, + year = 2023, + volume = 19, + issue = 4, + pages = {1261--1275}, + doi = {10.1021/acs.jctc.2c01172}, +} + +@article{Zeng_PhysRevB_2022_v105_p174109, + annote = {fit density of states}, + author = { + Qiyu Zeng and Bo Chen and Xiaoxiang Yu and Shen Zhang and Dongdong Kang and + Han Wang and Jiayu Dai + }, + title = { + {Towards large-scale and spatiotemporally resolved diagnosis of electronic + density of states by deep learning} + }, + journal = {Phys. Rev. 
B}, + year = 2022, + volume = 105, + issue = 17, + pages = 174109, + doi = {10.1103/PhysRevB.105.174109}, +} + +@article{Zhang_JChemPhys_2022_v156_p124107, + annote = {DPLR, se\_e2\_r, hybrid descriptor}, + author = { + Linfeng Zhang and Han Wang and Maria Carolina Muniz and Athanassios Z + Panagiotopoulos and Roberto Car and Weinan E + }, + title = {{A deep potential model with long-range electrostatic interactions}}, + journal = {J. Chem. Phys.}, + year = 2022, + volume = 156, + issue = 12, + pages = 124107, + doi = {10.1063/5.0083669}, +} + +@article{Zeng_JChemTheoryComput_2021_v17_p6993, + annote = {DPRc}, + title = { + {Development of Range-Corrected Deep Learning Potentials for Fast, Accurate + Quantum Mechanical/molecular Mechanical Simulations of Chemical Reactions + in Solution} + }, + author = { + Zeng, Jinzhe and Giese, Timothy J and Ekesan, {\c{S}}{\"o}len and York, + Darrin M + }, + journal = {J. Chem. Theory Comput.}, + year = 2021, + volume = 17, + issue = 11, + pages = {6993--7009}, + doi = {10.1021/acs.jctc.1c00201}, +} + +@article{Wang_ApplPhysLett_2019_v114_p244101, + annote = {Interpolation with a pair-wise potential}, + title = { + {Deep learning inter-atomic potential model for accurate irradiation damage + simulations} + }, + author = {Wang, Hao and Guo, Xun and Zhang, Linfeng and Wang, Han and Xue, Jianming}, + journal = {Appl. Phys. Lett.}, + volume = 114, + number = 24, + pages = 244101, + year = 2019, + publisher = {AIP Publishing LLC}, + doi = {10.1063/1.5098061}, +} + +@article{Zhang_PhysRevMater_2019_v3_p23804, + annote = {model deviation}, + title = { + {Active learning of uniformly accurate interatomic potentials for materials + simulation} + }, + author = {Linfeng Zhang and De-Ye Lin and Han Wang and Roberto Car and Weinan E}, + journal = {Phys. Rev. 
Mater.}, + volume = 3, + issue = 2, + pages = 23804, + year = 2019, + publisher = {American Physical Society}, + doi = {10.1103/PhysRevMaterials.3.023804}, +} + +@article{Lu_JChemTheoryComput_2022_v18_p5555, + annote = {DP Compress}, + author = { + Denghui Lu and Wanrun Jiang and Yixiao Chen and Linfeng Zhang and Weile Jia + and Han Wang and Mohan Chen + }, + title = { + {DP Compress: A Model Compression Scheme for Generating Efficient Deep + Potential Models} + }, + journal = {J. Chem. Theory Comput.}, + year = 2022, + volume = 18, + issue = 9, + pages = {5555--5567}, + doi = {10.1021/acs.jctc.2c00102}, +} + +@article{Mo_npjComputMater_2022_v8_p107, + annote = {NVNMD}, + author = { + Pinghui Mo and Chang Li and Dan Zhao and Yujia Zhang and Mengchao Shi and + Junhua Li and Jie Liu + }, + title = { + {Accurate and efficient molecular dynamics based on machine learning and + non von Neumann architecture} + }, + journal = {npj Comput. Mater.}, + year = 2022, + volume = 8, + issue = 1, + pages = 107, + doi = {10.1038/s41524-022-00773-z}, +} + +@article{Zeng_EnergyFuels_2021_v35_p762, + annote = {relative or atomic model deviation}, + author = {Jinzhe Zeng and Linfeng Zhang and Han Wang and Tong Zhu}, + title = { + {Exploring the Chemical Space of Linear Alkane Pyrolysis via Deep Potential + GENerator} + }, + journal = {Energy \& Fuels}, + volume = 35, + number = 1, + pages = {762--769}, + year = 2021, + doi = {10.1021/acs.energyfuels.0c03211}, +} + +@article{Zhang_2026_multitaskfinetuning, + annote = {fit properties and multi-task fine-tuning}, + author = { + Chengqian Zhang and Duo Zhang and Anyang Peng and Mingyu Guo and Yuzhi + Zhang and Lei Wang and Guolin Ke and Linfeng Zhang and Tiejun Li and Han + Wang + }, + title = { + {Multi-Task Fine-Tuning Enables Robust Out-of-Distribution Generalization + in Atomistic Models} + }, + journal = {arXiv}, + year = 2026, + pages = {2601.08486}, + doi = {10.48550/arXiv.2601.08486}, +} diff --git a/CONTRIBUTING.md 
b/CONTRIBUTING.md index e43e23beb6..a8378350e4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,7 @@ Welcome to [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit)! You can either make a code contribution, help improve our document or offer help to other users. Your help is always appreciated. Come and have fun! ### Code contribution + You can start from any one of the following items to help improve deepmd-kit - Smash a bug @@ -18,6 +19,7 @@ See [here](#before-you-contribute) for some before-hand heads-up. See [here](#how-to-contribute) to learn how to contribute. ### Document improvement + You can start from any one of the following items to help improve [DeePMD-kit Docs](https://deepmd.readthedocs.io/en/latest/?badge=latest): - Fix typos or format (punctuation, space, indentation, code block, etc.) @@ -26,111 +28,113 @@ You can start from any one of the following items to help improve [DeePMD-kit Do - Translate docs changes from English to Chinese ### Offer help + You can help other users of deepmd-kit in the following way - Submit, reply to, and resolve [issues](https://github.com/deepmodeling/deepmd-kit/issues) - (Advanced) Review Pull Requests created by others ## Before you contribute -### Overview of DeePMD-kit -Currently, we maintain two main branch: -- master: stable branch with version tag -- devel : branch for developers ### Developer guide -See [here](doc/development/index.md) for coding conventions, API and other needs-to-know of the code. + +See [documentation](https://deepmd.readthedocs.io/) for coding conventions, API and other needs-to-know of the code. ## How to contribute + Please perform the following steps to create your Pull Request to this repository. If don't like to use commands, you can also use [GitHub Desktop](https://desktop.github.com/), which is easier to get started. Go to [git documentation](https://git-scm.com/doc) if you want to really master git. ### Step 1: Fork the repository 1. Visit the project: -2. 
Click the **Fork** button on the top right and wait it to finish. +1. Click the **Fork** button on the top right and wait for it to finish. ### Step 2: Clone the forked repository to local storage and set configurations -1. Clone your own repo, not the public repo (from deepmodeling) ! And change the branch to devel. - ```bash - git clone https://github.com/$username/deepmd-kit.git - # Replace `$username` with your GitHub ID +1. Clone your own repo, not the public repo (from deepmodeling) ! - git checkout devel - ``` + ```bash + git clone https://github.com/$username/deepmd-kit.git + # Replace `$username` with your GitHub ID + ``` -2. Add deepmodeling's repo as your remote repo, we can name it "upstream". And fetch upstream's latest codes to your workstation. - ```bash - git remote add upstream https://github.com/deepmodeling/deepmd-kit.git - # After you add a remote repo, your local repo will be automatically named "origin". +1. Add deepmodeling's repo as your remote repo, we can name it "upstream". And fetch upstream's latest codes to your workstation. - git fetch upstream + ```bash + git remote add upstream https://github.com/deepmodeling/deepmd-kit.git + # After you add a remote repo, your local repo will be automatically named "origin". - # If your current codes are behind the latest codes, you should merge latest codes first. - # Notice you should merge from "devel"! - git merge upstream/devel - ``` + git fetch upstream -3. Modify your codes and design unit tests. + # If your current codes are behind the latest codes, you should merge latest codes first. + git merge upstream/master + ``` -4. Commit your changes - ```bash - git status # Checks the local status - git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` - git commit -m "commit-message: update the xx" - ``` +1. Modify your codes and design unit tests. -5. Push the changed codes to your original repo on github.
- ```bash - git push origin devel - ``` +1. Commit your changes to a new branch + + ```bash + git checkout -b branch1 + git status # Checks the local status + git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` + git commit -m "commit-message: update the xx" + ``` + +1. Push the changed codes to your original repo on github. + + ```bash + git push origin branch1 + ``` ### Alternatively: Create a new branch 1. Get your local master up-to-date with upstream/master. - ```bash - cd $working_dir/deepmd-kit - git fetch upstream - git checkout master - git rebase upstream/master - ``` + ```bash + cd $working_dir/deepmd-kit + git fetch upstream + git checkout master + git rebase upstream/master + ``` -2. Create a new branch based on the master branch. +1. Create a new branch based on the master branch. - ```bash - git checkout -b new-branch-name - ``` + ```bash + git checkout -b new-branch-name + ``` -3. Modify your codes and design unit tests. +1. Modify your codes and design unit tests. -4. Commit your changes +1. Commit your changes - ```bash - git status # Checks the local status - git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` - git commit -m "commit-message: update the xx" - ``` + ```bash + git status # Checks the local status + git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` + git commit -m "commit-message: update the xx" + ``` -5. Keep your branch in sync with upstream/master +1. Keep your branch in sync with upstream/master - ```bash - # While on your new branch - git fetch upstream - git rebase upstream/master - ``` + ```bash + # While on your new branch + git fetch upstream + git rebase upstream/master + ``` -6. Push your changes to the remote +1. 
Push your changes to the remote - ```bash - git push -u origin new-branch-name # "-u" is used to track the remote branch from origin - ``` + ```bash + git push -u origin new-branch-name # "-u" is used to track the remote branch from origin + ``` ### Step 3: Create a pull request 1. Visit your fork at (replace `$username` with your GitHub ID) -2. Click `pull requests`, followed by `New pull request` and `Compare & pull request` to create your PR. +1. Click `pull requests`, followed by `New pull request` and `Compare & pull request` to create your PR. Now, your PR is successfully submitted! After this PR is merged, you will automatically become a contributor to DeePMD-kit. ## Contact us -E-mail: contact@deepmodeling.org + +E-mail: deepmodeling@deepmodeling.com diff --git a/README.md b/README.md index f837191d78..58ec1fec7f 100644 --- a/README.md +++ b/README.md @@ -1,169 +1,121 @@ [DeePMD-kit logo](./doc/logo.md) --------------------------------------------------------------------------------- +______________________________________________________________________ + +# DeePMD-kit -DeePMD-kit Manual -======== [![GitHub release](https://img.shields.io/github/release/deepmodeling/deepmd-kit.svg?maxAge=86400)](https://github.com/deepmodeling/deepmd-kit/releases) -[![doi:10.1016/j.cpc.2018.03.016](https://img.shields.io/badge/DOI-10.1016%2Fj.cpc.2018.03.016-blue)](https://doi.org/10.1016/j.cpc.2020.107206) -[![Citations](https://citations.njzjz.win/10.1016/j.cpc.2018.03.016)](https://badge.dimensions.ai/details/doi/10.1016/j.cpc.2018.03.016) [![offline packages](https://img.shields.io/github/downloads/deepmodeling/deepmd-kit/total?label=offline%20packages)](https://github.com/deepmodeling/deepmd-kit/releases) [![conda-forge](https://img.shields.io/conda/dn/conda-forge/deepmd-kit?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/deepmd-kit) [![pip 
install](https://img.shields.io/pypi/dm/deepmd-kit?label=pip%20install)](https://pypi.org/project/deepmd-kit) [![docker pull](https://img.shields.io/docker/pulls/deepmodeling/deepmd-kit)](https://hub.docker.com/r/deepmodeling/deepmd-kit) [![Documentation Status](https://readthedocs.org/projects/deepmd/badge/)](https://deepmd.readthedocs.io/) -# Table of contents -- [About DeePMD-kit](#about-deepmd-kit) - - [Highlights in v2.0](#highlights-in-deepmd-kit-v2.0) - - [Highlighted features](#highlighted-features) - - [License and credits](#license-and-credits) - - [Deep Potential in a nutshell](#deep-potential-in-a-nutshell) -- [Download and install](#download-and-install) -- [Use DeePMD-kit](#use-deepmd-kit) -- [Code structure](#code-structure) -- [Troubleshooting](#troubleshooting) - -# About DeePMD-kit +## About DeePMD-kit + DeePMD-kit is a package written in Python/C++, designed to minimize the effort required to build deep learning-based model of interatomic potential energy and force field and to perform molecular dynamics (MD). This brings new hopes to addressing the accuracy-versus-efficiency dilemma in molecular simulations. Applications of DeePMD-kit span from finite molecules to extended systems and from metallic systems to chemically bonded systems. For more information, check the [documentation](https://deepmd.readthedocs.io/). -# Highlights in DeePMD-kit v2.0 -* [Model compression](doc/freeze/compress.md). Accelerate the efficiency of model inference 4-15 times. -* [New descriptors](doc/model/overall.md). Including [`se_e2_r`](doc/model/train-se-e2-r.md) and [`se_e3`](doc/model/train-se-e3.md). -* [Hybridization of descriptors](doc/model/train-hybrid.md). Hybrid descriptor constructed from the concatenation of several descriptors. -* [Atom type embedding](doc/model/train-se-e2-a-tebd.md). Enable atom-type embedding to decline training complexity and refine performance. -* Training and inference of the dipole (vector) and polarizability (matrix). 
-* Split of training and validation dataset. -* Optimized training on GPUs. - -## Highlighted features -* **interfaced with TensorFlow**, one of the most popular deep learning frameworks, making the training process highly automatic and efficient, in addition, Tensorboard can be used to visualize training procedures. -* **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, i.e., LAMMPS and i-PI, respectively. -* **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems including organic molecules, metals, semiconductors, insulators, etc. -* **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. -* **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models. - -## License and credits +### Highlighted features + +- **interfaced with multiple backends**, including TensorFlow, PyTorch, JAX, and Paddle, the most popular deep learning frameworks, making the training process highly automatic and efficient. +- **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, including LAMMPS, i-PI, AMBER, CP2K, GROMACS, OpenMM, and ABACUS. +- **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems, including organic molecules, metals, semiconductors, insulators, etc. +- **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. +- **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models. + +### License and credits + The project DeePMD-kit is licensed under [GNU LGPLv3.0](./LICENSE). -If you use this code in any future publications, please cite this using -``Han Wang, Linfeng Zhang, Jiequn Han, and Weinan E. 
"DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics." Computer Physics Communications 228 (2018): 178-184.`` +If you use this code in any future publications, please cite the following publications for general purpose: + +- Han Wang, Linfeng Zhang, Jiequn Han, and Weinan E. "DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics." Computer Physics Communications 228 (2018): 178-184. + [![doi:10.1016/j.cpc.2018.03.016](https://img.shields.io/badge/DOI-10.1016%2Fj.cpc.2018.03.016-blue)](https://doi.org/10.1016/j.cpc.2018.03.016) + [![Citations](https://citations.njzjz.win/10.1016/j.cpc.2018.03.016)](https://badge.dimensions.ai/details/doi/10.1016/j.cpc.2018.03.016) +- Jinzhe Zeng, Duo Zhang, Denghui Lu, Pinghui Mo, Zeyu Li, Yixiao Chen, Marián Rynik, Li'ang Huang, Ziyao Li, Shaochen Shi, Yingze Wang, Haotian Ye, Ping Tuo, Jiabin Yang, Ye Ding, Yifan Li, Davide Tisi, Qiyu Zeng, Han Bao, Yu Xia, Jiameng Huang, Koki Muraoka, Yibo Wang, Junhan Chang, Fengbo Yuan, Sigbjørn Løland Bore, Chun Cai, Yinnian Lin, Bo Wang, Jiayan Xu, Jia-Xin Zhu, Chenxing Luo, Yuzhi Zhang, Rhys E. A. Goodall, Wenshuo Liang, Anurag Kumar Singh, Sikai Yao, Jingchao Zhang, Renata Wentzcovitch, Jiequn Han, Jie Liu, Weile Jia, Darrin M. York, Weinan E, Roberto Car, Linfeng Zhang, Han Wang. "DeePMD-kit v2: A software package for deep potential models." J. Chem. Phys. 159 (2023): 054801. 
+ [![doi:10.1063/5.0155600](https://img.shields.io/badge/DOI-10.1063%2F5.0155600-blue)](https://doi.org/10.1063/5.0155600) + [![Citations](https://citations.njzjz.win/10.1063/5.0155600)](https://badge.dimensions.ai/details/doi/10.1063/5.0155600) +- Jinzhe Zeng, Duo Zhang, Anyang Peng, Xiangyu Zhang, Sensen He, Yan Wang, Xinzijian Liu, Hangrui Bi, Yifan Li, Chun Cai, Chengqian Zhang, Yiming Du, Jia-Xin Zhu, Pinghui Mo, Zhengtao Huang, Qiyu Zeng, Shaochen Shi, Xuejian Qin, Zhaoxi Yu, Chenxing Luo, Ye Ding, Yun-Pei Liu, Ruosong Shi, Zhenyu Wang, Sigbjørn Løland Bore, Junhan Chang, Zhe Deng, Zhaohan Ding, Siyuan Han, Wanrun Jiang, Guolin Ke, Zhaoqing Liu, Denghui Lu, Koki Muraoka, Hananeh Oliaei, Anurag Kumar Singh, Haohui Que, Weihong Xu, Zhangmancang Xu, Yong-Bin Zhuang, Jiayu Dai, Timothy J. Giese, Weile Jia, Ben Xu, Darrin M. York, Linfeng Zhang, Han Wang. "DeePMD-kit v3: A Multiple-Backend Framework for Machine Learning Potentials." J. Chem. Theory Comput. 21 (2025): 4375-4385. + [![doi:10.1021/acs.jctc.5c00340](https://img.shields.io/badge/DOI-10.1021%2Facs.jctc.5c00340-blue)](https://doi.org/10.1021/acs.jctc.5c00340) + [![Citations](https://citations.njzjz.win/10.1021/acs.jctc.5c00340)](https://badge.dimensions.ai/details/doi/10.1021/acs.jctc.5c00340) -## Deep Potential in a nutshell -The goal of Deep Potential is to employ deep learning techniques and realize an inter-atomic potential energy model that is general, accurate, computationally efficient and scalable. The key component is to respect the extensive and symmetry-invariant properties of a potential energy model by assigning a local reference frame and a local environment to each atom. Each environment contains a finite number of atoms, whose local coordinates are arranged in a symmetry-preserving way. These local coordinates are then transformed, through a sub-network, to so-called *atomic energy*. Summing up all the atomic energies gives the potential energy of the system. 
+In addition, please follow [the bib file](CITATIONS.bib) to cite the methods you used. -The initial proof of concept is in the [Deep Potential][1] paper, which employed an approach that was devised to train the neural network model with the potential energy only. With typical *ab initio* molecular dynamics (AIMD) datasets this is insufficient to reproduce the trajectories. The Deep Potential Molecular Dynamics ([DeePMD][2]) model overcomes this limitation. In addition, the learning process in DeePMD improves significantly over the Deep Potential method thanks to the introduction of a flexible family of loss functions. The NN potential constructed in this way reproduces accurately the AIMD trajectories, both classical and quantum (path integral), in extended and finite systems, at a cost that scales linearly with system size and is always several orders of magnitude lower than that of equivalent AIMD simulations. +### Highlights in major versions + +#### Initial version + +The goal of Deep Potential is to employ deep learning techniques and realize an inter-atomic potential energy model that is general, accurate, computationally efficient and scalable. The key component is to respect the extensive and symmetry-invariant properties of a potential energy model by assigning a local reference frame and a local environment to each atom. Each environment contains a finite number of atoms, whose local coordinates are arranged in a symmetry-preserving way. These local coordinates are then transformed, through a sub-network, to so-called _atomic energy_. Summing up all the atomic energies gives the potential energy of the system. + +The initial proof of concept is in the [Deep Potential][1] paper, which employed an approach that was devised to train the neural network model with the potential energy only. With typical _ab initio_ molecular dynamics (AIMD) datasets this is insufficient to reproduce the trajectories. 
The Deep Potential Molecular Dynamics ([DeePMD][2]) model overcomes this limitation. In addition, the learning process in DeePMD improves significantly over the Deep Potential method thanks to the introduction of a flexible family of loss functions. The NN potential constructed in this way reproduces accurately the AIMD trajectories, both classical and quantum (path integral), in extended and finite systems, at a cost that scales linearly with system size and is always several orders of magnitude lower than that of equivalent AIMD simulations. Although highly efficient, the original Deep Potential model satisfies the extensive and symmetry-invariant properties of a potential energy model at the price of introducing discontinuities in the model. This has negligible influence on a trajectory from canonical sampling but might not be sufficient for calculations of dynamical and mechanical properties. These points motivated us to develop the Deep Potential-Smooth Edition ([DeepPot-SE][3]) model, which replaces the non-smooth local frame with a smooth and adaptive embedding network. DeepPot-SE shows great ability in modeling many kinds of systems that are of interest in the fields of physics, chemistry, biology, and materials science. In addition to building up potential energy models, DeePMD-kit can also be used to build up coarse-grained models. In these models, the quantity that we want to parameterize is the free energy, or the coarse-grained potential, of the coarse-grained particles. See the [DeePCG paper][4] for more details. -# Download and install - -Please follow our [GitHub](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel). - -DeePMD-kit offers multiple installation methods. 
It is recommended to use easy methods like [offline packages](doc/install/easy-install.md#offline-packages), [conda](doc/install/easy-install.md#with-conda) and [docker](doc/install/easy-install.md#with-docker). - -One may manually install DeePMD-kit by following the instructions on [installing the Python interface](doc/install/install-from-source.md#install-the-python-interface) and [installing the C++ interface](doc/install/install-from-source.md#install-the-c-interface). The C++ interface is necessary when using DeePMD-kit with LAMMPS, i-PI or GROMACS. - - -# Use DeePMD-kit - -A quick start on using DeePMD-kit can be found as follows: - -- [Prepare data with dpdata](doc/data/dpdata.md) -- [Training a model](doc/train/training.md) -- [Freeze a model](doc/freeze/freeze.md) -- [Test a model](doc/test/test.md) -- [Run MD with LAMMPS](doc/third-party/lammps.md) - -A full [document](doc/train/train-input-auto.rst) on options in the training input script is available. - -# Advanced - -- [Installation](doc/install/index.md) - - [Easy install](doc/install/easy-install.md) - - [Install from source code](doc/install/install-from-source.md) - - [Install LAMMPS](doc/install/install-lammps.md) - - [Install i-PI](doc/install/install-ipi.md) - - [Install GROMACS](doc/install/install-gromacs.md) - - [Building conda packages](doc/install/build-conda.md) -- [Data](doc/data/index.md) - - [System](doc/data/system.md) - - [Formats of a system](doc/data/data-conv.md) - - [Prepare data with dpdata](doc/data/dpdata.md) -- [Model](doc/model/index.md) - - [Overall](doc/model/overall.md) - - [Descriptor `"se_e2_a"`](doc/model/train-se-e2-a.md) - - [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md) - - [Descriptor `"se_e3"`](doc/model/train-se-e3.md) - - [Descriptor `"se_atten"`](doc/model/train-se-atten.md) - - [Descriptor `"hybrid"`](doc/model/train-hybrid.md) - - [Descriptor `sel`](doc/model/sel.md) - - [Fit energy](doc/model/train-energy.md) - - [Fit `tensor` like `Dipole` and 
`Polarizability`](doc/model/train-fitting-tensor.md) - - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md) - - [Deep potential long-range](doc/model/dplr.md) - - [Deep Potential - Range Correction (DPRc)](doc/model/dprc.md) -- [Training](doc/train/index.md) - - [Training a model](doc/train/training.md) - - [Advanced options](doc/train/training-advanced.md) - - [Parallel training](doc/train/parallel-training.md) - - [Multi-task training](doc/train/multi-task-training.md) - - [TensorBoard Usage](doc/train/tensorboard.md) - - [Known limitations of using GPUs](doc/train/gpu-limitations.md) - - [Training Parameters](doc/train-input-auto.rst) -- [Freeze and Compress](doc/freeze/index.rst) - - [Freeze a model](doc/freeze/freeze.md) - - [Compress a model](doc/freeze/compress.md) -- [Test](doc/test/index.rst) - - [Test a model](doc/test/test.md) - - [Calculate Model Deviation](doc/test/model-deviation.md) -- [Inference](doc/inference/index.rst) - - [Python interface](doc/inference/python.md) - - [C++ interface](doc/inference/cxx.md) -- [Integrate with third-party packages](doc/third-party/index.rst) - - [Use deep potential with ASE](doc/third-party/ase.md) - - [Run MD with LAMMPS](doc/third-party/lammps.md) - - [LAMMPS commands](doc/third-party/lammps-command.md) - - [Run path-integral MD with i-PI](doc/third-party/ipi.md) - - [Run MD with GROMACS](doc/third-party/gromacs.md) - - [Interfaces out of DeePMD-kit](doc/third-party/out-of-deepmd-kit.md) -- [Use NVNMD](doc/nvnmd/index.md) - -# Code structure +#### v1 + +- Code refactor to make it highly modularized. +- GPU support for descriptors. + +#### v2 + +- Model compression. Accelerate the efficiency of model inference 4-15 times. +- New descriptors. Including `se_e2_r`, `se_e3`, and `se_atten` (DPA-1). +- Hybridization of descriptors. Hybrid descriptor constructed from the concatenation of several descriptors. +- Atom type embedding. 
Enable atom-type embedding to decline training complexity and refine performance. +- Training and inference of the dipole (vector) and polarizability (matrix). +- Split of training and validation dataset. +- Optimized training on GPUs, including CUDA and ROCm. +- Non-von-Neumann. +- C API to interface with the third-party packages. + +See [our v2 paper](https://doi.org/10.1063/5.0155600) for details of all features until v2.2.3. + +#### v3 + +- Multiple backends supported. Add PyTorch and JAX backends. +- The DPA2 and DPA3 models. +- Plugin mechanisms for external models. + +See [our v3 paper](https://doi.org/10.1021/acs.jctc.5c00340) for details of all features until v3.0. + +## Install and use DeePMD-kit + +Just copy and paste in 1s, and let it run. + +```sh +curl -fsSL https://dp1s.deepmodeling.com | bash +``` + +Please read the [online documentation](https://deepmd.readthedocs.io/) for details and alternative installation methods. + +Then, read on for a brief overview of the usage of DeePMD-kit. You may start with the first step: + +```sh +dp +``` + +## Code structure The code is organized as follows: -* `data/raw`: tools manipulating the raw data files. -* `examples`: examples. -* `deepmd`: DeePMD-kit python modules. -* `source/api_cc`: source code of DeePMD-kit C++ API. -* `source/ipi`: source code of i-PI client. -* `source/lib`: source code of DeePMD-kit library. -* `source/lmp`: source code of Lammps module. -* `source/gmx`: source code of Gromacs plugin. -* `source/op`: TensorFlow op implementation. working with the library. 
- - -# Troubleshooting - -- [Model compatibility](doc/troubleshooting/model_compatability.md) -- [Installation](doc/troubleshooting/installation.md) -- [The temperature undulates violently during the early stages of MD](doc/troubleshooting/md_energy_undulation.md) -- [MD: cannot run LAMMPS after installing a new version of DeePMD-kit](doc/troubleshooting/md_version_compatibility.md) -- [Do we need to set rcut < half boxsize?](doc/troubleshooting/howtoset_rcut.md) -- [How to set sel?](doc/troubleshooting/howtoset_sel.md) -- [How to control the parallelism of a job?](doc/troubleshooting/howtoset_num_nodes.md) -- [How to tune Fitting/embedding-net size?](doc/troubleshooting/howtoset_netsize.md) -- [Why does a model have low precision?](doc/troubleshooting/precision.md) +- `examples`: examples. +- `deepmd`: DeePMD-kit python modules. +- `source/lib`: source code of the core library. +- `source/op`: Operator (OP) implementation. +- `source/api_cc`: source code of DeePMD-kit C++ API. +- `source/api_c`: source code of the C API. +- `source/nodejs`: source code of the Node.js API. +- `source/ipi`: source code of i-PI client. +- `source/lmp`: source code of LAMMPS module. +- `source/gmx`: source code of Gromacs plugin. # Contributing See [DeePMD-kit Contributing Guide](CONTRIBUTING.md) to become a contributor! 
🤓 - [1]: https://arxiv.org/abs/1707.01478 [2]: https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001 [3]: https://arxiv.org/abs/1805.09003 diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/backend/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/backend/dp_backend.py b/backend/dp_backend.py index 97fa1578c7..e32d5db38b 100644 --- a/backend/dp_backend.py +++ b/backend/dp_backend.py @@ -1,14 +1,19 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later """A PEP-517 backend to find TensorFlow.""" -from typing import ( - List, -) -from find_tensorflow import ( +import os + +from scikit_build_core import build as _orig + +from .find_pytorch import ( + find_pytorch, +) +from .find_tensorflow import ( find_tensorflow, ) - -# TODO: switch to scikit_build_core after it is available -from setuptools import build_meta as _orig +from .read_env import ( + set_scikit_build_env, +) __all__ = [ "build_sdist", @@ -19,23 +24,40 @@ ] -def __dir__() -> List[str]: +def __dir__() -> list[str]: return __all__ +set_scikit_build_env() + prepare_metadata_for_build_wheel = _orig.prepare_metadata_for_build_wheel build_wheel = _orig.build_wheel build_sdist = _orig.build_sdist get_requires_for_build_sdist = _orig.get_requires_for_build_sdist +prepare_metadata_for_build_editable = _orig.prepare_metadata_for_build_editable +build_editable = _orig.build_editable def get_requires_for_build_wheel( config_settings: dict, -) -> List[str]: - return _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1] - - -# TODO: export get_requires_for_build_editable, prepare_metadata_for_build_editable, build_editable -# after scikit-build is ready -# See https://github.com/scikit-build/scikit-build/issues/740 -# Now we use the legacy-editable mode +) -> list[str]: + if os.environ.get("CIBUILDWHEEL", "0") == "1": + cibw_deps = ["mpich"] + else: + cibw_deps = [] + return ( + 
_orig.get_requires_for_build_wheel(config_settings) + + find_tensorflow()[1] + + find_pytorch()[1] + + cibw_deps + ) + + +def get_requires_for_build_editable( + config_settings: dict, +) -> list[str]: + return ( + _orig.get_requires_for_build_editable(config_settings) + + find_tensorflow()[1] + + find_pytorch()[1] + ) diff --git a/backend/dynamic_metadata.py b/backend/dynamic_metadata.py new file mode 100644 index 0000000000..e7763cac84 --- /dev/null +++ b/backend/dynamic_metadata.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import sys +from pathlib import ( + Path, +) + +from .find_pytorch import ( + get_pt_requirement, +) +from .find_tensorflow import ( + get_tf_requirement, +) +from .read_env import ( + get_argument_from_env, +) + +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib + +__all__ = ["dynamic_metadata"] + + +def __dir__() -> list[str]: + return __all__ + + +def dynamic_metadata( + field: str, + settings: dict[str, object] | None = None, +): + assert field in ["optional-dependencies", "entry-points", "scripts"] + _, _, find_libpython_requires, extra_scripts, tf_version, pt_version = ( + get_argument_from_env() + ) + with Path("pyproject.toml").open("rb") as f: + pyproject = tomllib.load(f) + + if field == "scripts": + return { + **pyproject["tool"]["deepmd_build_backend"]["scripts"], + **extra_scripts, + } + elif field == "optional-dependencies": + optional_dependencies = pyproject["tool"]["deepmd_build_backend"][ + "optional-dependencies" + ] + optional_dependencies["lmp"].extend(find_libpython_requires) + optional_dependencies["ipi"].extend(find_libpython_requires) + return { + **optional_dependencies, + **get_tf_requirement(tf_version), + **get_pt_requirement(pt_version), + } diff --git a/backend/find_paddle.py b/backend/find_paddle.py new file mode 100644 index 0000000000..c9b0319504 --- /dev/null +++ b/backend/find_paddle.py @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later 
+import importlib
+import os
+import site
+from functools import (
+    lru_cache,
+)
+from importlib.machinery import (
+    FileFinder,
+)
+from importlib.util import (
+    find_spec,
+)
+from pathlib import (
+    Path,
+)
+from sysconfig import (
+    get_path,
+)
+
+
+@lru_cache
+def find_paddle() -> tuple[str | None, list[str]]:
+    """Find PaddlePaddle library.
+
+    Tries to find PaddlePaddle in the order of:
+
+    1. Environment variable `PADDLE_ROOT` if set
+    2. The current Python environment.
+    3. user site packages directory if enabled
+    4. system site packages directory (purelib)
+
+    Considering the default PaddlePaddle package still uses old CXX11 ABI, we
+    cannot install it automatically.
+
+    Returns
+    -------
+    str, optional
+        PaddlePaddle library path if found.
+    list of str
+        Paddle requirement if not found. Empty if found.
+    """
+    if os.environ.get("DP_ENABLE_PADDLE", "0") == "0":
+        return None, []
+    requires = []
+    pd_spec = None
+
+    if (pd_spec is None or not pd_spec) and os.environ.get("PADDLE_ROOT") is not None:
+        site_packages = Path(os.environ.get("PADDLE_ROOT")).parent.absolute()
+        pd_spec = FileFinder(str(site_packages)).find_spec("paddle")
+
+    # get paddle spec
+    # note: isolated build will not work for backend
+    if pd_spec is None or not pd_spec:
+        pd_spec = find_spec("paddle")
+
+    if not pd_spec and site.ENABLE_USER_SITE:
+        # first search Paddle from user site-packages before global site-packages
+        site_packages = site.getusersitepackages()
+        if site_packages:
+            pd_spec = FileFinder(site_packages).find_spec("paddle")
+
+    if not pd_spec:
+        # purelib gets site-packages path
+        site_packages = get_path("purelib")
+        if site_packages:
+            pd_spec = FileFinder(site_packages).find_spec("paddle")
+
+    # get install dir from spec
+    try:
+        pd_install_dir = pd_spec.submodule_search_locations[0]  # type: ignore
+        # AttributeError if pd_spec is None
+        # TypeError if submodule_search_locations are None
+        # IndexError if submodule_search_locations is an empty list
+    except
(AttributeError, TypeError, IndexError):
+        pd_install_dir = None
+    requires.extend(get_pd_requirement()["paddle"])
+    return pd_install_dir, requires
+
+
+@lru_cache
+def get_pd_requirement(pd_version: str = "") -> dict:
+    """Get PaddlePaddle requirement when Paddle is not installed.
+
+    If pd_version is not given and the environment variable `PADDLE_VERSION` is set, use it as the requirement.
+
+    Parameters
+    ----------
+    pd_version : str, optional
+        Paddle version
+
+    Returns
+    -------
+    dict
+        PaddlePaddle requirement.
+    """
+    if pd_version is None:
+        return {"paddle": []}
+    if pd_version == "":
+        pd_version = os.environ.get("PADDLE_VERSION", "")
+
+    return {
+        "paddle": [
+            "paddlepaddle>=3.0.0",
+        ],
+    }
+
+
+@lru_cache
+def get_pd_version(pd_path: str | Path | None) -> str:
+    """Get Paddle version from a Paddle Python library path.
+
+    Parameters
+    ----------
+    pd_path : str or Path
+        Paddle Python library path, e.g. "/python3.10/site-packages/paddle/"
+
+    Returns
+    -------
+    str
+        version
+    """
+    if pd_path is None or pd_path == "":
+        return ""
+    version_file = Path(pd_path) / "version" / "__init__.py"
+    spec = importlib.util.spec_from_file_location("paddle.version", version_file)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module.full_version
diff --git a/backend/find_pytorch.py b/backend/find_pytorch.py
new file mode 100644
index 0000000000..d50f57bf5e
--- /dev/null
+++ b/backend/find_pytorch.py
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import importlib
+import os
+import site
+from functools import (
+    lru_cache,
+)
+from importlib.machinery import (
+    FileFinder,
+)
+from importlib.util import (
+    find_spec,
+)
+from pathlib import (
+    Path,
+)
+from sysconfig import (
+    get_path,
+)
+
+from packaging.specifiers import (
+    SpecifierSet,
+)
+from packaging.version import (
+    Version,
+)
+
+from .utils import (
+    read_dependencies_from_dependency_group,
+)
+
+
+@lru_cache
+def
find_pytorch() -> tuple[str | None, list[str]]:
+    """Find PyTorch library.
+
+    Tries to find PyTorch in the order of:
+
+    1. Environment variable `PYTORCH_ROOT` if set
+    2. The current Python environment.
+    3. user site packages directory if enabled
+    4. system site packages directory (purelib)
+
+    Considering the default PyTorch package still uses old CXX11 ABI, we
+    cannot install it automatically.
+
+    Returns
+    -------
+    str, optional
+        PyTorch library path if found.
+    list of str
+        PyTorch requirement if not found. Empty if found.
+    """
+    if os.environ.get("DP_ENABLE_PYTORCH", "0") == "0":
+        return None, []
+    requires = []
+    pt_spec = None
+
+    if (pt_spec is None or not pt_spec) and os.environ.get("PYTORCH_ROOT") is not None:
+        site_packages = Path(os.environ.get("PYTORCH_ROOT")).parent.absolute()
+        pt_spec = FileFinder(str(site_packages)).find_spec("torch")
+
+    # get pytorch spec
+    # note: isolated build will not work for backend
+    if pt_spec is None or not pt_spec:
+        pt_spec = find_spec("torch")
+
+    if not pt_spec and site.ENABLE_USER_SITE:
+        # first search PT from user site-packages before global site-packages
+        site_packages = site.getusersitepackages()
+        if site_packages:
+            pt_spec = FileFinder(site_packages).find_spec("torch")
+
+    if not pt_spec:
+        # purelib gets site-packages path
+        site_packages = get_path("purelib")
+        if site_packages:
+            pt_spec = FileFinder(site_packages).find_spec("torch")
+
+    # get install dir from spec
+    try:
+        pt_install_dir = pt_spec.submodule_search_locations[0]  # type: ignore
+        # AttributeError if pt_spec is None
+        # TypeError if submodule_search_locations are None
+        # IndexError if submodule_search_locations is an empty list
+    except (AttributeError, TypeError, IndexError):
+        pt_install_dir = None
+    requires.extend(get_pt_requirement()["torch"])
+    return pt_install_dir, requires
+
+
+@lru_cache
+def get_pt_requirement(pt_version: str = "") -> dict:
+    """Get PyTorch requirement when PT is not installed.
+
+    If pt_version is not given and the environment variable `PYTORCH_VERSION` is set, use it as the requirement.
+
+    Parameters
+    ----------
+    pt_version : str, optional
+        PT version
+
+    Returns
+    -------
+    dict
+        PyTorch requirement.
+    """
+    if pt_version is None:
+        return {"torch": []}
+    cibw_requirement = []
+    if os.environ.get("CIBUILDWHEEL", "0") == "1":
+        cuda_version = os.environ.get("CUDA_VERSION", "12.2")
+        if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"):
+            # CUDA 12.2, cudnn 9
+            # or CPU builds
+            cibw_requirement = read_dependencies_from_dependency_group(
+                "pin_pytorch_cpu"
+            )
+        else:
+            raise RuntimeError("Unsupported CUDA version") from None
+    if pt_version == "":
+        pt_version = os.environ.get("PYTORCH_VERSION", "")
+    if os.environ.get("CIBUILDWHEEL", "0") == "1":
+        # PyTorch OP library is built against mpich
+        mpi_requirement = ["mpich"]
+    else:
+        mpi_requirement = []
+
+    return {
+        "torch": [
+            # uv has different local version behaviors, i.e. `==2.3.1` cannot match `==2.3.1+cpu`
+            # https://github.com/astral-sh/uv/blob/main/PIP_COMPATIBILITY.md#local-version-identifiers
+            # luckily, .* (prefix matching) defined in PEP 440 can match any local version
+            # https://peps.python.org/pep-0440/#version-matching
+            f"torch=={Version(pt_version).base_version}.*"
+            if pt_version != ""
+            # https://github.com/pytorch/pytorch/commit/7e0c26d4d80d6602aed95cb680dfc09c9ce533bc
+            else "torch>=2.1.0",
+            *mpi_requirement,
+            *cibw_requirement,
+        ],
+    }
+
+
+@lru_cache
+def get_pt_version(pt_path: str | Path | None) -> str:
+    """Get PyTorch version from a PyTorch Python library path.
+ + Parameters + ---------- + pt_path : str or Path + PT Python library path + + Returns + ------- + str + version + """ + if pt_path is None or pt_path == "": + return "" + version_file = Path(pt_path) / "version.py" + spec = importlib.util.spec_from_file_location("torch.version", version_file) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.__version__ diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 52c7fc5286..b1528f1a47 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -1,5 +1,10 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later import os +import re import site +from functools import ( + lru_cache, +) from importlib.machinery import ( FileFinder, ) @@ -12,19 +17,18 @@ from sysconfig import ( get_path, ) -from typing import ( - List, - Optional, - Tuple, - Union, -) from packaging.specifiers import ( SpecifierSet, ) +from .utils import ( + read_dependencies_from_dependency_group, +) + -def find_tensorflow() -> Tuple[Optional[str], List[str]]: +@lru_cache +def find_tensorflow() -> tuple[str | None, list[str]]: """Find TensorFlow library. Tries to find TensorFlow in the order of: @@ -42,12 +46,21 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: list of str TensorFlow requirement if not found. Empty if found. 
""" + if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "0": + return None, [] requires = [] tf_spec = None - if os.environ.get("TENSORFLOW_ROOT") is not None: + + if (tf_spec is None or not tf_spec) and os.environ.get( + "TENSORFLOW_ROOT" + ) is not None: site_packages = Path(os.environ.get("TENSORFLOW_ROOT")).parent.absolute() tf_spec = FileFinder(str(site_packages)).find_spec("tensorflow") + if tf_spec is None: + raise RuntimeError( + f"cannot find TensorFlow under TENSORFLOW_ROOT {os.environ.get('TENSORFLOW_ROOT')}" + ) # get tensorflow spec # note: isolated build will not work for backend @@ -73,12 +86,24 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: # TypeError if submodule_search_locations are None # IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): - requires.extend(get_tf_requirement()["cpu"]) + tf_version = "" + if os.environ.get("CIBUILDWHEEL", "0") == "1": + cuda_version = os.environ.get("CUDA_VERSION", "12.2") + if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"): + # CUDA 12.2, cudnn 9 + # or CPU builds + requires.extend( + read_dependencies_from_dependency_group("pin_tensorflow_cpu") + ) + else: + raise RuntimeError("Unsupported CUDA version") from None + requires.extend(get_tf_requirement(tf_version)["cpu"]) # setuptools will re-find tensorflow after installing setup_requires tf_install_dir = None return tf_install_dir, requires +@lru_cache def get_tf_requirement(tf_version: str = "") -> dict: """Get TensorFlow requirement (CPU) when TF is not installed. @@ -94,47 +119,84 @@ def get_tf_requirement(tf_version: str = "") -> dict: dict TensorFlow requirement, including cpu and gpu. 
""" + if tf_version is None: + return { + "cpu": [], + "gpu": [], + "mpi": [], + } if tf_version == "": tf_version = os.environ.get("TENSORFLOW_VERSION", "") + extra_requires = [] + extra_select = {} + if not (tf_version == "" or tf_version in SpecifierSet(">=2.12", prereleases=True)): + extra_requires.append("protobuf<3.20") + # keras 3 is not compatible with tf.compat.v1 + # 2024/04/24: deepmd.tf doesn't import tf.keras any more + + if tf_version == "" or tf_version in SpecifierSet(">=1.15", prereleases=True): + extra_select["mpi"] = [ + "horovod", + "mpi4py", + ] + else: + extra_select["mpi"] = [] + if tf_version == "": return { "cpu": [ - "tensorflow-cpu; platform_machine!='aarch64'", - "tensorflow; platform_machine=='aarch64'", + "tensorflow-cpu; platform_machine!='aarch64' and (platform_machine!='arm64' or platform_system != 'Darwin')", + "tensorflow; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')", + # https://github.com/tensorflow/tensorflow/issues/61830 + # Since TF 2.20, not all symbols are exported to the public API. + "tensorflow-cpu!=2.15.*,<2.20; platform_system=='Windows'", + # https://github.com/h5py/h5py/issues/2408 + "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", + *extra_requires, ], "gpu": [ - "tensorflow; platform_machine!='aarch64'", - "tensorflow; platform_machine=='aarch64'", + "tensorflow", + "tensorflow-metal; platform_machine=='arm64' and platform_system == 'Darwin'", + # See above. 
+ "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", + *extra_requires, ], + **extra_select, } - elif tf_version in SpecifierSet("<1.15") or tf_version in SpecifierSet( - ">=2.0,<2.1" - ): + elif tf_version in SpecifierSet( + "<1.15", prereleases=True + ) or tf_version in SpecifierSet(">=2.0,<2.1", prereleases=True): return { "cpu": [ - f"tensorflow=={tf_version}; platform_machine!='aarch64'", - f"tensorflow=={tf_version}; platform_machine=='aarch64'", + f"tensorflow=={tf_version}", + *extra_requires, ], "gpu": [ f"tensorflow-gpu=={tf_version}; platform_machine!='aarch64'", f"tensorflow=={tf_version}; platform_machine=='aarch64'", + *extra_requires, ], + **extra_select, } else: return { "cpu": [ - f"tensorflow-cpu=={tf_version}; platform_machine!='aarch64'", - f"tensorflow=={tf_version}; platform_machine=='aarch64'", + f"tensorflow-cpu=={tf_version}; platform_machine!='aarch64' and (platform_machine!='arm64' or platform_system != 'Darwin')", + f"tensorflow=={tf_version}; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')", + *extra_requires, ], "gpu": [ - f"tensorflow=={tf_version}; platform_machine!='aarch64'", - f"tensorflow=={tf_version}; platform_machine=='aarch64'", + f"tensorflow=={tf_version}", + "tensorflow-metal; platform_machine=='arm64' and platform_system == 'Darwin'", + *extra_requires, ], + **extra_select, } -def get_tf_version(tf_path: Union[str, Path]) -> str: +@lru_cache +def get_tf_version(tf_path: str | Path | None) -> str: """Get TF version from a TF Python library path. 
Parameters @@ -163,6 +225,22 @@ def get_tf_version(tf_path: Union[str, Path]) -> str: patch = line.split()[-1] elif line.startswith("#define TF_VERSION_SUFFIX"): suffix = line.split()[-1].strip('"') + if None in (major, minor, patch): + # since TF 2.20.0, version information is no more contained in version.h + # try to read version from tools/pip_package/setup.py + # _VERSION = '2.20.0' + setup_file = Path(tf_path) / "tools" / "pip_package" / "setup.py" + if setup_file.exists(): + with open(setup_file) as f: + for line in f: + # parse with regex + match = re.search( + r"_VERSION[ \t]*=[ \t]*'(\d+)\.(\d+)\.(\d+)([a-zA-Z0-9]*)?'", + line, + ) + if match: + major, minor, patch, suffix = match.groups() + break if None in (major, minor, patch): raise RuntimeError("Failed to read TF version") return ".".join((major, minor, patch)) + suffix diff --git a/backend/read_env.py b/backend/read_env.py new file mode 100644 index 0000000000..8a173513f9 --- /dev/null +++ b/backend/read_env.py @@ -0,0 +1,139 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Read environment variables to configure the build.""" + +import os +from functools import ( + lru_cache, +) + +from packaging.version import ( + Version, +) + +from .find_pytorch import ( + find_pytorch, + get_pt_version, +) +from .find_tensorflow import ( + find_tensorflow, + get_tf_version, +) + + +@lru_cache +def get_argument_from_env() -> tuple[str, list, list, dict, str, str]: + """Get the arguments from environment variables. + + The environment variables are assumed to be not changed during the build. + + Returns + ------- + str + The minimum required CMake version. + list of str + The CMake arguments. + list of str + The requirements for the build. + dict + The extra scripts to be installed. + str + The TensorFlow version. + str + The PyTorch version. 
+ """ + cmake_args = [] + extra_scripts = {} + # get variant option from the environment variables, available: cpu, cuda, rocm + dp_variant = os.environ.get("DP_VARIANT", "cpu").lower() + if dp_variant == "cpu" or dp_variant == "": + cmake_minimum_required_version = "3.25.2" + elif dp_variant == "cuda": + cmake_minimum_required_version = "3.25.2" + cmake_args.append("-DUSE_CUDA_TOOLKIT:BOOL=TRUE") + cuda_root = os.environ.get("CUDAToolkit_ROOT") + if cuda_root: + cmake_args.append(f"-DCUDAToolkit_ROOT:STRING={cuda_root}") + elif dp_variant == "rocm": + cmake_minimum_required_version = "3.25.2" + cmake_args.append("-DUSE_ROCM_TOOLKIT:BOOL=TRUE") + rocm_root = os.environ.get("ROCM_ROOT") + if not rocm_root: + rocm_root = os.environ.get("ROCM_PATH") + if rocm_root: + cmake_args.append(f"-DCMAKE_HIP_COMPILER_ROCM_ROOT:STRING={rocm_root}") + hipcc_flags = os.environ.get("HIP_HIPCC_FLAGS") + if hipcc_flags is not None: + os.environ["HIPFLAGS"] = os.environ.get("HIPFLAGS", "") + " " + hipcc_flags + else: + raise RuntimeError(f"Unsupported DP_VARIANT option: {dp_variant}") + + if os.environ.get("DP_BUILD_TESTING", "0") == "1": + cmake_args.append("-DBUILD_TESTING:BOOL=TRUE") + if os.environ.get("DP_ENABLE_NATIVE_OPTIMIZATION", "0") == "1": + cmake_args.append("-DENABLE_NATIVE_OPTIMIZATION:BOOL=TRUE") + dp_lammps_version = os.environ.get("DP_LAMMPS_VERSION", "") + dp_ipi = os.environ.get("DP_ENABLE_IPI", "0") + if dp_lammps_version != "" or dp_ipi == "1": + cmake_args.append("-DBUILD_CPP_IF:BOOL=TRUE") + cmake_args.append("-DUSE_TF_PYTHON_LIBS:BOOL=TRUE") + else: + cmake_args.append("-DBUILD_CPP_IF:BOOL=FALSE") + + if dp_lammps_version != "": + cmake_args.append(f"-DLAMMPS_VERSION={dp_lammps_version}") + if dp_ipi == "1": + cmake_args.append("-DENABLE_IPI:BOOL=TRUE") + extra_scripts["dp_ipi"] = "deepmd.entrypoints.ipi:dp_ipi" + + if os.environ.get("DP_ENABLE_TENSORFLOW", "1") == "1": + tf_install_dir, _ = find_tensorflow() + tf_version = get_tf_version(tf_install_dir) + if 
tf_version == "" or Version(tf_version) >= Version("2.12"): + find_libpython_requires = [] + else: + find_libpython_requires = ["find_libpython"] + cmake_args.extend( + [ + "-DENABLE_TENSORFLOW=ON", + f"-DTENSORFLOW_VERSION={tf_version}", + f"-DTENSORFLOW_ROOT:PATH={tf_install_dir}", + ] + ) + else: + find_libpython_requires = [] + cmake_args.append("-DENABLE_TENSORFLOW=OFF") + tf_version = None + + if os.environ.get("DP_ENABLE_PYTORCH", "0") == "1": + pt_install_dir, _ = find_pytorch() + pt_version = get_pt_version(pt_install_dir) + cmake_args.extend( + [ + "-DENABLE_PYTORCH=ON", + f"-DCMAKE_PREFIX_PATH={pt_install_dir}", + ] + ) + else: + cmake_args.append("-DENABLE_PYTORCH=OFF") + pt_version = None + + cmake_args = [ + "-DBUILD_PY_IF:BOOL=TRUE", + f"-DCIBUILDWHEEL={os.environ.get('CIBUILDWHEEL', '0')}", + *cmake_args, + ] + return ( + cmake_minimum_required_version, + cmake_args, + find_libpython_requires, + extra_scripts, + tf_version, + pt_version, + ) + + +def set_scikit_build_env() -> None: + """Set scikit-build environment variables before executing scikit-build.""" + cmake_minimum_required_version, cmake_args, _, _, _, _ = get_argument_from_env() + os.environ["SKBUILD_CMAKE_MINIMUM_VERSION"] = cmake_minimum_required_version + os.environ["SKBUILD_CMAKE_ARGS"] = ";".join(cmake_args) diff --git a/backend/utils.py b/backend/utils.py new file mode 100644 index 0000000000..0769879d24 --- /dev/null +++ b/backend/utils.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import sys +from pathlib import ( + Path, +) + +from dependency_groups import ( + resolve, +) + +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib + + +def read_dependencies_from_dependency_group(group: str) -> tuple[str, ...]: + """ + Reads dependencies from a dependency group. + + Parameters + ---------- + group : str + The name of the dependency group. + + Returns + ------- + tuple[str, ...] + A tuple of dependencies in the specified group. 
+ """ + with Path("pyproject.toml").open("rb") as f: + pyproject = tomllib.load(f) + + groups = pyproject["dependency-groups"] + + return resolve(groups, group) diff --git a/codecov.yml b/codecov.yml index 54ab488cf8..5b700bdddd 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,5 +1,6 @@ ignore: - "source/**/tests" + - "source/3rdparty" coverage: status: project: @@ -39,3 +40,11 @@ component_management: name: LAMMPS paths: - source/lmp/** + - component_id: module_ipi + name: i-PI + paths: + - source/ipi/** +codecov: + notify: + # 12 Python + 2 C++ + after_n_builds: 14 diff --git a/data/json/json2yaml.py b/data/json/json2yaml.py index 6c97771bec..8a07b4a6eb 100644 --- a/data/json/json2yaml.py +++ b/data/json/json2yaml.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +# SPDX-License-Identifier: LGPL-3.0-or-later import argparse import json from pathlib import ( @@ -12,14 +13,14 @@ import yaml -def _main(): +def _main() -> None: parser = argparse.ArgumentParser( description="convert json config file to yaml", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) # get all json files in dir - jsons = [p for p in Path.cwd().glob("*.json")] + jsons = list(Path.cwd().glob("*.json")) # use the newest as autosuggestion jsons.sort(key=lambda x: x.stat().st_mtime, reverse=True) jfile = jsons[0] diff --git a/data/raw/copy_raw.py b/data/raw/copy_raw.py index 073b0fbfd7..a0ea45277e 100755 --- a/data/raw/copy_raw.py +++ b/data/raw/copy_raw.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +# SPDX-License-Identifier: LGPL-3.0-or-later import argparse import os import os.path @@ -7,7 +8,7 @@ import numpy as np -def copy(in_dir, out_dir, ncopies=[1, 1, 1]): +def copy(in_dir, out_dir, ncopies=[1, 1, 1]) -> None: has_energy = os.path.isfile(in_dir + "/energy.raw") has_force = os.path.isfile(in_dir + "/force.raw") has_virial = os.path.isfile(in_dir + "/virial.raw") @@ -70,7 +71,7 @@ def copy(in_dir, out_dir, ncopies=[1, 1, 1]): np.savetxt(out_dir + "/ncopies.raw", ncopies, fmt="%d") -def _main(): 
+def _main() -> None: parser = argparse.ArgumentParser(description="parse copy raw args") parser.add_argument("INPUT", default=".", help="input dir of raw files") parser.add_argument("OUTPUT", default=".", help="output dir of copied raw files") @@ -84,7 +85,7 @@ def _main(): ) args = parser.parse_args() - print("# copy the system by %s copies" % args.ncopies) + print(f"# copy the system by {args.ncopies} copies") # noqa: T201 assert np.all( np.array(args.ncopies, dtype=int) >= np.array([1, 1, 1], dtype=int) ), "number of copies should be larger than or equal to 1" diff --git a/data/raw/raw_to_set.sh b/data/raw/raw_to_set.sh index 1752f8d641..3971a9c279 100755 --- a/data/raw/raw_to_set.sh +++ b/data/raw/raw_to_set.sh @@ -41,69 +41,69 @@ for ii in $(seq 0 $nset_1); do test -f atomic_polarizability.raw$pi && mv atomic_polarizability.raw$pi set.$pi/atomic_polarizability.raw cd set.$pi - python -c 'import numpy as np; data = np.loadtxt("box.raw" , ndmin = 2); data = data.astype (np.float32); np.save ("box", data)' - python -c 'import numpy as np; data = np.loadtxt("coord.raw" , ndmin = 2); data = data.astype (np.float32); np.save ("coord", data)' + python -c 'import numpy as np; data = np.loadtxt("box.raw" , ndmin = 2); data = data.astype (np.float64); np.save ("box", data)' + python -c 'import numpy as np; data = np.loadtxt("coord.raw" , ndmin = 2); data = data.astype (np.float64); np.save ("coord", data)' python -c \ 'import numpy as np; import os.path; if os.path.isfile("energy.raw"): data = np.loadtxt("energy.raw"); - data = data.astype (np.float32); + data = data.astype (np.float64); np.save ("energy", data) ' python -c \ 'import numpy as np; import os.path; if os.path.isfile("force.raw" ): data = np.loadtxt("force.raw", ndmin = 2); - data = data.astype (np.float32); + data = data.astype (np.float64); np.save ("force", data) ' python -c \ 'import numpy as np; import os.path; if os.path.isfile("virial.raw"): data = np.loadtxt("virial.raw", ndmin = 2); - data = 
data.astype (np.float32); + data = data.astype (np.float64); np.save ("virial", data) ' python -c \ 'import numpy as np; import os.path; if os.path.isfile("atom_ener.raw"): data = np.loadtxt("atom_ener.raw", ndmin = 2); - data = data.astype (np.float32); + data = data.astype (np.float64); np.save ("atom_ener", data) ' python -c \ 'import numpy as np; import os.path; if os.path.isfile("fparam.raw"): data = np.loadtxt("fparam.raw", ndmin = 2); - data = data.astype (np.float32); + data = data.astype (np.float64); np.save ("fparam", data) ' python -c \ 'import numpy as np; import os.path; if os.path.isfile("dipole.raw"): data = np.loadtxt("dipole.raw", ndmin = 2); - data = data.astype (np.float32); + data = data.astype (np.float64); np.save ("dipole", data) ' python -c \ 'import numpy as np; import os.path; if os.path.isfile("polarizability.raw"): data = np.loadtxt("polarizability.raw", ndmin = 2); - data = data.astype (np.float32); + data = data.astype (np.float64); np.save ("polarizability", data) ' python -c \ 'import numpy as np; import os.path; if os.path.isfile("atomic_dipole.raw"): data = np.loadtxt("atomic_dipole.raw", ndmin = 2); - data = data.astype (np.float32); + data = data.astype (np.float64); np.save ("atomic_dipole", data) ' python -c \ 'import numpy as np; import os.path; if os.path.isfile("atomic_polarizability.raw"): data = np.loadtxt("atomic_polarizability.raw", ndmin = 2); - data = data.astype (np.float32); + data = data.astype (np.float64); np.save ("atomic_polarizability", data) ' rm *.raw diff --git a/data/raw/shuffle_raw.py b/data/raw/shuffle_raw.py index 2c99188aa4..690307cc2c 100755 --- a/data/raw/shuffle_raw.py +++ b/data/raw/shuffle_raw.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +# SPDX-License-Identifier: LGPL-3.0-or-later import argparse import os @@ -29,14 +30,14 @@ def detect_raw(path): return raws -def _main(): +def _main() -> None: args = _parse_args() raws = args.raws inpath = args.INPUT outpath = args.OUTPUT if not 
os.path.isdir(inpath): - print("# no input dir " + inpath + ", exit") + print("# no input dir " + inpath + ", exit") # noqa: T201 return if not os.path.isdir(outpath): @@ -46,16 +47,16 @@ def _main(): raws = detect_raw(inpath) if len(raws) == 0: - print("# no file to shuffle, exit") + print("# no file to shuffle, exit") # noqa: T201 return assert "box.raw" in raws tmp = np.loadtxt(os.path.join(inpath, "box.raw")) tmp = np.reshape(tmp, [-1, 9]) nframe = tmp.shape[0] - print(nframe) + print(nframe) # noqa: T201 - print( + print( # noqa: T201 "# will shuffle raw files " + str(raws) + " in dir " @@ -68,7 +69,8 @@ def _main(): tmp = np.reshape(tmp, [nframe, -1]) nframe = tmp.shape[0] idx = np.arange(nframe) - np.random.shuffle(idx) + rng = np.random.default_rng() + rng.shuffle(idx) for ii in raws: data = np.loadtxt(inpath + "/" + ii) diff --git a/deepmd/.gitignore b/deepmd/.gitignore index b2b9057ea2..b2d7614637 100644 --- a/deepmd/.gitignore +++ b/deepmd/.gitignore @@ -1,4 +1,2 @@ -op/_*.py pkg_config run_config.ini -!op/__init__.py diff --git a/deepmd/__about__.py b/deepmd/__about__.py deleted file mode 100644 index d5cfca6473..0000000000 --- a/deepmd/__about__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "unknown" diff --git a/deepmd/__init__.py b/deepmd/__init__.py index d96d7eb4a4..bc351ee59b 100644 --- a/deepmd/__init__.py +++ b/deepmd/__init__.py @@ -1,60 +1,53 @@ -"""Root of the deepmd package, exposes all public classes and submodules.""" +# SPDX-License-Identifier: LGPL-3.0-or-later +"""DeePMD-kit is a package written in Python/C++, designed to +minimize the effort required to build deep learning-based model +of interatomic potential energy and force field and to perform +molecular dynamics (MD). -try: - from importlib import ( - metadata, - ) -except ImportError: # for Python<3.8 - import importlib_metadata as metadata - -import deepmd.utils.network as network - -from . 
import ( - cluster, - descriptor, - fit, - loss, - nvnmd, - utils, -) -from .env import ( - set_mkl, -) -from .infer import ( - DeepEval, - DeepPotential, -) -from .infer.data_modifier import ( - DipoleChargeModifier, +The top module (deepmd.__init__) should not import any third-party +modules for performance. +""" + +from typing import ( + TYPE_CHECKING, + Any, ) -set_mkl() +if TYPE_CHECKING: + from deepmd.infer import DeepPotential as DeepPotentialType try: - from ._version import version as __version__ + from deepmd._version import version as __version__ except ImportError: from .__about__ import ( __version__, ) -# load third-party plugins -try: - eps = metadata.entry_points(group="deepmd") -except TypeError: - eps = metadata.entry_points().get("deepmd", []) -for ep in eps: - ep.load() + +def DeepPotential(*args: Any, **kwargs: Any) -> "DeepPotentialType": + """Factory function that forwards to DeepEval (for compatibility + and performance). + + Parameters + ---------- + *args + positional arguments + **kwargs + keyword arguments + + Returns + ------- + DeepEval + potentials + """ + from deepmd.infer import ( + DeepPotential, + ) + + return DeepPotential(*args, **kwargs) + __all__ = [ - "__version__", - "descriptor", - "fit", - "loss", - "utils", - "cluster", - "network", - "DeepEval", "DeepPotential", - "DipoleChargeModifier", - "nvnmd", + "__version__", ] diff --git a/deepmd/__main__.py b/deepmd/__main__.py index 8e3011bc7b..a31379b5e3 100644 --- a/deepmd/__main__.py +++ b/deepmd/__main__.py @@ -1,6 +1,7 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later """Package dp entry point.""" -from .entrypoints.main import ( +from deepmd.main import ( main, ) diff --git a/deepmd/backend/__init__.py b/deepmd/backend/__init__.py new file mode 100644 index 0000000000..fef9526294 --- /dev/null +++ b/deepmd/backend/__init__.py @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Backends. 
+ +Avoid directly importing third-party libraries in this module for performance. +""" + +# copy from dpdata +from importlib import ( + import_module, +) +from pathlib import ( + Path, +) + +from deepmd.utils.entry_point import ( + load_entry_point, +) + +PACKAGE_BASE = "deepmd.backend" +NOT_LOADABLE = ("__init__.py",) + +for module_file in Path(__file__).parent.glob("*.py"): + if module_file.name not in NOT_LOADABLE: + module_name = f".{module_file.stem}" + import_module(module_name, PACKAGE_BASE) + +load_entry_point("deepmd.backend") diff --git a/deepmd/backend/backend.py b/deepmd/backend/backend.py new file mode 100644 index 0000000000..58dcfe427d --- /dev/null +++ b/deepmd/backend/backend.py @@ -0,0 +1,203 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from abc import ( + abstractmethod, +) +from collections.abc import ( + Callable, +) +from enum import ( + Flag, + auto, +) +from typing import ( + TYPE_CHECKING, + ClassVar, +) + +from deepmd.utils.plugin import ( + PluginVariant, + make_plugin_registry, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +class Backend(PluginVariant, make_plugin_registry("backend")): + r"""General backend class. + + Examples + -------- + >>> @Backend.register("tf") + >>> @Backend.register("tensorflow") + >>> class TensorFlowBackend(Backend): + ... pass + """ + + @staticmethod + def get_backend(key: str) -> type["Backend"]: + """Get the backend by key. + + Parameters + ---------- + key : str + the key of a backend + + Returns + ------- + Backend + the backend + """ + return Backend.get_class_by_type(key) + + @staticmethod + def get_backends() -> dict[str, type["Backend"]]: + """Get all the registered backend names. 
+
+        Returns
+        -------
+        dict
+            all the registered backends
+        """
+        return Backend.get_plugins()
+
+    @staticmethod
+    def get_backends_by_feature(
+        feature: "Backend.Feature",
+    ) -> dict[str, type["Backend"]]:
+        """Get all the registered backend names with a specific feature.
+
+        Parameters
+        ----------
+        feature : Backend.Feature
+            the feature flag
+
+        Returns
+        -------
+        dict
+            all the registered backends with the feature
+        """
+        return {
+            key: backend
+            for key, backend in Backend.get_backends().items()
+            if backend.features & feature
+        }
+
+    @staticmethod
+    def detect_backend_by_model(filename: str) -> type["Backend"]:
+        """Detect the backend of the given model file.
+
+        Parameters
+        ----------
+        filename : str
+            The model file name
+        """
+        filename = str(filename).lower()
+        for backend in Backend.get_backends().values():
+            for suffix in backend.suffixes:
+                if filename.endswith(suffix):
+                    return backend
+        raise ValueError(f"Cannot detect the backend of the model file {filename}.")
+
+    class Feature(Flag):
+        """Feature flag to indicate whether the backend supports certain features."""
+
+        ENTRY_POINT = auto()
+        """Support entry point hook."""
+        DEEP_EVAL = auto()
+        """Support Deep Eval backend."""
+        NEIGHBOR_STAT = auto()
+        """Support neighbor statistics."""
+        IO = auto()
+        """Support IO hook."""
+
+    name: ClassVar[str] = "Unknown"
+    """The formal name of the backend.
+
+    To be consistent, this name should be also registered in the plugin system."""
+
+    features: ClassVar[Feature] = Feature(0)
+    """The features of the backend."""
+    suffixes: ClassVar[list[str]] = []
+    """The supported suffixes of the saved model.
+
+    The first element is considered as the default suffix."""
+
+    @abstractmethod
+    def is_available(self) -> bool:
+        """Check if the backend is available.
+
+        Returns
+        -------
+        bool
+            Whether the backend is available.
+ """ + + @property + @abstractmethod + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + pass + + @property + @abstractmethod + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + pass + + @property + @abstractmethod + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + pass + + @property + @abstractmethod + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + pass + + @property + @abstractmethod + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + pass diff --git a/deepmd/backend/dpmodel.py b/deepmd/backend/dpmodel.py new file mode 100644 index 0000000000..31585aa7a6 --- /dev/null +++ b/deepmd/backend/dpmodel.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Callable, +) +from typing import ( + TYPE_CHECKING, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("dp") +@Backend.register("dpmodel") +@Backend.register("np") +@Backend.register("numpy") +class DPModelBackend(Backend): + """DPModel backend that uses NumPy as the reference implementation.""" + + name = "DPModel" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.DEEP_EVAL | Backend.Feature.NEIGHBOR_STAT | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".dp", ".yaml", ".yml"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + return True + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + raise NotImplementedError(f"Unsupported backend: {self.name}") + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.dpmodel.infer.deep_eval import ( + DeepEval, + ) + + return DeepEval + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. 
+ + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.dpmodel.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.dpmodel.utils.serialization import ( + load_dp_model, + ) + + return load_dp_model + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + from deepmd.dpmodel.utils.serialization import ( + save_dp_model, + ) + + return save_dp_model diff --git a/deepmd/backend/jax.py b/deepmd/backend/jax.py new file mode 100644 index 0000000000..9c0055b4f2 --- /dev/null +++ b/deepmd/backend/jax.py @@ -0,0 +1,125 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Callable, +) +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("jax") +class JAXBackend(Backend): + """JAX backend.""" + + name = "JAX" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.IO + | Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".hlo", ".jax", ".savedmodel"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. 
+ + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("jax") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + raise NotImplementedError + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.jax.infer.deep_eval import ( + DeepEval, + ) + + return DeepEval + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.jax.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.jax.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + from deepmd.jax.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/backend/paddle.py b/deepmd/backend/paddle.py new file mode 100644 index 0000000000..670130e86a --- /dev/null +++ b/deepmd/backend/paddle.py @@ -0,0 +1,126 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Callable, +) +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("pd") +@Backend.register("paddle") +class PaddleBackend(Backend): + """Paddle backend.""" + + name = "Paddle" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".json", ".pd"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("paddle") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.pd.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. 
+ """ + from deepmd.pd.infer.deep_eval import DeepEval as DeepEvalPD + + return DeepEvalPD + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.pd.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.pd.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + from deepmd.pd.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/backend/pretrained.py b/deepmd/backend/pretrained.py new file mode 100644 index 0000000000..f6233fc3a0 --- /dev/null +++ b/deepmd/backend/pretrained.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Callable, +) +from typing import ( + TYPE_CHECKING, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) +from deepmd.pretrained.registry import ( + available_model_names, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("pretrained") +class PretrainedBackend(Backend): + """Internal virtual backend for pretrained model-name alias dispatch. + + This backend is not intended to be selected explicitly by users as a real + compute backend (such as TensorFlow/PyTorch/Paddle/JAX). 
It only bridges + built-in pretrained model names into the regular deep-eval loading path. + + For convenience, all built-in pretrained model names are registered as + suffix-like aliases, so users can pass model names directly, e.g. + ``DeepPot("DPA-3.2-5M")``. + """ + + name = "Pretrained" + features: ClassVar[Backend.Feature] = Backend.Feature.DEEP_EVAL + suffixes: ClassVar[list[str]] = [ + *[model_name.lower() for model_name in available_model_names()], + ] + + def is_available(self) -> bool: + return True + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + raise NotImplementedError("Unsupported backend: pretrained") + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + from deepmd.pretrained.deep_eval import ( + PretrainedDeepEvalBackend, + ) + + return PretrainedDeepEvalBackend + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + raise NotImplementedError("Unsupported backend: pretrained") + + @property + def serialize_hook(self) -> Callable[[str], dict]: + raise NotImplementedError("Unsupported backend: pretrained") + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + raise NotImplementedError("Unsupported backend: pretrained") diff --git a/deepmd/backend/pt_expt.py b/deepmd/backend/pt_expt.py new file mode 100644 index 0000000000..b16a6f7f08 --- /dev/null +++ b/deepmd/backend/pt_expt.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Callable, +) +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("pt-expt") +@Backend.register("pytorch-exportable") +class PyTorchExportableBackend(Backend): + """PyTorch exportable backend.""" 
+ + name = "PyTorch-Exportable" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".pte", ".pt2"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("torch") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.pt_expt.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.pt_expt.infer.deep_eval import ( + DeepEval, + ) + + return DeepEval + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.pt_expt.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.pt_expt.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. 
+ """ + from deepmd.pt_expt.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/backend/pytorch.py b/deepmd/backend/pytorch.py new file mode 100644 index 0000000000..d155ef1f41 --- /dev/null +++ b/deepmd/backend/pytorch.py @@ -0,0 +1,126 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc import ( + Callable, +) +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("pt") +@Backend.register("pytorch") +class PyTorchBackend(Backend): + """PyTorch backend.""" + + name = "PyTorch" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".pth", ".pt"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + return find_spec("torch") is not None + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.pt.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. 
+ """ + from deepmd.pt.infer.deep_eval import DeepEval as DeepEvalPT + + return DeepEvalPT + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. + """ + from deepmd.pt.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.pt.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + from deepmd.pt.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/backend/suffix.py b/deepmd/backend/suffix.py new file mode 100644 index 0000000000..3d1602da0e --- /dev/null +++ b/deepmd/backend/suffix.py @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import functools +import operator +from pathlib import ( + Path, +) + +from deepmd.backend.backend import ( + Backend, +) + + +def format_model_suffix( + filename: str, + feature: Backend.Feature | None = None, + preferred_backend: str | type["Backend"] | None = None, + strict_prefer: bool | None = None, +) -> str: + """Check and format the suffixes of a filename. + + When preferred_backend is not given, this method checks the suffix of the filename + is within the suffixes of the any backends (with the given feature) and doesn't do formatting. + When preferred_backend is given, strict_prefer must be given. 
+ If strict_prefer is True and the suffix is not within the suffixes of the preferred backend, + or strict_prefer is False and the suffix is not within the suffixes of any backend with the given feature, + the filename will be formatted with the preferred suffix of the preferred backend. + + Parameters + ---------- + filename : str + The filename to be formatted. + feature : Backend.Feature, optional + The feature of the backend, by default None + preferred_backend : str or type of Backend, optional + The preferred backend, by default None + strict_prefer : bool, optional + Whether to strictly prefer the preferred backend, by default None + + Returns + ------- + str + The formatted filename with the correct suffix. + + Raises + ------ + ValueError + When preferred_backend is not given and the filename is not supported by any backend. + """ + if preferred_backend is not None and strict_prefer is None: + raise ValueError("strict_prefer must be given when preferred_backend is given.") + if isinstance(preferred_backend, str): + preferred_backend = Backend.get_backend(preferred_backend) + if preferred_backend is not None and strict_prefer: + all_backends = [preferred_backend] + elif feature is None: + all_backends = list(Backend.get_backends().values()) + else: + all_backends = list(Backend.get_backends_by_feature(feature).values()) + + all_suffixes = set( + functools.reduce( + operator.iconcat, [backend.suffixes for backend in all_backends], [] + ) + ) + pp = Path(filename) + current_suffix = pp.suffix + if current_suffix not in all_suffixes: + if preferred_backend is not None: + return str(pp) + preferred_backend.suffixes[0] + raise ValueError(f"Unsupported model file format: {filename}") + return filename diff --git a/deepmd/backend/tensorflow.py b/deepmd/backend/tensorflow.py new file mode 100644 index 0000000000..244b4d9980 --- /dev/null +++ b/deepmd/backend/tensorflow.py @@ -0,0 +1,135 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from collections.abc 
import ( + Callable, +) +from importlib.util import ( + find_spec, +) +from typing import ( + TYPE_CHECKING, + ClassVar, +) + +from deepmd.backend.backend import ( + Backend, +) + +if TYPE_CHECKING: + from argparse import ( + Namespace, + ) + + from deepmd.infer.deep_eval import ( + DeepEvalBackend, + ) + from deepmd.utils.neighbor_stat import ( + NeighborStat, + ) + + +@Backend.register("tf") +@Backend.register("tensorflow") +class TensorFlowBackend(Backend): + """TensorFlow backend.""" + + name = "TensorFlow" + """The formal name of the backend.""" + features: ClassVar[Backend.Feature] = ( + Backend.Feature.ENTRY_POINT + | Backend.Feature.DEEP_EVAL + | Backend.Feature.NEIGHBOR_STAT + | Backend.Feature.IO + ) + """The features of the backend.""" + suffixes: ClassVar[list[str]] = [".pb"] + """The suffixes of the backend.""" + + def is_available(self) -> bool: + """Check if the backend is available. + + Returns + ------- + bool + Whether the backend is available. + """ + # deepmd.env imports expensive numpy + # avoid import outside the method + from deepmd.env import ( + GLOBAL_CONFIG, + ) + + return ( + find_spec("tensorflow") is not None + and GLOBAL_CONFIG["enable_tensorflow"] != "0" + ) + + @property + def entry_point_hook(self) -> Callable[["Namespace"], None]: + """The entry point hook of the backend. + + Returns + ------- + Callable[[Namespace], None] + The entry point hook of the backend. + """ + from deepmd.tf.entrypoints.main import main as deepmd_main + + return deepmd_main + + @property + def deep_eval(self) -> type["DeepEvalBackend"]: + """The Deep Eval backend of the backend. + + Returns + ------- + type[DeepEvalBackend] + The Deep Eval backend of the backend. + """ + from deepmd.tf.infer.deep_eval import DeepEval as DeepEvalTF + + return DeepEvalTF + + @property + def neighbor_stat(self) -> type["NeighborStat"]: + """The neighbor statistics of the backend. + + Returns + ------- + type[NeighborStat] + The neighbor statistics of the backend. 
+ """ + from deepmd.tf.utils.neighbor_stat import ( + NeighborStat, + ) + + return NeighborStat + + @property + def serialize_hook(self) -> Callable[[str], dict]: + """The serialize hook to convert the model file to a dictionary. + + Returns + ------- + Callable[[str], dict] + The serialize hook of the backend. + """ + from deepmd.tf.utils.serialization import ( + serialize_from_file, + ) + + return serialize_from_file + + @property + def deserialize_hook(self) -> Callable[[str, dict], None]: + """The deserialize hook to convert the dictionary to a model file. + + Returns + ------- + Callable[[str, dict], None] + The deserialize hook of the backend. + """ + from deepmd.tf.utils.serialization import ( + deserialize_to_file, + ) + + return deserialize_to_file diff --git a/deepmd/calculator.py b/deepmd/calculator.py index cd361eed5f..1d8e955de7 100644 --- a/deepmd/calculator.py +++ b/deepmd/calculator.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later """ASE calculator interface module.""" from pathlib import ( @@ -5,8 +6,8 @@ ) from typing import ( TYPE_CHECKING, - Dict, - List, + Any, + ClassVar, Optional, Union, ) @@ -17,14 +18,17 @@ all_changes, ) -from deepmd import ( - DeepPotential, +from deepmd.infer import ( + DeepPot, ) if TYPE_CHECKING: from ase import ( Atoms, ) + from ase.neighborlist import ( + NeighborList, + ) __all__ = ["DP"] @@ -32,7 +36,7 @@ class DP(Calculator): """Implementation of ASE deepmd calculator. 
- Implemented propertie are `energy`, `forces` and `stress` + Implemented properties are `energy`, `forces` and `stress` Parameters ---------- @@ -40,16 +44,20 @@ class DP(Calculator): path to the model label : str, optional calculator label, by default "DP" - type_dict : Dict[str, int], optional + type_dict : dict[str, int], optional mapping of element types and their numbers, best left None and the calculator will infer this information from model, by default None + neighbor_list : ase.neighborlist.NeighborList, optional + The neighbor list object. If None, then build the native neighbor list. + head : Union[str, None], optional + a specific model branch choosing from pretrained model, by default None Examples -------- Compute potential energy >>> from ase import Atoms - >>> from deepmd.calculator import DP + >>> from deepmd.tf.calculator import DP >>> water = Atoms('H2O', >>> positions=[(0.7601, 1.9270, 1), >>> (1.9575, 1, 1), @@ -68,40 +76,52 @@ class DP(Calculator): """ name = "DP" - implemented_properties = ["energy", "free_energy", "forces", "virial", "stress"] + implemented_properties: ClassVar[list[str]] = [ + "energy", + "free_energy", + "forces", + "virial", + "stress", + ] def __init__( self, model: Union[str, "Path"], label: str = "DP", - type_dict: Dict[str, int] = None, - **kwargs + type_dict: dict[str, int] | None = None, + neighbor_list: Optional["NeighborList"] = None, + head: str | None = None, + **kwargs: Any, ) -> None: Calculator.__init__(self, label=label, **kwargs) - self.dp = DeepPotential(str(Path(model).resolve())) + self.dp = DeepPot( + str(Path(model).resolve()), + neighbor_list=neighbor_list, + head=head, + ) if type_dict: self.type_dict = type_dict else: self.type_dict = dict( - zip(self.dp.get_type_map(), range(self.dp.get_ntypes())) + zip(self.dp.get_type_map(), range(self.dp.get_ntypes()), strict=True) ) def calculate( self, atoms: Optional["Atoms"] = None, - properties: List[str] = ["energy", "forces", "virial"], - system_changes: 
List[str] = all_changes, - ): + properties: list[str] = ["energy", "forces", "virial"], + system_changes: list[str] = all_changes, + ) -> None: """Run calculation with deepmd model. Parameters ---------- atoms : Optional[Atoms], optional atoms object to run the calculation on, by default None - properties : List[str], optional + properties : list[str], optional unused, only for function signature compatibility, by default ["energy", "forces", "stress"] - system_changes : List[str], optional + system_changes : list[str], optional unused, only for function signature compatibility, by default all_changes """ if atoms is not None: @@ -114,7 +134,12 @@ def calculate( cell = None symbols = self.atoms.get_chemical_symbols() atype = [self.type_dict[k] for k in symbols] - e, f, v = self.dp.eval(coords=coord, cells=cell, atom_types=atype) + + fparam = self.atoms.info.get("fparam", None) + aparam = self.atoms.info.get("aparam", None) + e, f, v = self.dp.eval( + coords=coord, cells=cell, atom_types=atype, fparam=fparam, aparam=aparam + )[:3] self.results["energy"] = e[0][0] # see https://gitlab.com/ase/ase/-/merge_requests/2485 self.results["free_energy"] = e[0][0] @@ -122,12 +147,13 @@ def calculate( self.results["virial"] = v[0].reshape(3, 3) # convert virial into stress for lattice relaxation - if "stress" in properties: - if sum(atoms.get_pbc()) > 0: - # the usual convention (tensile stress is positive) - # stress = -virial / volume - stress = -0.5 * (v[0].copy() + v[0].copy().T) / atoms.get_volume() - # Voigt notation - self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]] - else: - raise PropertyNotImplementedError + if cell is not None: + # the usual convention (tensile stress is positive) + # stress = -virial / volume + stress = -0.5 * (v[0].copy() + v[0].copy().T) / atoms.get_volume() + # Voigt notation + self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]] + elif "stress" in properties: + raise PropertyNotImplementedError + else: + pass diff --git 
a/deepmd/cluster/__init__.py b/deepmd/cluster/__init__.py deleted file mode 100644 index 4cba0c10b9..0000000000 --- a/deepmd/cluster/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Module that reads node resources, auto detects if running local or on SLURM.""" - -import os -from typing import ( - List, - Optional, - Tuple, -) - -from .local import get_resource as get_local_res -from .slurm import get_resource as get_slurm_res - -__all__ = ["get_resource"] - - -def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: - """Get local or slurm resources: nodename, nodelist, and gpus. - - Returns - ------- - Tuple[str, List[str], Optional[List[int]]] - nodename, nodelist, and gpus - """ - if "SLURM_JOB_NODELIST" in os.environ: - return get_slurm_res() - else: - return get_local_res() diff --git a/deepmd/cluster/slurm.py b/deepmd/cluster/slurm.py deleted file mode 100644 index 2be6b438f2..0000000000 --- a/deepmd/cluster/slurm.py +++ /dev/null @@ -1,58 +0,0 @@ -"""MOdule to get resources on SLURM cluster. - -References ----------- -https://github.com/deepsense-ai/tensorflow_on_slurm #### -""" - -import os -from typing import ( - List, - Optional, - Tuple, -) - -import hostlist - -from deepmd.cluster import ( - local, -) - -__all__ = ["get_resource"] - - -def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: - """Get SLURM resources: nodename, nodelist, and gpus. 
- - Returns - ------- - Tuple[str, List[str], Optional[List[int]]] - nodename, nodelist, and gpus - - Raises - ------ - RuntimeError - if number of nodes could not be retrieved - ValueError - list of nodes is not of the same length sa number of nodes - ValueError - if current nodename is not found in node list - """ - nodelist = hostlist.expand_hostlist(os.environ["SLURM_JOB_NODELIST"]) - nodename = os.environ["SLURMD_NODENAME"] - num_nodes_env = os.getenv("SLURM_JOB_NUM_NODES") - if num_nodes_env: - num_nodes = int(num_nodes_env) - else: - raise RuntimeError("Could not get SLURM number of nodes") - - if len(nodelist) != num_nodes: - raise ValueError( - f"Number of slurm nodes {len(nodelist)} not equal to {num_nodes}" - ) - if nodename not in nodelist: - raise ValueError( - f"Nodename({nodename}) not in nodelist({nodelist}). This should not happen!" - ) - gpus = local.get_gpus() - return nodename, nodelist, gpus diff --git a/deepmd/common.py b/deepmd/common.py index d0afbf0784..98cf2461bd 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -1,9 +1,12 @@ -"""Collection of functions and classes used throughout the whole package.""" - +# SPDX-License-Identifier: LGPL-3.0-or-later +import glob import json +import os +import platform +import shutil import warnings -from functools import ( - wraps, +from hashlib import ( + sha1, ) from pathlib import ( Path, @@ -11,172 +14,61 @@ from typing import ( TYPE_CHECKING, Any, - Callable, - Dict, - List, - Optional, TypeVar, - Union, + get_args, ) +try: + from typing import Literal # python >=3.8 +except ImportError: + from typing import Literal # type: ignore + import numpy as np -import tensorflow import yaml -from tensorflow.python.framework import ( - tensor_util, -) from deepmd.env import ( GLOBAL_NP_FLOAT_PRECISION, - GLOBAL_TF_FLOAT_PRECISION, - op_module, - tf, ) from deepmd.utils.path import ( DPPath, ) +__all__ = [ + "GLOBAL_NP_FLOAT_PRECISION", + "VALID_ACTIVATION", + "VALID_PRECISION", + "expand_sys_str", + 
"get_np_precision", + "j_loader", + "make_default_mesh", + "select_idx_map", +] + +_PRECISION = Literal["default", "float16", "bfloat16", "float32", "float64"] +_ACTIVATION = Literal[ + "relu", + "relu6", + "softplus", + "sigmoid", + "tanh", + "gelu", + "gelu_tf", + "silu", + "silut", + "none", + "linear", +] +# get_args is new in py38 +VALID_PRECISION: set[_PRECISION] = set(get_args(_PRECISION)) +VALID_ACTIVATION: set[_ACTIVATION] = set(get_args(_ACTIVATION)) + if TYPE_CHECKING: _DICT_VAL = TypeVar("_DICT_VAL") - _OBJ = TypeVar("_OBJ") - try: - from typing import Literal # python >3.6 - except ImportError: - from typing_extensions import Literal # type: ignore - _ACTIVATION = Literal[ - "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf" + __all__ += [ + "_ACTIVATION", + "_DICT_VAL", + "_PRECISION", ] - _PRECISION = Literal["default", "float16", "float32", "float64"] - -# define constants -PRECISION_DICT = { - "default": GLOBAL_TF_FLOAT_PRECISION, - "float16": tf.float16, - "float32": tf.float32, - "float64": tf.float64, - "bfloat16": tf.bfloat16, -} - - -def gelu(x: tf.Tensor) -> tf.Tensor: - """Gaussian Error Linear Unit. - - This is a smoother version of the RELU, implemented by custom operator. - - Parameters - ---------- - x : tf.Tensor - float Tensor to perform activation - - Returns - ------- - tf.Tensor - `x` with the GELU activation applied - - References - ---------- - Original paper - https://arxiv.org/abs/1606.08415 - """ - return op_module.gelu_custom(x) - - -def gelu_tf(x: tf.Tensor) -> tf.Tensor: - """Gaussian Error Linear Unit. - - This is a smoother version of the RELU, implemented by TF. 
- - Parameters - ---------- - x : tf.Tensor - float Tensor to perform activation - - Returns - ------- - tf.Tensor - `x` with the GELU activation applied - - References - ---------- - Original paper - https://arxiv.org/abs/1606.08415 - """ - - def gelu_wrapper(x): - try: - return tensorflow.nn.gelu(x, approximate=True) - except AttributeError: - warnings.warn( - "TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator." - ) - return op_module.gelu_custom(x) - - return (lambda x: gelu_wrapper(x))(x) - - -# TODO this is not a good way to do things. This is some global variable to which -# TODO anyone can write and there is no good way to keep track of the changes -data_requirement = {} - -ACTIVATION_FN_DICT = { - "relu": tf.nn.relu, - "relu6": tf.nn.relu6, - "softplus": tf.nn.softplus, - "sigmoid": tf.sigmoid, - "tanh": tf.nn.tanh, - "gelu": gelu, - "gelu_tf": gelu_tf, - "None": None, - "none": None, -} - - -def add_data_requirement( - key: str, - ndof: int, - atomic: bool = False, - must: bool = False, - high_prec: bool = False, - type_sel: Optional[bool] = None, - repeat: int = 1, - default: float = 0.0, - dtype: Optional[np.dtype] = None, -): - """Specify data requirements for training. - - Parameters - ---------- - key : str - type of data stored in corresponding `*.npy` file e.g. `forces` or `energy` - ndof : int - number of the degrees of freedom, this is tied to `atomic` parameter e.g. 
forces - have `atomic=True` and `ndof=3` - atomic : bool, optional - specifies whwther the `ndof` keyworrd applies to per atom quantity or not, - by default False - must : bool, optional - specifi if the `*.npy` data file must exist, by default False - high_prec : bool, optional - if true load data to `np.float64` else `np.float32`, by default False - type_sel : bool, optional - select only certain type of atoms, by default None - repeat : int, optional - if specify repaeat data `repeat` times, by default 1 - default : float, optional, default=0. - default value of data - dtype : np.dtype, optional - the dtype of data, overwrites `high_prec` if provided - """ - data_requirement[key] = { - "ndof": ndof, - "atomic": atomic, - "must": must, - "high_prec": high_prec, - "type_sel": type_sel, - "repeat": repeat, - "default": default, - "dtype": dtype, - } def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray: @@ -185,7 +77,7 @@ def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarr Parameters ---------- atom_types : np.ndarray - array specifing type for each atoms as integer + array specifying type for each atoms as integer select_types : np.ndarray types of atoms you want to find indices for @@ -205,38 +97,45 @@ def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarr return np.concatenate(idx_map) -# TODO not really sure if the docstring is right the purpose of this is a bit unclear -def make_default_mesh(test_box: np.ndarray, cell_size: float = 3.0) -> np.ndarray: - """Get number of cells of size=`cell_size` fit into average box. +def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray: + """Make mesh. 
+ + Only the size of mesh matters, not the values: + * 6 for PBC, no mixed types + * 0 for no PBC, no mixed types + * 7 for PBC, mixed types + * 1 for no PBC, mixed types Parameters ---------- - test_box : np.ndarray - numpy array with cells of shape Nx9 - cell_size : float, optional - length of one cell, by default 3.0 + pbc : bool + if True, the mesh will be made for periodic boundary conditions + mixed_type : bool + if True, the mesh will be made for mixed types Returns ------- np.ndarray - mesh for supplied boxes, how many cells fit in each direction + mesh """ - cell_lengths = np.linalg.norm(test_box.reshape([-1, 3, 3]), axis=2) - avg_cell_lengths = np.average(cell_lengths, axis=0) - ncell = (avg_cell_lengths / cell_size).astype(np.int32) - ncell[ncell < 2] = 2 - default_mesh = np.zeros(6, dtype=np.int32) - default_mesh[3:6] = ncell + mesh_size = int(pbc) * 6 + int(mixed_type) + default_mesh = np.zeros(mesh_size, dtype=np.int32) return default_mesh -# TODO maybe rename this to j_deprecated and only warn about deprecated keys, -# TODO if the deprecated_key argument is left empty function puppose is only custom -# TODO error since dict[key] already raises KeyError when the key is missing -def j_must_have( - jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = [] +def j_deprecated( + jdata: dict[str, "_DICT_VAL"], key: str, deprecated_key: list[str] = [] ) -> "_DICT_VAL": - """Assert that supplied dictionary conaines specified key. + """Assert that supplied dictionary contains specified key. + + Parameters + ---------- + jdata : dict[str, _DICT_VAL] + dictionary to check + key : str + key to check + deprecated_key : list[str], optional + list of deprecated keys, by default [] Returns ------- @@ -259,7 +158,7 @@ def j_must_have( return jdata[key] -def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: +def j_loader(filename: str | Path) -> dict[str, Any]: """Load yaml or json settings file. 
Parameters @@ -269,7 +168,7 @@ def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: Returns ------- - Dict[str, Any] + dict[str, Any] loaded dictionary Raises @@ -288,75 +187,48 @@ def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: raise TypeError("config file must be json, or yaml/yml") -def get_activation_func( - activation_fn: Union["_ACTIVATION", None], -) -> Union[Callable[[tf.Tensor], tf.Tensor], None]: - """Get activation function callable based on string name. - - Parameters - ---------- - activation_fn : _ACTIVATION - one of the defined activation functions - - Returns - ------- - Callable[[tf.Tensor], tf.Tensor] - correspondingg TF callable - - Raises - ------ - RuntimeError - if unknown activation function is specified - """ - if activation_fn is None: - return None - if activation_fn not in ACTIVATION_FN_DICT: - raise RuntimeError(f"{activation_fn} is not a valid activation function") - return ACTIVATION_FN_DICT[activation_fn] - - -def get_precision(precision: "_PRECISION") -> Any: - """Convert str to TF DType constant. +def expand_sys_str(root_dir: str | Path) -> list[str]: + """Recursively iterate over directories taking those that contain `type.raw` file. 
Parameters ---------- - precision : _PRECISION - one of the allowed precisions + root_dir : Union[str, Path] + starting directory Returns ------- - tf.python.framework.dtypes.DType - appropriate TF constant - - Raises - ------ - RuntimeError - if supplied precision string does not have acorresponding TF constant + list[str] + list of string pointing to system directories """ - if precision not in PRECISION_DICT: - raise RuntimeError(f"{precision} is not a valid precision") - return PRECISION_DICT[precision] + root_dir = DPPath(root_dir) + matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()] + if (root_dir / "type.raw").is_file(): + matches.append(str(root_dir)) + return matches -# TODO port completely to pathlib when all callers are ported -def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: +def rglob_sys_str(root_dir: str, patterns: list[str]) -> list[str]: """Recursively iterate over directories taking those that contain `type.raw` file. Parameters ---------- - root_dir : Union[str, Path] + root_dir : str, Path starting directory + patterns : list[str] + list of glob patterns to match directories Returns ------- - List[str] + list[str] list of string pointing to system directories """ - root_dir = DPPath(root_dir) - matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()] - if (root_dir / "type.raw").is_file(): - matches.append(str(root_dir)) - return matches + root_dir = Path(root_dir) + matches = [] + for pattern in patterns: + matches.extend( + [str(d) for d in root_dir.rglob(pattern) if (d / "type.raw").is_file()] + ) + return list(set(matches)) # remove duplicates def get_np_precision(precision: "_PRECISION") -> np.dtype: @@ -370,116 +242,74 @@ def get_np_precision(precision: "_PRECISION") -> np.dtype: Returns ------- np.dtype - numpy presicion constant + numpy precision constant Raises ------ RuntimeError if string is invalid """ - if precision == "default": - return GLOBAL_NP_FLOAT_PRECISION - elif 
precision == "float16": - return np.float16 - elif precision == "float32": - return np.float32 - elif precision == "float64": - return np.float64 - else: - raise RuntimeError(f"{precision} is not a valid precision") + from deepmd.dpmodel.common import ( + get_xp_precision, + ) + + return get_xp_precision(np, precision) -def safe_cast_tensor( - input: tf.Tensor, from_precision: tf.DType, to_precision: tf.DType -) -> tf.Tensor: - """Convert a Tensor from a precision to another precision. +def symlink_prefix_files(old_prefix: str, new_prefix: str) -> None: + """Create symlinks from old checkpoint prefix to new one. - If input is not a Tensor or without the specific precision, the method will not - cast it. + On Windows this function will copy files instead of creating symlinks. Parameters ---------- - input : tf.Tensor - input tensor - from_precision : tf.DType - Tensor data type that is casted from - to_precision : tf.DType - Tensor data type that casts to - - Returns - ------- - tf.Tensor - casted Tensor + old_prefix : str + old checkpoint prefix, all files with this prefix will be symlinked + new_prefix : str + new checkpoint prefix """ - if tensor_util.is_tensor(input) and input.dtype == from_precision: - return tf.cast(input, to_precision) - return input - - -def cast_precision(func: Callable) -> Callable: - """A decorator that casts and casts back the input - and output tensor of a method. - - The decorator should be used in a classmethod. 
+ original_files = glob.glob(old_prefix + ".*") + for ori_ff in original_files: + new_ff = new_prefix + ori_ff[len(old_prefix) :] + try: + # remove old one + os.remove(new_ff) + except OSError: + pass + if platform.system() != "Windows": + # by default one does not have access to create symlink on Windows + os.symlink(os.path.relpath(ori_ff, os.path.dirname(new_ff)), new_ff) + else: + shutil.copyfile(ori_ff, new_ff) - The decorator will do the following thing: - (1) It casts input Tensors from `GLOBAL_TF_FLOAT_PRECISION` - to precision defined by property `precision`. - (2) It casts output Tensors from `precision` to - `GLOBAL_TF_FLOAT_PRECISION`. - (3) It checks inputs and outputs and only casts when - input or output is a Tensor and its dtype matches - `GLOBAL_TF_FLOAT_PRECISION` and `precision`, respectively. - If it does not match (e.g. it is an integer), the decorator - will do nothing on it. - Returns - ------- - Callable - a decorator that casts and casts back the input and - output tensor of a method +def get_hash(obj: Any) -> str: + """Get hash of object. - Examples - -------- - >>> class A: - ... @property - ... def precision(self): - ... return tf.float32 - ... - ... @cast_precision - ... def f(x: tf.Tensor, y: tf.Tensor) -> tf.Tensor: - ... 
return x ** 2 + y + Parameters + ---------- + obj + object to hash """ + return sha1(json.dumps(obj).encode("utf-8")).hexdigest() - @wraps(func) - def wrapper(self, *args, **kwargs): - # only convert tensors - returned_tensor = func( - self, - *[ - safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) - for vv in args - ], - **{ - kk: safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) - for kk, vv in kwargs.items() - }, - ) - if isinstance(returned_tensor, tuple): - return tuple( - safe_cast_tensor(vv, self.precision, GLOBAL_TF_FLOAT_PRECISION) - for vv in returned_tensor - ) - else: - return safe_cast_tensor( - returned_tensor, self.precision, GLOBAL_TF_FLOAT_PRECISION - ) - return wrapper +def j_get_type(data: dict, class_name: str = "object") -> str: + """Get the type from the data. + Parameters + ---------- + data : dict + the data + class_name : str, optional + the name of the class for error message, by default "object" -def clear_session(): - """Reset all state generated by DeePMD-kit.""" - tf.reset_default_graph() - # TODO: remove this line when data_requirement is not a global variable - data_requirement.clear() + Returns + ------- + str + the type + """ + try: + return data["type"] + except KeyError as e: + raise KeyError(f"the type of the {class_name} should be set by `type`") from e diff --git a/deepmd/descriptor/__init__.py b/deepmd/descriptor/__init__.py deleted file mode 100644 index 3b55a4f97a..0000000000 --- a/deepmd/descriptor/__init__.py +++ /dev/null @@ -1,45 +0,0 @@ -from .descriptor import ( - Descriptor, -) -from .hybrid import ( - DescrptHybrid, -) -from .loc_frame import ( - DescrptLocFrame, -) -from .se_a import ( - DescrptSeA, -) -from .se_a_ebd import ( - DescrptSeAEbd, -) -from .se_a_ef import ( - DescrptSeAEf, - DescrptSeAEfLower, -) -from .se_a_mask import ( - DescrptSeAMask, -) -from .se_atten import ( - DescrptSeAtten, -) -from .se_r import ( - DescrptSeR, -) -from .se_t import ( - DescrptSeT, -) - -__all__ = 
[ - "Descriptor", - "DescrptHybrid", - "DescrptLocFrame", - "DescrptSeA", - "DescrptSeAEbd", - "DescrptSeAEf", - "DescrptSeAEfLower", - "DescrptSeAMask", - "DescrptSeAtten", - "DescrptSeR", - "DescrptSeT", -] diff --git a/deepmd/descriptor/descriptor.py b/deepmd/descriptor/descriptor.py deleted file mode 100644 index 15f3cfaa4c..0000000000 --- a/deepmd/descriptor/descriptor.py +++ /dev/null @@ -1,523 +0,0 @@ -from abc import ( - abstractmethod, -) -from typing import ( - Any, - Dict, - List, - Optional, - Tuple, -) - -import numpy as np - -from deepmd.env import ( - GLOBAL_TF_FLOAT_PRECISION, - tf, -) -from deepmd.utils import ( - Plugin, - PluginVariant, -) - - -class Descriptor(PluginVariant): - r"""The abstract class for descriptors. All specific descriptors should - be based on this class. - - The descriptor :math:`\mathcal{D}` describes the environment of an atom, - which should be a function of coordinates and types of its neighbour atoms. - - Examples - -------- - >>> descript = Descriptor(type="se_e2_a", rcut=6., rcut_smth=0.5, sel=[50]) - >>> type(descript) - - - Notes - ----- - Only methods and attributes defined in this class are generally public, - that can be called by other classes. - """ - - __plugins = Plugin() - - @staticmethod - def register(key: str) -> "Descriptor": - """Regiester a descriptor plugin. 
- - Parameters - ---------- - key : str - the key of a descriptor - - Returns - ------- - Descriptor - the regiestered descriptor - - Examples - -------- - >>> @Descriptor.register("some_descrpt") - class SomeDescript(Descriptor): - pass - """ - return Descriptor.__plugins.register(key) - - def __new__(cls, *args, **kwargs): - if cls is Descriptor: - try: - descrpt_type = kwargs["type"] - except KeyError: - raise KeyError("the type of descriptor should be set by `type`") - if descrpt_type in Descriptor.__plugins.plugins: - cls = Descriptor.__plugins.plugins[descrpt_type] - else: - raise RuntimeError("Unknown descriptor type: " + descrpt_type) - return super().__new__(cls) - - @abstractmethod - def get_rcut(self) -> float: - """Returns the cut-off radius. - - Returns - ------- - float - the cut-off radius - - Notes - ----- - This method must be implemented, as it's called by other classes. - """ - - @abstractmethod - def get_ntypes(self) -> int: - """Returns the number of atom types. - - Returns - ------- - int - the number of atom types - - Notes - ----- - This method must be implemented, as it's called by other classes. - """ - - @abstractmethod - def get_dim_out(self) -> int: - """Returns the output dimension of this descriptor. - - Returns - ------- - int - the output dimension of this descriptor - - Notes - ----- - This method must be implemented, as it's called by other classes. - """ - - def get_dim_rot_mat_1(self) -> int: - """Returns the first dimension of the rotation matrix. The rotation is of shape - dim_1 x 3. - - Returns - ------- - int - the first dimension of the rotation matrix - """ - # TODO: I think this method should be implemented as it's called by dipole and - # polar fitting network. However, currently not all descriptors have this - # method. - raise NotImplementedError - - def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: - """Returns neighbor information. 
- - Returns - ------- - nlist : tf.Tensor - Neighbor list - rij : tf.Tensor - The relative distance between the neighbor and the center atom. - sel_a : list[int] - The number of neighbors with full information - sel_r : list[int] - The number of neighbors with only radial information - """ - # TODO: I think this method should be implemented as it's called by energy - # model. However, se_ar and hybrid doesn't have this method. - raise NotImplementedError - - @abstractmethod - def compute_input_stats( - self, - data_coord: List[np.ndarray], - data_box: List[np.ndarray], - data_atype: List[np.ndarray], - natoms_vec: List[np.ndarray], - mesh: List[np.ndarray], - input_dict: Dict[str, List[np.ndarray]], - ) -> None: - """Compute the statisitcs (avg and std) of the training data. The input will be - normalized by the statistics. - - Parameters - ---------- - data_coord : list[np.ndarray] - The coordinates. Can be generated by - :meth:`deepmd.model.model_stat.make_stat_input` - data_box : list[np.ndarray] - The box. Can be generated by - :meth:`deepmd.model.model_stat.make_stat_input` - data_atype : list[np.ndarray] - The atom types. Can be generated by :meth:`deepmd.model.model_stat.make_stat_input` - natoms_vec : list[np.ndarray] - The vector for the number of atoms of the system and different types of - atoms. Can be generated by :meth:`deepmd.model.model_stat.make_stat_input` - mesh : list[np.ndarray] - The mesh for neighbor searching. Can be generated by - :meth:`deepmd.model.model_stat.make_stat_input` - input_dict : dict[str, list[np.ndarray]] - Dictionary for additional input - - Notes - ----- - This method must be implemented, as it's called by other classes. - """ - - @abstractmethod - def build( - self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box_: tf.Tensor, - mesh: tf.Tensor, - input_dict: Dict[str, Any], - reuse: Optional[bool] = None, - suffix: str = "", - ) -> tf.Tensor: - """Build the computational graph for the descriptor. 
- - Parameters - ---------- - coord_ : tf.Tensor - The coordinate of atoms - atype_ : tf.Tensor - The type of atoms - natoms : tf.Tensor - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - box_ : tf.Tensor - The box of frames - mesh : tf.Tensor - For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. - input_dict : dict[str, Any] - Dictionary for additional inputs - reuse : bool, optional - The weights in the networks should be reused when get the variable. - suffix : str, optional - Name suffix to identify this descriptor - - Returns - ------- - descriptor: tf.Tensor - The output descriptor - - Notes - ----- - This method must be implemented, as it's called by other classes. - """ - - def enable_compression( - self, - min_nbor_dist: float, - graph: tf.Graph, - graph_def: tf.GraphDef, - table_extrapolate: float = 5.0, - table_stride_1: float = 0.01, - table_stride_2: float = 0.1, - check_frequency: int = -1, - suffix: str = "", - ) -> None: - """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the - training data. - - Parameters - ---------- - min_nbor_dist : float - The nearest distance between atoms - graph : tf.Graph - The graph of the model - graph_def : tf.GraphDef - The graph definition of the model - table_extrapolate : float, default: 5. - The scale of model extrapolation - table_stride_1 : float, default: 0.01 - The uniform stride of the first table - table_stride_2 : float, default: 0.1 - The uniform stride of the second table - check_frequency : int, default: -1 - The overflow check frequency - suffix : str, optional - The suffix of the scope - - Notes - ----- - This method is called by others when the descriptor supported compression. 
- """ - raise NotImplementedError( - "Descriptor %s doesn't support compression!" % type(self).__name__ - ) - - def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: - """Reveive the mixed precision setting. - - Parameters - ---------- - mixed_prec - The mixed precision setting used in the embedding net - - Notes - ----- - This method is called by others when the descriptor supported compression. - """ - raise NotImplementedError( - "Descriptor %s doesn't support mixed precision training!" - % type(self).__name__ - ) - - @abstractmethod - def prod_force_virial( - self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: - """Compute force and virial. - - Parameters - ---------- - atom_ener : tf.Tensor - The atomic energy - natoms : tf.Tensor - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - - Returns - ------- - force : tf.Tensor - The force on atoms - virial : tf.Tensor - The total virial - atom_virial : tf.Tensor - The atomic virial - """ - - def get_feed_dict( - self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box: tf.Tensor, - mesh: tf.Tensor, - ) -> Dict[str, tf.Tensor]: - """Generate the feed_dict for current descriptor. - - Parameters - ---------- - coord_ : tf.Tensor - The coordinate of atoms - atype_ : tf.Tensor - The type of atoms - natoms : tf.Tensor - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - box : tf.Tensor - The box. Can be generated by deepmd.model.make_stat_input - mesh : tf.Tensor - For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. 
- - Returns - ------- - feed_dict : dict[str, tf.Tensor] - The output feed_dict of current descriptor - """ - feed_dict = { - "t_coord:0": coord_, - "t_type:0": atype_, - "t_natoms:0": natoms, - "t_box:0": box, - "t_mesh:0": mesh, - } - return feed_dict - - def init_variables( - self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix: str = "", - ) -> None: - """Init the embedding net variables with the given dict. - - Parameters - ---------- - graph : tf.Graph - The input frozen model graph - graph_def : tf.GraphDef - The input frozen model graph_def - suffix : str, optional - The suffix of the scope - - Notes - ----- - This method is called by others when the descriptor supported initialization from the given variables. - """ - raise NotImplementedError( - "Descriptor %s doesn't support initialization from the given variables!" - % type(self).__name__ - ) - - def get_tensor_names(self, suffix: str = "") -> Tuple[str]: - """Get names of tensors. - - Parameters - ---------- - suffix : str - The suffix of the scope - - Returns - ------- - Tuple[str] - Names of tensors - """ - raise NotImplementedError( - "Descriptor %s doesn't support this property!" % type(self).__name__ - ) - - def pass_tensors_from_frz_model( - self, - *tensors: tf.Tensor, - ) -> None: - """Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def. - - Parameters - ---------- - *tensors : tf.Tensor - passed tensors - - Notes - ----- - The number of parameters in the method must be equal to the numbers of returns in - :meth:`get_tensor_names`. - """ - raise NotImplementedError( - "Descriptor %s doesn't support this method!" % type(self).__name__ - ) - - def build_type_exclude_mask( - self, - exclude_types: List[Tuple[int, int]], - ntypes: int, - sel: List[int], - ndescrpt: int, - atype: tf.Tensor, - shape0: tf.Tensor, - ) -> tf.Tensor: - r"""Build the type exclude mask for the descriptor. 
- - Notes - ----- - To exclude the interaction between two types, the derivative of energy with - respect to distances (or angles) between two atoms should be zero[1]_, i.e. - - .. math:: - \forall i \in \text{type 1}, j \in \text{type 2}, - \frac{\partial{E}}{\partial{r_{ij}}} = 0 - - When embedding networks between every two types are built, we can just remove - that network. But when `type_one_side` is enabled, a network may be built for - multiple pairs of types. In this case, we need to build a mask to exclude the - interaction between two types. - - The mask assumes the descriptors are sorted by neighbro type with the fixed - number of given `sel` and each neighbor has the same number of descriptors - (for example 4). - - Parameters - ---------- - exclude_types : List[Tuple[int, int]] - The list of excluded types, e.g. [(0, 1), (1, 0)] means the interaction - between type 0 and type 1 is excluded. - ntypes : int - The number of types. - sel : List[int] - The list of the number of selected neighbors for each type. - ndescrpt : int - The number of descriptors for each atom. - atype : tf.Tensor - The type of atoms, with the size of shape0. - shape0 : tf.Tensor - The shape of the first dimension of the inputs, which is equal to - nsamples * natoms. - - Returns - ------- - tf.Tensor - The type exclude mask, with the shape of (shape0, ndescrpt), and the - precision of GLOBAL_TF_FLOAT_PRECISION. The mask has the value of 1 if the - interaction between two types is not excluded, and 0 otherwise. - - References - ---------- - .. [1] Jinzhe Zeng, Timothy J. Giese, ̧Sölen Ekesan, Darrin M. York, - Development of Range-Corrected Deep Learning Potentials for Fast, - Accurate Quantum Mechanical/molecular Mechanical Simulations of - Chemical Reactions in Solution, J. Chem. Theory Comput., 2021, - 17 (11), 6993-7009. 
- """ - # generate a mask - type_mask = np.array( - [ - [ - 1 if (tt_i, tt_j) not in exclude_types else 0 - for tt_i in range(ntypes) - ] - for tt_j in range(ntypes) - ], - dtype=bool, - ) - type_mask = tf.convert_to_tensor(type_mask, dtype=GLOBAL_TF_FLOAT_PRECISION) - type_mask = tf.reshape(type_mask, [-1]) - - # (nsamples * natoms, 1) - atype_expand = tf.reshape(atype, [-1, 1]) - # (nsamples * natoms, ndescrpt) - idx_i = tf.tile(atype_expand * ntypes, (1, ndescrpt)) - ndescrpt_per_neighbor = ndescrpt // np.sum(sel) - # assume the number of neighbors for each type is the same - assert ndescrpt_per_neighbor * np.sum(sel) == ndescrpt - atype_descrpt = np.repeat( - np.arange(ntypes), np.array(sel) * ndescrpt_per_neighbor - ) - atype_descrpt = tf.convert_to_tensor(atype_descrpt, dtype=tf.int32) - # (1, ndescrpt) - atype_descrpt = tf.reshape(atype_descrpt, (1, ndescrpt)) - # (nsamples * natoms, ndescrpt) - idx_j = tf.tile(atype_descrpt, (shape0, 1)) - # the index to mask (row index * ntypes + col index) - idx = idx_i + idx_j - idx = tf.reshape(idx, [-1]) - mask = tf.nn.embedding_lookup(type_mask, idx) - # same as inputs_i, (nsamples * natoms, ndescrpt) - mask = tf.reshape(mask, [-1, ndescrpt]) - return mask diff --git a/deepmd/descriptor/hybrid.py b/deepmd/descriptor/hybrid.py deleted file mode 100644 index 538ebdf168..0000000000 --- a/deepmd/descriptor/hybrid.py +++ /dev/null @@ -1,383 +0,0 @@ -from typing import ( - List, - Optional, - Tuple, -) - -import numpy as np - -from deepmd.env import ( - GLOBAL_TF_FLOAT_PRECISION, - tf, -) - -# from deepmd.descriptor import DescrptLocFrame -# from deepmd.descriptor import DescrptSeA -# from deepmd.descriptor import DescrptSeT -# from deepmd.descriptor import DescrptSeAEbd -# from deepmd.descriptor import DescrptSeAEf -# from deepmd.descriptor import DescrptSeR -from .descriptor import ( - Descriptor, -) - - -@Descriptor.register("hybrid") -class DescrptHybrid(Descriptor): - """Concate a list of descriptors to form a new 
descriptor. - - Parameters - ---------- - list : list - Build a descriptor from the concatenation of the list of descriptors. - """ - - def __init__(self, list: list, multi_task: bool = False) -> None: - """Constructor.""" - # warning: list is conflict with built-in list - descrpt_list = list - if descrpt_list == [] or descrpt_list is None: - raise RuntimeError( - "cannot build descriptor from an empty list of descriptors." - ) - formatted_descript_list = [] - self.multi_task = multi_task - for ii in descrpt_list: - if isinstance(ii, Descriptor): - formatted_descript_list.append(ii) - elif isinstance(ii, dict): - if multi_task: - ii["multi_task"] = True - formatted_descript_list.append(Descriptor(**ii)) - else: - raise NotImplementedError - self.descrpt_list = formatted_descript_list - self.numb_descrpt = len(self.descrpt_list) - for ii in range(1, self.numb_descrpt): - assert ( - self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() - ), f"number of atom types in {ii}th descrptor does not match others" - - def get_rcut(self) -> float: - """Returns the cut-off radius.""" - all_rcut = [ii.get_rcut() for ii in self.descrpt_list] - return np.max(all_rcut) - - def get_ntypes(self) -> int: - """Returns the number of atom types.""" - return self.descrpt_list[0].get_ntypes() - - def get_dim_out(self) -> int: - """Returns the output dimension of this descriptor.""" - all_dim_out = [ii.get_dim_out() for ii in self.descrpt_list] - return sum(all_dim_out) - - def get_nlist( - self, - ) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: - """Get the neighbor information of the descriptor, returns the - nlist of the descriptor with the largest cut-off radius. - - Returns - ------- - nlist - Neighbor list - rij - The relative distance between the neighbor and the center atom. 
- sel_a - The number of neighbors with full information - sel_r - The number of neighbors with only radial information - """ - maxr_idx = np.argmax([ii.get_rcut() for ii in self.descrpt_list]) - return self.get_nlist_i(maxr_idx) - - def get_nlist_i(self, ii: int) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: - """Get the neighbor information of the ii-th descriptor. - - Parameters - ---------- - ii : int - The index of the descriptor - - Returns - ------- - nlist - Neighbor list - rij - The relative distance between the neighbor and the center atom. - sel_a - The number of neighbors with full information - sel_r - The number of neighbors with only radial information - """ - return ( - self.descrpt_list[ii].nlist, - self.descrpt_list[ii].rij, - self.descrpt_list[ii].sel_a, - self.descrpt_list[ii].sel_r, - ) - - def compute_input_stats( - self, - data_coord: list, - data_box: list, - data_atype: list, - natoms_vec: list, - mesh: list, - input_dict: dict, - ) -> None: - """Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. - - Parameters - ---------- - data_coord - The coordinates. Can be generated by deepmd.model.make_stat_input - data_box - The box. Can be generated by deepmd.model.make_stat_input - data_atype - The atom types. Can be generated by deepmd.model.make_stat_input - natoms_vec - The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input - mesh - The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input - input_dict - Dictionary for additional input - """ - for ii in self.descrpt_list: - ii.compute_input_stats( - data_coord, data_box, data_atype, natoms_vec, mesh, input_dict - ) - - def merge_input_stats(self, stat_dict): - """Merge the statisitcs computed from compute_input_stats to obtain the self.davg and self.dstd. 
- - Parameters - ---------- - stat_dict - The dict of statisitcs computed from compute_input_stats, including: - sumr - The sum of radial statisitcs. - suma - The sum of relative coord statisitcs. - sumn - The sum of neighbor numbers. - sumr2 - The sum of square of radial statisitcs. - suma2 - The sum of square of relative coord statisitcs. - """ - for ii in self.descrpt_list: - ii.merge_input_stats(stat_dict) - - def build( - self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box_: tf.Tensor, - mesh: tf.Tensor, - input_dict: dict, - reuse: Optional[bool] = None, - suffix: str = "", - ) -> tf.Tensor: - """Build the computational graph for the descriptor. - - Parameters - ---------- - coord_ - The coordinate of atoms - atype_ - The type of atoms - natoms - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - box_ : tf.Tensor - The box of the system - mesh - For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. - input_dict - Dictionary for additional inputs - reuse - The weights in the networks should be reused when get the variable. 
- suffix - Name suffix to identify this descriptor - - Returns - ------- - descriptor - The output descriptor - """ - with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): - t_rcut = tf.constant( - self.get_rcut(), name="rcut", dtype=GLOBAL_TF_FLOAT_PRECISION - ) - t_ntypes = tf.constant(self.get_ntypes(), name="ntypes", dtype=tf.int32) - all_dout = [] - for idx, ii in enumerate(self.descrpt_list): - dout = ii.build( - coord_, - atype_, - natoms, - box_, - mesh, - input_dict, - suffix=suffix + f"_{idx}", - reuse=reuse, - ) - dout = tf.reshape(dout, [-1, ii.get_dim_out()]) - all_dout.append(dout) - dout = tf.concat(all_dout, axis=1) - dout = tf.reshape(dout, [-1, natoms[0], self.get_dim_out()]) - return dout - - def prod_force_virial( - self, atom_ener: tf.Tensor, natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: - """Compute force and virial. - - Parameters - ---------- - atom_ener - The atomic energy - natoms - The number of atoms. This tensor has the length of Ntypes + 2 - natoms[0]: number of local atoms - natoms[1]: total number of atoms held by this processor - natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - - Returns - ------- - force - The force on atoms - virial - The total virial - atom_virial - The atomic virial - """ - for idx, ii in enumerate(self.descrpt_list): - ff, vv, av = ii.prod_force_virial(atom_ener, natoms) - if idx == 0: - force = ff - virial = vv - atom_virial = av - else: - force += ff - virial += vv - atom_virial += av - return force, virial, atom_virial - - def enable_compression( - self, - min_nbor_dist: float, - graph: tf.Graph, - graph_def: tf.GraphDef, - table_extrapolate: float = 5.0, - table_stride_1: float = 0.01, - table_stride_2: float = 0.1, - check_frequency: int = -1, - suffix: str = "", - ) -> None: - """Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the - training data. 
- - Parameters - ---------- - min_nbor_dist : float - The nearest distance between atoms - graph : tf.Graph - The graph of the model - graph_def : tf.GraphDef - The graph_def of the model - table_extrapolate : float, default: 5. - The scale of model extrapolation - table_stride_1 : float, default: 0.01 - The uniform stride of the first table - table_stride_2 : float, default: 0.1 - The uniform stride of the second table - check_frequency : int, default: -1 - The overflow check frequency - suffix : str, optional - The suffix of the scope - """ - for idx, ii in enumerate(self.descrpt_list): - ii.enable_compression( - min_nbor_dist, - graph, - graph_def, - table_extrapolate, - table_stride_1, - table_stride_2, - check_frequency, - suffix=f"{suffix}_{idx}", - ) - - def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: - """Reveive the mixed precision setting. - - Parameters - ---------- - mixed_prec - The mixed precision setting used in the embedding net - """ - for idx, ii in enumerate(self.descrpt_list): - ii.enable_mixed_precision(mixed_prec) - - def init_variables( - self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix: str = "", - ) -> None: - """Init the embedding net variables with the given dict. - - Parameters - ---------- - graph : tf.Graph - The input frozen model graph - graph_def : tf.GraphDef - The input frozen model graph_def - suffix : str, optional - The suffix of the scope - """ - for idx, ii in enumerate(self.descrpt_list): - ii.init_variables(graph, graph_def, suffix=f"{suffix}_{idx}") - - def get_tensor_names(self, suffix: str = "") -> Tuple[str]: - """Get names of tensors. 
- - Parameters - ---------- - suffix : str - The suffix of the scope - - Returns - ------- - Tuple[str] - Names of tensors - """ - tensor_names = [] - for idx, ii in enumerate(self.descrpt_list): - tensor_names.extend(ii.get_tensor_names(suffix=f"{suffix}_{idx}")) - return tuple(tensor_names) - - def pass_tensors_from_frz_model( - self, - *tensors: tf.Tensor, - ) -> None: - """Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def. - - Parameters - ---------- - *tensors : tf.Tensor - passed tensors - """ - jj = 0 - for ii in self.descrpt_list: - n_tensors = len(ii.get_tensor_names()) - ii.pass_tensors_from_frz_model(*tensors[jj : jj + n_tensors]) - jj += n_tensors diff --git a/deepmd/descriptor/se.py b/deepmd/descriptor/se.py deleted file mode 100644 index 242ee5425d..0000000000 --- a/deepmd/descriptor/se.py +++ /dev/null @@ -1,142 +0,0 @@ -from typing import ( - Tuple, -) - -from deepmd.env import ( - tf, -) -from deepmd.utils.graph import ( - get_embedding_net_variables_from_graph_def, - get_tensor_by_name_from_graph, -) - -from .descriptor import ( - Descriptor, -) - - -class DescrptSe(Descriptor): - """A base class for smooth version of descriptors. - - Notes - ----- - All of these descriptors have an environmental matrix and an - embedding network (:meth:`deepmd.utils.network.embedding_net`), so - they can share some similiar methods without defining them twice. - - Attributes - ---------- - embedding_net_variables : dict - initial embedding network variables - descrpt_reshape : tf.Tensor - the reshaped descriptor - descrpt_deriv : tf.Tensor - the descriptor derivative - rij : tf.Tensor - distances between two atoms - nlist : tf.Tensor - the neighbor list - - """ - - def _identity_tensors(self, suffix: str = "") -> None: - """Identify tensors which are expected to be stored and restored. 
- - Notes - ----- - These tensors will be indentitied: - self.descrpt_reshape : o_rmat - self.descrpt_deriv : o_rmat_deriv - self.rij : o_rij - self.nlist : o_nlist - Thus, this method should be called during building the descriptor and - after these tensors are initialized. - - Parameters - ---------- - suffix : str - The suffix of the scope - """ - self.descrpt_reshape = tf.identity(self.descrpt_reshape, name="o_rmat" + suffix) - self.descrpt_deriv = tf.identity( - self.descrpt_deriv, name="o_rmat_deriv" + suffix - ) - self.rij = tf.identity(self.rij, name="o_rij" + suffix) - self.nlist = tf.identity(self.nlist, name="o_nlist" + suffix) - - def get_tensor_names(self, suffix: str = "") -> Tuple[str]: - """Get names of tensors. - - Parameters - ---------- - suffix : str - The suffix of the scope - - Returns - ------- - Tuple[str] - Names of tensors - """ - return ( - f"o_rmat{suffix}:0", - f"o_rmat_deriv{suffix}:0", - f"o_rij{suffix}:0", - f"o_nlist{suffix}:0", - ) - - def pass_tensors_from_frz_model( - self, - descrpt_reshape: tf.Tensor, - descrpt_deriv: tf.Tensor, - rij: tf.Tensor, - nlist: tf.Tensor, - ): - """Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def. - - Parameters - ---------- - descrpt_reshape - The passed descrpt_reshape tensor - descrpt_deriv - The passed descrpt_deriv tensor - rij - The passed rij tensor - nlist - The passed nlist tensor - """ - self.rij = rij - self.nlist = nlist - self.descrpt_deriv = descrpt_deriv - self.descrpt_reshape = descrpt_reshape - - def init_variables( - self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix: str = "", - ) -> None: - """Init the embedding net variables with the given dict. 
- - Parameters - ---------- - graph : tf.Graph - The input frozen model graph - graph_def : tf.GraphDef - The input frozen model graph_def - suffix : str, optional - The suffix of the scope - """ - self.embedding_net_variables = get_embedding_net_variables_from_graph_def( - graph_def, suffix=suffix - ) - self.davg = get_tensor_by_name_from_graph( - graph, "descrpt_attr%s/t_avg" % suffix - ) - self.dstd = get_tensor_by_name_from_graph( - graph, "descrpt_attr%s/t_std" % suffix - ) - - @property - def precision(self) -> tf.DType: - """Precision of filter network.""" - return self.filter_precision diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py deleted file mode 100644 index 2bff523608..0000000000 --- a/deepmd/descriptor/se_a.py +++ /dev/null @@ -1,1161 +0,0 @@ -from typing import ( - List, - Optional, - Tuple, -) - -import numpy as np - -from deepmd.common import ( - cast_precision, - get_activation_func, - get_precision, -) -from deepmd.env import ( - GLOBAL_NP_FLOAT_PRECISION, - GLOBAL_TF_FLOAT_PRECISION, - default_tf_session_config, - op_module, - tf, -) -from deepmd.nvnmd.descriptor.se_a import ( - build_davg_dstd, - build_op_descriptor, - check_switch_range, - descrpt2r4, - filter_GR2D, - filter_lower_R42GR, -) -from deepmd.nvnmd.utils.config import ( - nvnmd_cfg, -) -from deepmd.utils.errors import ( - GraphWithoutTensorError, -) -from deepmd.utils.graph import ( - get_tensor_by_name_from_graph, -) -from deepmd.utils.network import ( - embedding_net, - embedding_net_rand_seed_shift, -) -from deepmd.utils.sess import ( - run_sess, -) -from deepmd.utils.tabulate import ( - DPTabulate, -) -from deepmd.utils.type_embed import ( - embed_atom_type, -) - -from .descriptor import ( - Descriptor, -) -from .se import ( - DescrptSe, -) - - -@Descriptor.register("se_e2_a") -@Descriptor.register("se_a") -class DescrptSeA(DescrptSe): - r"""DeepPot-SE constructed from all information (both angular and radial) of - atomic configurations. 
The embedding takes the distance between atoms as input. - - The descriptor :math:`\mathcal{D}^i \in \mathcal{R}^{M_1 \times M_2}` is given by [1]_ - - .. math:: - \mathcal{D}^i = (\mathcal{G}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \mathcal{G}^i_< - - where :math:`\mathcal{R}^i \in \mathbb{R}^{N \times 4}` is the coordinate - matrix, and each row of :math:`\mathcal{R}^i` can be constructed as follows - - .. math:: - (\mathcal{R}^i)_j = [ - \begin{array}{c} - s(r_{ji}) & \frac{s(r_{ji})x_{ji}}{r_{ji}} & \frac{s(r_{ji})y_{ji}}{r_{ji}} & \frac{s(r_{ji})z_{ji}}{r_{ji}} - \end{array} - ] - - where :math:`\mathbf{R}_{ji}=\mathbf{R}_j-\mathbf{R}_i = (x_{ji}, y_{ji}, z_{ji})` is - the relative coordinate and :math:`r_{ji}=\lVert \mathbf{R}_{ji} \lVert` is its norm. - The switching function :math:`s(r)` is defined as: - - .. math:: - s(r)= - \begin{cases} - \frac{1}{r}, & r