Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions deepmd/entrypoints/eval_desc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
"""Evaluate descriptors using trained DeePMD model."""

import logging
import os
from pathlib import (
Path,
)
from typing import (
Optional,
)

import numpy as np

from deepmd.common import (
expand_sys_str,
)
from deepmd.infer.deep_eval import (
DeepEval,
)
from deepmd.utils.data import (
DeepmdData,
)

__all__ = ["eval_desc"]

log = logging.getLogger(__name__)


def eval_desc(
    *,
    model: str,
    system: str,
    datafile: Optional[str],
    output: str = "desc",
    head: Optional[str] = None,
    **kwargs,
) -> None:
    """Evaluate descriptors for given systems.

    Parameters
    ----------
    model : str
        path where model is stored
    system : str
        system directory; recursively searched when ``datafile`` is ``None``
    datafile : Optional[str]
        the path to the list of systems to process (one system path per
        line); when ``None``, systems are discovered under ``system``
    output : str
        output directory for descriptor files; one ``<system_name>.npy``
        file is written per system
    head : Optional[str], optional
        (Supported backend: PyTorch) Task head if in multi-task mode.
    **kwargs
        additional arguments (ignored; kept for CLI entry-point compatibility)

    Notes
    -----
    Descriptors are saved as 3D numpy arrays with shape (nframes, natoms, ndesc)
    where each frame contains the descriptors for all atoms.

    Raises
    ------
    RuntimeError
        if no valid system was found
    """
    # Resolve the list of systems: explicit datafile wins over directory scan.
    if datafile is not None:
        with open(datafile) as datalist:
            all_sys = datalist.read().splitlines()
    else:
        all_sys = expand_sys_str(system)

    if len(all_sys) == 0:
        raise RuntimeError("Did not find valid system")

    # init model
    dp = DeepEval(model, head=head)

    # create output directory
    output_dir = Path(output)
    output_dir.mkdir(parents=True, exist_ok=True)

    for system_path in all_sys:
        log.info("# -------output of dp eval_desc------- ")
        log.info(f"# processing system : {system_path}")

        # create data class with the model's type map so atom types match
        tmap = dp.get_type_map()
        data = DeepmdData(
            system_path,
            set_prefix="set",
            shuffle_test=False,
            type_map=tmap,
            sort_atoms=False,
        )

        # get test data
        test_data = data.get_test()
        mixed_type = data.mixed_type
        nframes = test_data["box"].shape[0]

        # prepare input data: flatten coordinates to (nframes, natoms * 3)
        coord = test_data["coord"].reshape([nframes, -1])
        box = test_data["box"]
        if not data.pbc:
            # non-periodic system: the model expects no cell
            box = None
        if mixed_type:
            # per-frame atom types
            atype = test_data["type"].reshape([nframes, -1])
        else:
            # identical types for all frames
            atype = test_data["type"][0]

        # handle optional frame/atom parameters only if the model uses them
        fparam = None
        if dp.get_dim_fparam() > 0 and "fparam" in test_data:
            fparam = test_data["fparam"]

        aparam = None
        if dp.get_dim_aparam() > 0 and "aparam" in test_data:
            aparam = test_data["aparam"]

        # evaluate descriptors
        log.info(f"# evaluating descriptors for {nframes} frames")
        descriptors = dp.eval_descriptor(
            coord,
            box,
            atype,
            fparam=fparam,
            aparam=aparam,
        )

        # descriptors are kept in 3D format (nframes, natoms, ndesc)

        # save descriptors as <output>/<system_name>.npy
        system_name = os.path.basename(system_path.rstrip("/"))
        desc_file = output_dir / f"{system_name}.npy"
        np.save(desc_file, descriptors)

        log.info(f"# descriptors saved to {desc_file}")
        log.info(f"# descriptor shape: {descriptors.shape}")
        log.info("# ----------------------------------- ")

    log.info("# eval_desc completed successfully")
11 changes: 11 additions & 0 deletions deepmd/entrypoints/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
from deepmd.entrypoints.doc import (
doc_train_input,
)
from deepmd.entrypoints.eval_desc import (
eval_desc,
)
from deepmd.entrypoints.gui import (
start_dpgui,
)
Expand Down Expand Up @@ -65,6 +68,14 @@ def main(args: argparse.Namespace) -> None:
strict_prefer=False,
)
test(**dict_args)
elif args.command == "eval-desc":
dict_args["model"] = format_model_suffix(
dict_args["model"],
feature=Backend.Feature.DEEP_EVAL,
preferred_backend=args.backend,
strict_prefer=False,
)
eval_desc(**dict_args)
elif args.command == "doc-train-input":
doc_train_input(**dict_args)
elif args.command == "model-devi":
Expand Down
51 changes: 51 additions & 0 deletions deepmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,56 @@ def main_parser() -> argparse.ArgumentParser:
help="(Supported backend: PyTorch) Task head (alias: model branch) to test if in multi-task mode.",
)

# * eval_desc script ***************************************************************
parser_eval_desc = subparsers.add_parser(
"eval-desc",
parents=[parser_log],
help="evaluate descriptors using the model",
formatter_class=RawTextArgumentDefaultsHelpFormatter,
epilog=textwrap.dedent(
"""\
examples:
dp eval-desc -m graph.pb -s /path/to/system -o desc
"""
),
)
parser_eval_desc.add_argument(
"-m",
"--model",
default="frozen_model",
type=str,
help="Frozen model file (prefix) to import. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth.",
)
parser_eval_desc_subgroup = parser_eval_desc.add_mutually_exclusive_group()
parser_eval_desc_subgroup.add_argument(
"-s",
"--system",
default=".",
type=str,
help="The system dir. Recursively detect systems in this directory",
)
parser_eval_desc_subgroup.add_argument(
"-f",
"--datafile",
default=None,
type=str,
help="The path to the datafile, each line of which is a path to one data system.",
)
parser_eval_desc.add_argument(
"-o",
"--output",
default="desc",
type=str,
help="Output directory for descriptor files. Descriptors will be saved as desc/(system_name).npy",
)
parser_eval_desc.add_argument(
"--head",
"--model-branch",
default=None,
type=str,
help="(Supported backend: PyTorch) Task head (alias: model branch) to use if in multi-task mode.",
)

# * compress model *****************************************************************
# Compress a model, which including tabulating the embedding-net.
# The table is composed of fifth-order polynomial coefficients and is assembled
Expand Down Expand Up @@ -909,6 +959,7 @@ def main(args: Optional[list[str]] = None) -> None:

if args.command in (
"test",
"eval-desc",
"doc-train-input",
"model-devi",
"neighbor-stat",
Expand Down
15 changes: 15 additions & 0 deletions doc/inference/python.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ e, f, v = dp.eval(coord, cell, atype)

where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively.

One can also evaluate the descriptors of the model:

```python
from deepmd.infer import DeepPot
import numpy as np

dp = DeepPot("graph.pb")
coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1])
cell = np.diag(10 * np.ones(3)).reshape([1, -1])
atype = [1, 0, 1]
descriptors = dp.eval_descriptor(coord, cell, atype)
```

where `descriptors` is the descriptor matrix of the system. This can also be done using the command line interface `dp eval-desc` as described in the [test documentation](../test/test.md).

Furthermore, one can use the python interface to calculate model deviation.

```python
Expand Down
22 changes: 22 additions & 0 deletions doc/test/test.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,25 @@ An explanation will be provided
```{program-output} dp test -h

```

## Evaluate descriptors

The descriptors of a model can be evaluated and saved using `dp eval-desc`. A typical usage of `dp eval-desc` is

```bash
dp eval-desc -m graph.pb -s /path/to/system -o desc
```

where `-m` gives the model file, `-s` the path to the system directory (or `-f` for a datafile containing paths to systems), and `-o` the output directory where descriptor files will be saved. The descriptors for each system will be saved as `.npy` files with the format `desc/(system_name).npy`. Each descriptor file contains a 3D array with shape (nframes, natoms, ndesc).

Several other command line options can be passed to `dp eval-desc`, which can be checked with

```bash
dp eval-desc --help
```

An explanation will be provided

```{program-output} dp eval-desc -h

```
101 changes: 101 additions & 0 deletions source/tests/pt/test_eval_desc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import json
import os
import shutil
import tempfile
import unittest
from copy import (
deepcopy,
)
from pathlib import (
Path,
)

import numpy as np
import torch

from deepmd.entrypoints.eval_desc import (
eval_desc,
)
from deepmd.pt.entrypoints.main import (
get_trainer,
)

from .model.test_permutation import (
model_se_e2_a,
)


class DPEvalDesc:
    """Mixin providing the eval_desc smoke test.

    Concrete subclasses must provide ``self.config`` (training config dict),
    ``self.output_dir`` and ``self.input_json`` in ``setUp``.
    """

    def test_dp_eval_desc_1_frame(self) -> None:
        """Train one step, freeze the model, and verify descriptor output."""
        trainer = get_trainer(deepcopy(self.config))
        with torch.device("cpu"):
            # label dict is unused here; only the inputs feed the model
            input_dict, _, _ = trainer.get_data(is_train=False)
        has_spin = getattr(trainer.model, "has_spin", False)
        if callable(has_spin):
            has_spin = has_spin()
        if not has_spin:
            # spin-less models reject a "spin" key in the input
            input_dict.pop("spin", None)
        input_dict["do_atomic_virial"] = True
        # warm-up forward pass for its side effects only; the output is
        # not needed (was an unused local flagged by CodeQL)
        trainer.model(**input_dict)
        model = torch.jit.script(trainer.model)
        tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth")
        torch.jit.save(model, tmp_model.name)

        # Test eval_desc
        eval_desc(
            model=tmp_model.name,
            system=self.config["training"]["validation_data"]["systems"][0],
            datafile=None,
            output=self.output_dir,
        )
        os.unlink(tmp_model.name)

        # Check that descriptor file was created
        system_name = os.path.basename(
            self.config["training"]["validation_data"]["systems"][0].rstrip("/")
        )
        desc_file = os.path.join(self.output_dir, f"{system_name}.npy")
        self.assertTrue(os.path.exists(desc_file))

        # Load and validate descriptor
        descriptors = np.load(desc_file)
        self.assertIsInstance(descriptors, np.ndarray)
        # Descriptors should be 3D: (nframes, natoms, ndesc)
        self.assertEqual(len(descriptors.shape), 3)  # Should be 3D array
        self.assertGreater(descriptors.shape[0], 0)  # Should have frames
        self.assertGreater(descriptors.shape[1], 0)  # Should have atoms
        self.assertGreater(descriptors.shape[2], 0)  # Should have descriptor dimensions

    def tearDown(self) -> None:
        """Remove training artifacts and the descriptor output directory."""
        for f in os.listdir("."):
            if f.startswith("model") and f.endswith(".pt"):
                os.remove(f)
            if f in ["lcurve.out", self.input_json]:
                os.remove(f)
            if f in ["stat_files"]:
                shutil.rmtree(f)
        # Clean up output directory
        if hasattr(self, "output_dir") and os.path.exists(self.output_dir):
            shutil.rmtree(self.output_dir)


class TestDPEvalDescSeA(DPEvalDesc, unittest.TestCase):
    """Run the eval_desc smoke test with a one-step se_e2_a water model."""

    def setUp(self) -> None:
        self.output_dir = "test_eval_desc_output"
        # Load the base water training configuration shipped with the tests.
        config_path = str(Path(__file__).parent / "water" / "se_atten.json")
        with open(config_path) as fin:
            self.config = json.load(fin)
        # Swap in the se_e2_a descriptor model and shrink training to 1 step.
        self.config["model"] = deepcopy(model_se_e2_a)
        self.config["training"]["numb_steps"] = 1
        self.config["training"]["save_freq"] = 1
        systems = [str(Path(__file__).parent / "water" / "data" / "single")]
        self.config["training"]["training_data"]["systems"] = systems
        self.config["training"]["validation_data"]["systems"] = systems
        # Persist the effective config; tearDown removes it by this name.
        self.input_json = "test_eval_desc.json"
        with open(self.input_json, "w") as fout:
            json.dump(self.config, fout, indent=4)


# Allow running this test module directly, e.g. `python test_eval_desc.py`.
if __name__ == "__main__":
    unittest.main()