diff --git a/deepmd/entrypoints/eval_desc.py b/deepmd/entrypoints/eval_desc.py
new file mode 100644
index 0000000000..3d9de7142a
--- /dev/null
+++ b/deepmd/entrypoints/eval_desc.py
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Evaluate descriptors using trained DeePMD model."""
+
+import logging
+import os
+from pathlib import (
+    Path,
+)
+from typing import (
+    Optional,
+)
+
+import numpy as np
+
+from deepmd.common import (
+    expand_sys_str,
+)
+from deepmd.infer.deep_eval import (
+    DeepEval,
+)
+from deepmd.utils.data import (
+    DeepmdData,
+)
+
+__all__ = ["eval_desc"]
+
+log = logging.getLogger(__name__)
+
+
+def eval_desc(
+    *,
+    model: str,
+    system: str,
+    datafile: Optional[str],
+    output: str = "desc",
+    head: Optional[str] = None,
+    **kwargs,
+) -> None:
+    """Evaluate descriptors for given systems.
+
+    Parameters
+    ----------
+    model : str
+        path where model is stored
+    system : str
+        system directory
+    datafile : Optional[str]
+        the path to the list of systems to process; if None, ``system`` is used
+    output : str
+        output directory for descriptor files
+    head : Optional[str], optional
+        (Supported backend: PyTorch) Task head if in multi-task mode.
+    **kwargs
+        additional arguments (ignored)
+
+    Notes
+    -----
+    Descriptors are saved as 3D numpy arrays with shape (nframes, natoms, ndesc)
+    where each frame contains the descriptors for all atoms.
+
+    Raises
+    ------
+    RuntimeError
+        if no valid system was found
+    """
+    if datafile is not None:
+        with open(datafile) as datalist:
+            # ignore blank lines so a trailing newline does not yield "" systems
+            all_sys = [ss for ss in datalist.read().splitlines() if ss.strip()]
+    else:
+        all_sys = expand_sys_str(system)
+
+    if len(all_sys) == 0:
+        raise RuntimeError("Did not find valid system")
+
+    # init model
+    dp = DeepEval(model, head=head)
+
+    # create output directory
+    output_dir = Path(output)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    for system_path in all_sys:
+        log.info("# -------output of dp eval_desc------- ")
+        log.info(f"# processing system : {system_path}")
+
+        # create data class
+        tmap = dp.get_type_map()
+        data = DeepmdData(
+            system_path,
+            set_prefix="set",
+            shuffle_test=False,
+            type_map=tmap,
+            sort_atoms=False,
+        )
+
+        # get test data
+        test_data = data.get_test()
+        mixed_type = data.mixed_type
+        nframes = test_data["box"].shape[0]
+
+        # prepare input data
+        coord = test_data["coord"].reshape([nframes, -1])
+        box = test_data["box"]
+        if not data.pbc:
+            box = None
+        if mixed_type:
+            atype = test_data["type"].reshape([nframes, -1])
+        else:
+            atype = test_data["type"][0]
+
+        # handle optional parameters
+        fparam = None
+        if dp.get_dim_fparam() > 0:
+            if "fparam" in test_data:
+                fparam = test_data["fparam"]
+
+        aparam = None
+        if dp.get_dim_aparam() > 0:
+            if "aparam" in test_data:
+                aparam = test_data["aparam"]
+
+        # evaluate descriptors
+        log.info(f"# evaluating descriptors for {nframes} frames")
+        descriptors = dp.eval_descriptor(
+            coord,
+            box,
+            atype,
+            fparam=fparam,
+            aparam=aparam,
+        )
+
+        # descriptors are kept in 3D format (nframes, natoms, ndesc)
+
+        # save descriptors
+        system_name = os.path.basename(system_path.rstrip("/"))
+        desc_file = output_dir / f"{system_name}.npy"
+        np.save(desc_file, descriptors)
+
+        log.info(f"# descriptors saved to {desc_file}")
+        log.info(f"# descriptor shape: {descriptors.shape}")
+        log.info("# ----------------------------------- ")
+
+    log.info("# eval_desc completed successfully")
diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
index 2c91ca5f29..34ebe4d2e3 100644
--- a/deepmd/entrypoints/main.py
+++ b/deepmd/entrypoints/main.py
@@ -18,6 +18,9 @@
 from deepmd.entrypoints.doc import (
     doc_train_input,
 )
+from deepmd.entrypoints.eval_desc import (
+    eval_desc,
+)
 from deepmd.entrypoints.gui import (
     start_dpgui,
 )
@@ -65,6 +68,14 @@ def main(args: argparse.Namespace) -> None:
             strict_prefer=False,
         )
         test(**dict_args)
+    elif args.command == "eval-desc":
+        dict_args["model"] = format_model_suffix(
+            dict_args["model"],
+            feature=Backend.Feature.DEEP_EVAL,
+            preferred_backend=args.backend,
+            strict_prefer=False,
+        )
+        eval_desc(**dict_args)
     elif args.command == "doc-train-input":
         doc_train_input(**dict_args)
     elif args.command == "model-devi":
diff --git a/deepmd/main.py b/deepmd/main.py
index 2db781b201..84aef14813 100644
--- a/deepmd/main.py
+++ b/deepmd/main.py
@@ -416,6 +416,56 @@
         help="(Supported backend: PyTorch) Task head (alias: model branch) to test if in multi-task mode.",
     )
 
+    # * eval_desc script ***************************************************************
+    parser_eval_desc = subparsers.add_parser(
+        "eval-desc",
+        parents=[parser_log],
+        help="evaluate descriptors using the model",
+        formatter_class=RawTextArgumentDefaultsHelpFormatter,
+        epilog=textwrap.dedent(
+            """\
+            examples:
+                dp eval-desc -m graph.pb -s /path/to/system -o desc
+            """
+        ),
+    )
+    parser_eval_desc.add_argument(
+        "-m",
+        "--model",
+        default="frozen_model",
+        type=str,
+        help="Frozen model file (prefix) to import. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth.",
+    )
+    parser_eval_desc_subgroup = parser_eval_desc.add_mutually_exclusive_group()
+    parser_eval_desc_subgroup.add_argument(
+        "-s",
+        "--system",
+        default=".",
+        type=str,
+        help="The system dir. Recursively detect systems in this directory",
+    )
+    parser_eval_desc_subgroup.add_argument(
+        "-f",
+        "--datafile",
+        default=None,
+        type=str,
+        help="The path to the datafile, each line of which is a path to one data system.",
+    )
+    parser_eval_desc.add_argument(
+        "-o",
+        "--output",
+        default="desc",
+        type=str,
+        help="Output directory for descriptor files. Descriptors will be saved as desc/(system_name).npy",
+    )
+    parser_eval_desc.add_argument(
+        "--head",
+        "--model-branch",
+        default=None,
+        type=str,
+        help="(Supported backend: PyTorch) Task head (alias: model branch) to use if in multi-task mode.",
+    )
+
     # * compress model *****************************************************************
     # Compress a model, which including tabulating the embedding-net.
     # The table is composed of fifth-order polynomial coefficients and is assembled
@@ -909,6 +959,7 @@ def main(args: Optional[list[str]] = None) -> None:
 
     if args.command in (
         "test",
+        "eval-desc",
         "doc-train-input",
         "model-devi",
         "neighbor-stat",
diff --git a/doc/inference/python.md b/doc/inference/python.md
index b2603c85f8..361db7b64f 100644
--- a/doc/inference/python.md
+++ b/doc/inference/python.md
@@ -19,6 +19,21 @@
 e, f, v = dp.eval(coord, cell, atype)
 ```
 
 where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively.
+
+One can also evaluate the descriptors of the model:
+
+```python
+from deepmd.infer import DeepPot
+import numpy as np
+
+dp = DeepPot("graph.pb")
+coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1])
+cell = np.diag(10 * np.ones(3)).reshape([1, -1])
+atype = [1, 0, 1]
+descriptors = dp.eval_descriptor(coord, cell, atype)
+```
+
+where `descriptors` is the descriptor matrix of the system. This can also be done using the command line interface `dp eval-desc` as described in the [test documentation](../test/test.md).
 
 Furthermore, one can use the python interface to calculate model deviation.
diff --git a/doc/test/test.md b/doc/test/test.md
index dfd59d8f1f..9d399cb1ed 100644
--- a/doc/test/test.md
+++ b/doc/test/test.md
@@ -17,3 +17,24 @@ An explanation will be provided
 
 ```{program-output} dp test -h
 ```
+
+## Evaluate descriptors
+
+The descriptors of a model can be evaluated and saved using `dp eval-desc`. A typical usage of `dp eval-desc` is
+
+```bash
+dp eval-desc -m graph.pb -s /path/to/system -o desc
+```
+
+where `-m` gives the model file, `-s` the path to the system directory (or `-f` for a datafile containing paths to systems), and `-o` the output directory where descriptor files will be saved. The descriptors for each system will be saved as `.npy` files with the format `desc/(system_name).npy`. Each descriptor file contains a 3D array with shape (nframes, natoms, ndesc).
+
+Several other command line options can be passed to `dp eval-desc`, which can be checked with
+
+```bash
+$ dp eval-desc --help
+```
+
+An explanation will be provided
+
+```{program-output} dp eval-desc -h
+```
diff --git a/source/tests/pt/test_eval_desc.py b/source/tests/pt/test_eval_desc.py
new file mode 100644
index 0000000000..ff79a0a376
--- /dev/null
+++ b/source/tests/pt/test_eval_desc.py
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import shutil
+import tempfile
+import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+import torch
+
+from deepmd.entrypoints.eval_desc import (
+    eval_desc,
+)
+from deepmd.pt.entrypoints.main import (
+    get_trainer,
+)
+
+from .model.test_permutation import (
+    model_se_e2_a,
+)
+
+
+class DPEvalDesc:
+    def test_dp_eval_desc_1_frame(self) -> None:
+        trainer = get_trainer(deepcopy(self.config))
+        with torch.device("cpu"):
+            input_dict, label_dict, _ = trainer.get_data(is_train=False)
+        has_spin = getattr(trainer.model, "has_spin", False)
+        if callable(has_spin):
+            has_spin = has_spin()
+        if not has_spin:
+            input_dict.pop("spin", None)
+        input_dict["do_atomic_virial"] = True
+        trainer.model(**input_dict)  # run the model once before scripting; output unused
+        model = torch.jit.script(trainer.model)
+        tmp_model = tempfile.NamedTemporaryFile(delete=False, suffix=".pth")
+        torch.jit.save(model, tmp_model.name)
+
+        # Test eval_desc; always remove the temporary model file, even on failure
+        try:
+            eval_desc(
+                model=tmp_model.name,
+                system=self.config["training"]["validation_data"]["systems"][0],
+                datafile=None,
+                output=self.output_dir,
+            )
+        finally:
+            os.unlink(tmp_model.name)
+
+        # Check that descriptor file was created
+        system_name = os.path.basename(
+            self.config["training"]["validation_data"]["systems"][0].rstrip("/")
+        )
+        desc_file = os.path.join(self.output_dir, f"{system_name}.npy")
+        self.assertTrue(os.path.exists(desc_file))
+
+        # Load and validate descriptor
+        descriptors = np.load(desc_file)
+        self.assertIsInstance(descriptors, np.ndarray)
+        # Descriptors should be 3D: (nframes, natoms, ndesc)
+        self.assertEqual(len(descriptors.shape), 3)  # Should be 3D array
+        self.assertGreater(descriptors.shape[0], 0)  # Should have frames
+        self.assertGreater(descriptors.shape[1], 0)  # Should have atoms
+        self.assertGreater(descriptors.shape[2], 0)  # Should have descriptor dimensions
+
+    def tearDown(self) -> None:
+        for f in os.listdir("."):
+            if f.startswith("model") and f.endswith(".pt"):
+                os.remove(f)
+            if f in ["lcurve.out", self.input_json]:
+                os.remove(f)
+            if f in ["stat_files"]:
+                shutil.rmtree(f)
+        # Clean up output directory
+        if hasattr(self, "output_dir") and os.path.exists(self.output_dir):
+            shutil.rmtree(self.output_dir)
+
+
+class TestDPEvalDescSeA(DPEvalDesc, unittest.TestCase):
+    def setUp(self) -> None:
+        self.output_dir = "test_eval_desc_output"
+        input_json = str(Path(__file__).parent / "water" / "se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        data_file = [str(Path(__file__).parent / "water" / "data" / "single")]
+        self.config["training"]["training_data"]["systems"] = data_file
+        self.config["training"]["validation_data"]["systems"] = data_file
+        self.config["model"] = deepcopy(model_se_e2_a)
+        self.input_json = "test_eval_desc.json"
+        with open(self.input_json, "w") as fp:
+            json.dump(self.config, fp, indent=4)
+
+
+if __name__ == "__main__":
+    unittest.main()