diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py index 597f8ea006..fb77838b4c 100644 --- a/deepmd/dpmodel/model/make_model.py +++ b/deepmd/dpmodel/model/make_model.py @@ -614,7 +614,13 @@ def _format_nlist( axis=-1, ) - if n_nnei > nnei or extra_nlist_sort: + # Order matters for torch.export: Python evaluates `or` left-to-right + # with short-circuit. When `extra_nlist_sort=True` (Python bool) is + # on the left, the right-hand `n_nnei > nnei` is not evaluated, so no + # symbolic guard is registered on the dynamic `n_nnei` dimension. + # Swapping the operands would force the SymInt comparison to run and + # emit an `_assert_scalar` node in the exported graph. + if extra_nlist_sort or n_nnei > nnei: n_nf, n_nloc, n_nnei = nlist.shape # make a copy before revise m_real_nei = nlist >= 0 diff --git a/deepmd/entrypoints/convert_backend.py b/deepmd/entrypoints/convert_backend.py index a8cf20c6b3..e5cd51b386 100644 --- a/deepmd/entrypoints/convert_backend.py +++ b/deepmd/entrypoints/convert_backend.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import logging from typing import ( Any, ) @@ -7,11 +8,14 @@ Backend, ) +log = logging.getLogger(__name__) + def convert_backend( *, # Enforce keyword-only arguments INPUT: str, OUTPUT: str, + atomic_virial: bool = False, **kwargs: Any, ) -> None: """Convert a model file from one backend to another. @@ -20,12 +24,31 @@ def convert_backend( ---------- INPUT : str The input model file. - INPUT : str + OUTPUT : str The output model file. + atomic_virial : bool + If True, export .pt2/.pte models with per-atom virial correction. + This adds ~2.5x inference cost. Default False. Silently ignored + (with a warning) for backends that don't support the flag. 
""" inp_backend: Backend = Backend.detect_backend_by_model(INPUT)() out_backend: Backend = Backend.detect_backend_by_model(OUTPUT)() inp_hook = inp_backend.serialize_hook out_hook = out_backend.deserialize_hook data = inp_hook(INPUT) - out_hook(OUTPUT, data) + # Forward atomic_virial to pt_expt deserialize_to_file if applicable; + # warn and skip the flag for backends that don't accept it so that + # scripts passing --atomic-virial indiscriminately don't break. + import inspect + + sig = inspect.signature(out_hook) + if "do_atomic_virial" in sig.parameters: + out_hook(OUTPUT, data, do_atomic_virial=atomic_virial) + else: + if atomic_virial: + log.warning( + "--atomic-virial is only meaningful for pt_expt .pt2/.pte " + "outputs; ignoring it for output backend %s", + out_backend.name, + ) + out_hook(OUTPUT, data) diff --git a/deepmd/main.py b/deepmd/main.py index 3afcda8b4a..bf59dfdad5 100644 --- a/deepmd/main.py +++ b/deepmd/main.py @@ -915,6 +915,15 @@ def main_parser() -> argparse.ArgumentParser: ) parser_convert_backend.add_argument("INPUT", help="The input model file.") parser_convert_backend.add_argument("OUTPUT", help="The output model file.") + parser_convert_backend.add_argument( + "--atomic-virial", + action="store_true", + default=False, + help="Export .pt2/.pte models with per-atom virial correction. " + "This adds ~2.5x inference cost but is required for " + "LAMMPS compute/atom virial output. 
" + "Ignored (with a warning) for other output backends.", + ) # * show model ****************************************************************** parser_show = subparsers.add_parser( diff --git a/deepmd/pt_expt/model/dipole_model.py b/deepmd/pt_expt/model/dipole_model.py index 79ae26024e..4b0e570ecb 100644 --- a/deepmd/pt_expt/model/dipole_model.py +++ b/deepmd/pt_expt/model/dipole_model.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import types from typing import ( Any, ) @@ -16,6 +17,7 @@ ) from .make_model import ( + _pad_nlist_for_export, make_model, ) from .model import ( @@ -137,6 +139,7 @@ def fn( aparam: torch.Tensor | None, ) -> dict[str, torch.Tensor]: extended_coord = extended_coord.detach().requires_grad_(True) + nlist = _pad_nlist_for_export(nlist) return model.forward_lower( extended_coord, extended_atype, @@ -147,6 +150,13 @@ def fn( do_atomic_virial=do_atomic_virial, ) - return make_fx(fn, **make_fx_kwargs)( - extended_coord, extended_atype, nlist, mapping, fparam, aparam - ) + # See make_model.py for the rationale of the pad + monkeypatch. 
+ _orig_need_sort = model.need_sorted_nlist_for_lower + model.need_sorted_nlist_for_lower = types.MethodType(lambda self: True, model) + try: + traced = make_fx(fn, **make_fx_kwargs)( + extended_coord, extended_atype, nlist, mapping, fparam, aparam + ) + finally: + model.need_sorted_nlist_for_lower = _orig_need_sort + return traced diff --git a/deepmd/pt_expt/model/dos_model.py b/deepmd/pt_expt/model/dos_model.py index 2e69d90ab3..219c22e753 100644 --- a/deepmd/pt_expt/model/dos_model.py +++ b/deepmd/pt_expt/model/dos_model.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import types from typing import ( Any, ) @@ -16,6 +17,7 @@ ) from .make_model import ( + _pad_nlist_for_export, make_model, ) from .model import ( @@ -117,6 +119,7 @@ def fn( aparam: torch.Tensor | None, ) -> dict[str, torch.Tensor]: extended_coord = extended_coord.detach().requires_grad_(True) + nlist = _pad_nlist_for_export(nlist) return model.forward_lower( extended_coord, extended_atype, @@ -127,6 +130,13 @@ def fn( do_atomic_virial=do_atomic_virial, ) - return make_fx(fn, **make_fx_kwargs)( - extended_coord, extended_atype, nlist, mapping, fparam, aparam - ) + # See make_model.py for the rationale of the pad + monkeypatch. 
+ _orig_need_sort = model.need_sorted_nlist_for_lower + model.need_sorted_nlist_for_lower = types.MethodType(lambda self: True, model) + try: + traced = make_fx(fn, **make_fx_kwargs)( + extended_coord, extended_atype, nlist, mapping, fparam, aparam + ) + finally: + model.need_sorted_nlist_for_lower = _orig_need_sort + return traced diff --git a/deepmd/pt_expt/model/dp_linear_model.py b/deepmd/pt_expt/model/dp_linear_model.py index 46790c877e..0ac75659b0 100644 --- a/deepmd/pt_expt/model/dp_linear_model.py +++ b/deepmd/pt_expt/model/dp_linear_model.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import types from typing import ( Any, ) @@ -19,6 +20,7 @@ ) from .make_model import ( + _pad_nlist_for_export, make_model, ) from .model import ( @@ -142,6 +144,7 @@ def fn( aparam: torch.Tensor | None, ) -> dict[str, torch.Tensor]: extended_coord = extended_coord.detach().requires_grad_(True) + nlist = _pad_nlist_for_export(nlist) return model.forward_lower( extended_coord, extended_atype, @@ -152,9 +155,16 @@ def fn( do_atomic_virial=do_atomic_virial, ) - return make_fx(fn, **make_fx_kwargs)( - extended_coord, extended_atype, nlist, mapping, fparam, aparam - ) + # See make_model.py for the rationale of the pad + monkeypatch. 
+ _orig_need_sort = model.need_sorted_nlist_for_lower + model.need_sorted_nlist_for_lower = types.MethodType(lambda self: True, model) + try: + traced = make_fx(fn, **make_fx_kwargs)( + extended_coord, extended_atype, nlist, mapping, fparam, aparam + ) + finally: + model.need_sorted_nlist_for_lower = _orig_need_sort + return traced @classmethod def update_sel( diff --git a/deepmd/pt_expt/model/dp_zbl_model.py b/deepmd/pt_expt/model/dp_zbl_model.py index b7f164114b..baa30c4ce0 100644 --- a/deepmd/pt_expt/model/dp_zbl_model.py +++ b/deepmd/pt_expt/model/dp_zbl_model.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import types from typing import ( Any, ) @@ -16,6 +17,7 @@ ) from .make_model import ( + _pad_nlist_for_export, make_model, ) from .model import ( @@ -139,6 +141,7 @@ def fn( aparam: torch.Tensor | None, ) -> dict[str, torch.Tensor]: extended_coord = extended_coord.detach().requires_grad_(True) + nlist = _pad_nlist_for_export(nlist) return model.forward_lower( extended_coord, extended_atype, @@ -149,6 +152,15 @@ def fn( do_atomic_virial=do_atomic_virial, ) - return make_fx(fn, **make_fx_kwargs)( - extended_coord, extended_atype, nlist, mapping, fparam, aparam - ) + # Force `_format_nlist`'s sort branch into the compiled graph so the + # exported model tolerates oversized nlists at runtime — see + # make_model.py for the full rationale. 
+ _orig_need_sort = model.need_sorted_nlist_for_lower + model.need_sorted_nlist_for_lower = types.MethodType(lambda self: True, model) + try: + traced = make_fx(fn, **make_fx_kwargs)( + extended_coord, extended_atype, nlist, mapping, fparam, aparam + ) + finally: + model.need_sorted_nlist_for_lower = _orig_need_sort + return traced diff --git a/deepmd/pt_expt/model/make_model.py b/deepmd/pt_expt/model/make_model.py index 4bd9792420..b28b81ffb1 100644 --- a/deepmd/pt_expt/model/make_model.py +++ b/deepmd/pt_expt/model/make_model.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import math +import types from typing import ( Any, ) @@ -28,6 +29,28 @@ ) +def _pad_nlist_for_export(nlist: torch.Tensor) -> torch.Tensor: + """Append a single ``-1`` column to ``nlist`` for export-time tracing. + + Used inside ``forward_common_lower_exportable`` (and its spin counterpart) + so that ``_format_nlist``'s terminal slice ``ret[..., :nnei]`` truncates + to a statically sized output. Without the extra column, torch.export + cannot prove the ``ret.shape[-1] == nnei`` assertion at trace time and + would specialise the dynamic ``nnei`` dim to the sample value. + + Combined with the short-circuit order in ``_format_nlist`` + (``extra_nlist_sort`` on the left) and the ``need_sorted_nlist_for_lower`` + override during tracing, this keeps the compiled graph's ``nnei`` axis + fully dynamic and free of symbolic shape guards. 
+ """ + pad = -torch.ones( + (*nlist.shape[:2], 1), + dtype=nlist.dtype, + device=nlist.device, + ) + return torch.cat([nlist, pad], dim=-1) + + def _cal_hessian_ext( model: Any, kk: str, @@ -346,6 +369,7 @@ def fn( aparam: torch.Tensor | None, ) -> dict[str, torch.Tensor]: extended_coord = extended_coord.detach().requires_grad_(True) + nlist = _pad_nlist_for_export(nlist) return model.forward_common_lower( extended_coord, extended_atype, @@ -356,13 +380,26 @@ def fn( do_atomic_virial=do_atomic_virial, ) - return make_fx(fn, **make_fx_kwargs)( - extended_coord, - extended_atype, - nlist, - mapping, - fparam, - aparam, + # Force `_format_nlist`'s sort branch into the compiled graph so the + # exported model tolerates oversized nlists at runtime (LAMMPS builds + # nlists with rcut+skin). Combined with the short-circuit order in + # `_format_nlist`, no symbolic guard on the dynamic `nnei` axis is + # emitted. + _orig_need_sort = model.need_sorted_nlist_for_lower + model.need_sorted_nlist_for_lower = types.MethodType( + lambda self: True, model ) + try: + traced = make_fx(fn, **make_fx_kwargs)( + extended_coord, + extended_atype, + nlist, + mapping, + fparam, + aparam, + ) + finally: + model.need_sorted_nlist_for_lower = _orig_need_sort + return traced return CM diff --git a/deepmd/pt_expt/model/polar_model.py b/deepmd/pt_expt/model/polar_model.py index d421bb76a4..dd6b1c5d0f 100644 --- a/deepmd/pt_expt/model/polar_model.py +++ b/deepmd/pt_expt/model/polar_model.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import types from typing import ( Any, ) @@ -16,6 +17,7 @@ ) from .make_model import ( + _pad_nlist_for_export, make_model, ) from .model import ( @@ -117,6 +119,7 @@ def fn( aparam: torch.Tensor | None, ) -> dict[str, torch.Tensor]: extended_coord = extended_coord.detach().requires_grad_(True) + nlist = _pad_nlist_for_export(nlist) return model.forward_lower( extended_coord, extended_atype, @@ -127,6 +130,13 @@ def fn( 
do_atomic_virial=do_atomic_virial, ) - return make_fx(fn, **make_fx_kwargs)( - extended_coord, extended_atype, nlist, mapping, fparam, aparam - ) + # See make_model.py for the rationale of the pad + monkeypatch. + _orig_need_sort = model.need_sorted_nlist_for_lower + model.need_sorted_nlist_for_lower = types.MethodType(lambda self: True, model) + try: + traced = make_fx(fn, **make_fx_kwargs)( + extended_coord, extended_atype, nlist, mapping, fparam, aparam + ) + finally: + model.need_sorted_nlist_for_lower = _orig_need_sort + return traced diff --git a/deepmd/pt_expt/model/property_model.py b/deepmd/pt_expt/model/property_model.py index 72a327fb03..223f8e5d78 100644 --- a/deepmd/pt_expt/model/property_model.py +++ b/deepmd/pt_expt/model/property_model.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import types from typing import ( Any, ) @@ -16,6 +17,7 @@ ) from .make_model import ( + _pad_nlist_for_export, make_model, ) from .model import ( @@ -124,6 +126,7 @@ def fn( aparam: torch.Tensor | None, ) -> dict[str, torch.Tensor]: extended_coord = extended_coord.detach().requires_grad_(True) + nlist = _pad_nlist_for_export(nlist) return model.forward_lower( extended_coord, extended_atype, @@ -134,6 +137,13 @@ def fn( do_atomic_virial=do_atomic_virial, ) - return make_fx(fn, **make_fx_kwargs)( - extended_coord, extended_atype, nlist, mapping, fparam, aparam - ) + # See make_model.py for the rationale of the pad + monkeypatch. 
+ _orig_need_sort = model.need_sorted_nlist_for_lower + model.need_sorted_nlist_for_lower = types.MethodType(lambda self: True, model) + try: + traced = make_fx(fn, **make_fx_kwargs)( + extended_coord, extended_atype, nlist, mapping, fparam, aparam + ) + finally: + model.need_sorted_nlist_for_lower = _orig_need_sort + return traced diff --git a/deepmd/pt_expt/model/spin_model.py b/deepmd/pt_expt/model/spin_model.py index 70f41f0701..e69ee29f5a 100644 --- a/deepmd/pt_expt/model/spin_model.py +++ b/deepmd/pt_expt/model/spin_model.py @@ -1,4 +1,5 @@ # SPDX-License-Identifier: LGPL-3.0-or-later +import types from typing import ( Any, ) @@ -17,6 +18,7 @@ ) from .make_model import ( + _pad_nlist_for_export, make_model, ) from .model import ( @@ -96,6 +98,7 @@ def fn( aparam: torch.Tensor | None, ) -> dict[str, torch.Tensor]: extended_coord = extended_coord.detach().requires_grad_(True) + nlist = _pad_nlist_for_export(nlist) return model.forward_common_lower( extended_coord, extended_atype, @@ -107,15 +110,30 @@ def fn( do_atomic_virial=do_atomic_virial, ) - return make_fx(fn, **make_fx_kwargs)( - extended_coord, - extended_atype, - extended_spin, - nlist, - mapping, - fparam, - aparam, + # Force the sort branch of `_format_nlist` into the compiled graph by + # overriding `need_sorted_nlist_for_lower` on the backbone (which is + # where `call_common_lower` reads it). Short-circuit `or` in + # `_format_nlist` then skips the symbolic `n_nnei > nnei` comparison, + # so no spurious shape guard is emitted. See make_model.py for the + # non-spin counterpart. 
+ backbone = model.backbone_model + _orig_need_sort = backbone.need_sorted_nlist_for_lower + backbone.need_sorted_nlist_for_lower = types.MethodType( + lambda self: True, backbone ) + try: + traced = make_fx(fn, **make_fx_kwargs)( + extended_coord, + extended_atype, + extended_spin, + nlist, + mapping, + fparam, + aparam, + ) + finally: + backbone.need_sorted_nlist_for_lower = _orig_need_sort + return traced def forward_common_lower( self, *args: Any, **kwargs: Any diff --git a/deepmd/pt_expt/utils/serialization.py b/deepmd/pt_expt/utils/serialization.py index f59c397525..04cdedd6cf 100644 --- a/deepmd/pt_expt/utils/serialization.py +++ b/deepmd/pt_expt/utils/serialization.py @@ -17,7 +17,7 @@ def _strip_shape_assertions(graph_module: torch.nn.Module) -> None: - """Remove shape-guard assertion nodes from a spin model's exported graph. + """Neutralise shape-guard assertion nodes in a spin model's exported graph. ``torch.export`` inserts ``aten._assert_scalar`` nodes for symbolic shape relationships discovered during tracing. For the spin model, the atom- @@ -32,8 +32,12 @@ def _strip_shape_assertions(graph_module: torch.nn.Module) -> None: so filtering by message content is not reliable. Since ``prefer_deferred_runtime_asserts_over_guards=True`` converts all shape guards into these deferred assertions, and the only shape relationships in - the spin model involve nall/nloc, removing all of them is safe in this + the spin model involve nall/nloc, neutralising all of them is safe in this context. + + We replace each assertion's condition with ``True`` rather than erasing the + node; erasing nodes can disturb the FX graph structure and produce NaN + gradients on some Python/torch versions. 
""" graph = graph_module.graph for node in list(graph.nodes): @@ -41,8 +45,7 @@ def _strip_shape_assertions(graph_module: torch.nn.Module) -> None: node.op == "call_function" and node.target is torch.ops.aten._assert_scalar.default ): - graph.erase_node(node) - graph.eliminate_dead_code() + node.args = (True, node.args[1]) graph_module.recompile() @@ -178,11 +181,12 @@ def _make_sample_inputs( def _build_dynamic_shapes( *sample_inputs: torch.Tensor | None, has_spin: bool = False, + model_nnei: int = 1, ) -> tuple: """Build dynamic shape specifications for torch.export. - Marks nframes, nloc and nall as dynamic dimensions so the exported - program handles arbitrary frame and atom counts. + Marks nframes, nloc, nall and nnei as dynamic dimensions so the exported + program handles arbitrary frame, atom and neighbor counts. Parameters ---------- @@ -190,12 +194,15 @@ def _build_dynamic_shapes( Sample inputs: either 6 tensors (non-spin) or 7 tensors (spin). has_spin : bool Whether the inputs include an extended_spin tensor. + model_nnei : int + The model's sum(sel). Used as the min for the dynamic nnei dim. Returns a tuple (not dict) to match positional args of the make_fx traced module, whose arg names may have suffixes like ``_1``. 
""" nframes_dim = torch.export.Dim("nframes", min=1) nall_dim = torch.export.Dim("nall", min=1) nloc_dim = torch.export.Dim("nloc", min=1) + nnei_dim = torch.export.Dim("nnei", min=max(1, model_nnei)) if has_spin: # (ext_coord, ext_atype, ext_spin, nlist, mapping, fparam, aparam) @@ -205,7 +212,11 @@ def _build_dynamic_shapes( {0: nframes_dim, 1: nall_dim}, # extended_coord: (nframes, nall, 3) {0: nframes_dim, 1: nall_dim}, # extended_atype: (nframes, nall) {0: nframes_dim, 1: nall_dim}, # extended_spin: (nframes, nall, 3) - {0: nframes_dim, 1: nloc_dim}, # nlist: (nframes, nloc, nnei) + { + 0: nframes_dim, + 1: nloc_dim, + 2: nnei_dim, + }, # nlist: (nframes, nloc, nnei) — nnei is dynamic {0: nframes_dim, 1: nall_dim}, # mapping: (nframes, nall) {0: nframes_dim} if fparam is not None else None, # fparam {0: nframes_dim, 1: nloc_dim} if aparam is not None else None, # aparam @@ -217,7 +228,11 @@ def _build_dynamic_shapes( return ( {0: nframes_dim, 1: nall_dim}, # extended_coord: (nframes, nall, 3) {0: nframes_dim, 1: nall_dim}, # extended_atype: (nframes, nall) - {0: nframes_dim, 1: nloc_dim}, # nlist: (nframes, nloc, nnei) + { + 0: nframes_dim, + 1: nloc_dim, + 2: nnei_dim, + }, # nlist: (nframes, nloc, nnei) — nnei is dynamic {0: nframes_dim, 1: nall_dim}, # mapping: (nframes, nall) {0: nframes_dim} if fparam is not None else None, # fparam {0: nframes_dim, 1: nloc_dim} if aparam is not None else None, # aparam @@ -257,6 +272,7 @@ def _collect_metadata(model: torch.nn.Module, is_spin: bool = False) -> dict: "type_map": model.get_type_map(), "rcut": model.get_rcut(), "sel": model.get_sel(), + "nnei": sum(model.get_sel()), "dim_fparam": model.get_dim_fparam(), "dim_aparam": model.get_dim_aparam(), "mixed_types": model.mixed_types(), @@ -336,6 +352,7 @@ def deserialize_to_file( model_file: str, data: dict, model_json_override: dict | None = None, + do_atomic_virial: bool = False, ) -> None: """Deserialize a dictionary to a .pte or .pt2 model file. 
@@ -356,16 +373,24 @@ def deserialize_to_file( If provided, this dict is stored in model.json instead of ``data``. Used by ``dp compress`` to store the compressed model dict while tracing the uncompressed model (make_fx cannot trace custom ops). + do_atomic_virial : bool + If True, export with per-atom virial correction (3 extra backward + passes, ~2.5x slower). Default False for best performance. """ if model_file.endswith(".pt2"): - _deserialize_to_file_pt2(model_file, data, model_json_override) + _deserialize_to_file_pt2( + model_file, data, model_json_override, do_atomic_virial + ) else: - _deserialize_to_file_pte(model_file, data, model_json_override) + _deserialize_to_file_pte( + model_file, data, model_json_override, do_atomic_virial + ) def _trace_and_export( data: dict, model_json_override: dict | None = None, + do_atomic_virial: bool = False, ) -> tuple: """Common logic: build model, trace, export. @@ -447,7 +472,7 @@ def _trace_and_export( mapping_t, fparam=fparam, aparam=aparam, - do_atomic_virial=True, + do_atomic_virial=do_atomic_virial, tracing_mode="symbolic", _allow_non_fake_inputs=True, ) @@ -463,7 +488,7 @@ def _trace_and_export( mapping_t, fparam=fparam, aparam=aparam, - do_atomic_virial=True, + do_atomic_virial=do_atomic_virial, tracing_mode="symbolic", _allow_non_fake_inputs=True, ) @@ -477,7 +502,9 @@ def _trace_and_export( # graph. Exporting on CPU keeps devices consistent; we move the # ExportedProgram to the target device afterwards via the official # move_to_device_pass (avoids FakeTensor device-propagation errors). - dynamic_shapes = _build_dynamic_shapes(*sample_inputs, has_spin=is_spin) + dynamic_shapes = _build_dynamic_shapes( + *sample_inputs, has_spin=is_spin, model_nnei=sum(model.get_sel()) + ) exported = torch.export.export( traced, sample_inputs, @@ -487,13 +514,13 @@ def _trace_and_export( ) if is_spin: - # torch.export re-introduces shape-guard assertions even when - # the make_fx graph has none. 
The spin model's atom-doubling - # logic creates slice patterns that depend on (nall - nloc), - # producing guards like Ne(nall, nloc). These guards are - # spurious: the model is correct when nall == nloc (NoPBC). - # Strip them from the exported graph so the model can be - # used with any valid nall >= nloc. + # The spin model's atom-doubling slice patterns depend on + # (nall - nloc), producing guards like Ne(nall, nloc). These are + # spurious — the model is correct when nall == nloc (NoPBC). + # Non-spin models don't emit shape guards because the short-circuit + # order in `_format_nlist` (dpmodel) keeps the dynamic `nnei` axis + # free of symbolic comparisons when `extra_nlist_sort=True` + # (see `forward_common_lower_exportable` in pt_expt/model/make_model.py). _strip_shape_assertions(exported.graph_module) # 7. Move the exported program to the target device if needed. @@ -504,7 +531,10 @@ def _trace_and_export( exported = move_to_device_pass(exported, target_device) - # 8. Prepare JSON-serializable model dict + # 8. Record export-time config in metadata + metadata["do_atomic_virial"] = do_atomic_virial + + # 9. Prepare JSON-serializable model dict json_source = model_json_override if model_json_override is not None else data data_for_json = deepcopy(json_source) data_for_json = _numpy_to_json_serializable(data_for_json) @@ -516,10 +546,11 @@ def _deserialize_to_file_pte( model_file: str, data: dict, model_json_override: dict | None = None, + do_atomic_virial: bool = False, ) -> None: """Deserialize a dictionary to a .pte model file.""" exported, metadata, data_for_json, output_keys = _trace_and_export( - data, model_json_override + data, model_json_override, do_atomic_virial ) model_def_script = data.get("model_def_script") or {} @@ -537,6 +568,7 @@ def _deserialize_to_file_pt2( model_file: str, data: dict, model_json_override: dict | None = None, + do_atomic_virial: bool = False, ) -> None: """Deserialize a dictionary to a .pt2 model file (AOTInductor). 
@@ -551,11 +583,31 @@ def _deserialize_to_file_pt2( ) exported, metadata, data_for_json, output_keys = _trace_and_export( - data, model_json_override + data, model_json_override, do_atomic_virial ) - # Compile via AOTInductor into a .pt2 package - aoti_compile_and_package(exported, package_path=model_file) + # On CUDA, aggressive kernel fusion (default realize_opcount_threshold=30) + # causes NaN in the backward pass (force/virial) of attention-based + # descriptors (DPA1, DPA2). Setting threshold=0 prevents fusion and + # avoids the NaN. Only applied on CUDA; CPU compilation is unaffected. + # + # NOTE: `torch._inductor.config` is a process-wide singleton. The + # save/restore pattern here is NOT thread-safe — concurrent AOTInductor + # compilations from multiple threads would race on this global. Callers + # must serialise `.pt2` exports if running under a thread pool. Processes + # are fine (each has its own inductor config). + import torch._inductor.config as _inductor_config + + import deepmd.pt_expt.utils.env as _env + + is_cuda = _env.DEVICE.type == "cuda" + saved_threshold = _inductor_config.realize_opcount_threshold + if is_cuda: + _inductor_config.realize_opcount_threshold = 0 + try: + aoti_compile_and_package(exported, package_path=model_file) + finally: + _inductor_config.realize_opcount_threshold = saved_threshold # Embed metadata into the .pt2 ZIP archive model_def_script = data.get("model_def_script") or {} diff --git a/source/api_cc/include/DeepPotPTExpt.h b/source/api_cc/include/DeepPotPTExpt.h index 0d42324d24..1bcf44a885 100644 --- a/source/api_cc/include/DeepPotPTExpt.h +++ b/source/api_cc/include/DeepPotPTExpt.h @@ -208,9 +208,11 @@ class DeepPotPTExpt : public DeepPotBackend { bool gpu_enabled; std::vector type_map; std::vector output_keys; // sorted internal output key names - bool mixed_types; - std::vector sel; + bool do_atomic_virial; // whether model was exported with atomic virial corr + int nnei; // expected nlist nnei dimension (= 
sum(sel)) NeighborListData nlist_data; + at::Tensor mapping_tensor; // cached mapping tensor (LAMMPS path) + at::Tensor firstneigh_tensor; // cached nlist tensor (LAMMPS path) std::unique_ptr loader; /** diff --git a/source/api_cc/include/DeepSpinPTExpt.h b/source/api_cc/include/DeepSpinPTExpt.h index af108c7690..47b38767d4 100644 --- a/source/api_cc/include/DeepSpinPTExpt.h +++ b/source/api_cc/include/DeepSpinPTExpt.h @@ -183,9 +183,11 @@ class DeepSpinPTExpt : public DeepSpinBackend { bool gpu_enabled; std::vector type_map; std::vector output_keys; - bool mixed_types; - std::vector sel; + bool do_atomic_virial; // whether model was exported with atomic virial corr + int nnei; // expected nlist nnei dimension (= sum(sel)) NeighborListData nlist_data; + at::Tensor mapping_tensor; // cached mapping tensor (LAMMPS path) + at::Tensor firstneigh_tensor; // cached nlist tensor (LAMMPS path) std::unique_ptr loader; std::vector run_model(const torch::Tensor& coord, diff --git a/source/api_cc/include/commonPT.h b/source/api_cc/include/commonPT.h index a888d29758..61b823ae62 100644 --- a/source/api_cc/include/commonPT.h +++ b/source/api_cc/include/commonPT.h @@ -101,6 +101,45 @@ inline void build_comm_dict_with_virtual_atoms( remapped_sendnum.data(), remapped_recvnum.data()); } +/** + * @brief Flatten a jagged neighbor list into a [1, nloc, nnei] tensor. + * + * Each row in @p data may have a different number of neighbors. Short rows + * are padded with -1. The output width is max(min_nnei, max_row_length). + * No truncation or distance sorting is done — the model's format_nlist + * handles that on-device. + * + * If @p min_nnei is 0 (the default used by the .pth callers) and every row + * is empty (no atom has any neighbor — fully-dissociated system), the + * output shape is [1, nloc, 0]. PyTorch accepts zero-sized dimensions, and + * the eager `_format_nlist` pads it back up to sum(sel). 
.pt2 callers + * always pass @p min_nnei = sum(sel) > 0, so the output width is at least + * sum(sel) for them. + * + * @param data Jagged neighbor list: data[i] holds neighbor indices + * for local atom i. + * @param min_nnei Minimum width of the nnei dimension. For .pt2 models + * this should be sum(sel) from the model metadata, because + * torch.export marks nnei >= sum(sel) as a dynamic constraint. + * For .pth models 0 (the default) is fine. + */ +inline torch::Tensor createNlistTensor( + const std::vector>& data, int min_nnei = 0) { + int nloc = static_cast(data.size()); + int nnei = min_nnei; + for (int ii = 0; ii < nloc; ++ii) { + nnei = std::max(nnei, static_cast(data[ii].size())); + } + std::vector flat_data(static_cast(nloc) * nnei, -1); + for (int ii = 0; ii < nloc; ++ii) { + for (size_t jj = 0; jj < data[ii].size(); ++jj) { + flat_data[static_cast(ii) * nnei + jj] = data[ii][jj]; + } + } + torch::Tensor flat_tensor = torch::tensor(flat_data, torch::kInt32); + return flat_tensor.view({1, nloc, nnei}); +} + } // namespace deepmd #endif // BUILD_PYTORCH diff --git a/source/api_cc/src/DeepPotPT.cc b/source/api_cc/src/DeepPotPT.cc index fc09949691..d69dbb8f82 100644 --- a/source/api_cc/src/DeepPotPT.cc +++ b/source/api_cc/src/DeepPotPT.cc @@ -32,22 +32,6 @@ void DeepPotPT::translate_error(std::function f) { } } -torch::Tensor createNlistTensor(const std::vector>& data) { - size_t total_size = 0; - for (const auto& row : data) { - total_size += row.size(); - } - std::vector flat_data; - flat_data.reserve(total_size); - for (const auto& row : data) { - flat_data.insert(flat_data.end(), row.begin(), row.end()); - } - - torch::Tensor flat_tensor = torch::tensor(flat_data, torch::kInt32); - int nloc = data.size(); - int nnei = nloc > 0 ? 
total_size / nloc : 0; - return flat_tensor.view({1, nloc, nnei}); -} DeepPotPT::DeepPotPT() : inited(false) {} DeepPotPT::DeepPotPT(const std::string& model, const int& gpu_rank, diff --git a/source/api_cc/src/DeepPotPTExpt.cc b/source/api_cc/src/DeepPotPTExpt.cc index c1f3d9d674..db099ed464 100644 --- a/source/api_cc/src/DeepPotPTExpt.cc +++ b/source/api_cc/src/DeepPotPTExpt.cc @@ -12,12 +12,12 @@ #include "SimulationRegion.h" #include "common.h" +#include "commonPT.h" #include "commonPTExpt.h" #include "device.h" #include "errors.h" #include "neighbor_list.h" -using deepmd::ptexpt::buildTypeSortedNlist; using deepmd::ptexpt::parse_json; using deepmd::ptexpt::read_zip_entry; @@ -93,7 +93,6 @@ void DeepPotPTExpt::init(const std::string& model, ntypes = static_cast(metadata["type_map"].as_array().size()); dfparam = metadata["dim_fparam"].as_int(); daparam = metadata["dim_aparam"].as_int(); - mixed_types = metadata["mixed_types"].as_bool(); aparam_nall = false; // pt_expt models use nloc for aparam if (metadata.obj_val.count("has_default_fparam")) { has_default_fparam_ = metadata["has_default_fparam"].as_bool(); @@ -120,16 +119,29 @@ void DeepPotPTExpt::init(const std::string& model, } } + if (metadata.obj_val.count("do_atomic_virial")) { + do_atomic_virial = metadata["do_atomic_virial"].as_bool(); + } else { + // Older models without this field were exported with do_atomic_virial=True + do_atomic_virial = true; + } + + // Read expected nnei (= sum(sel)) — the .pt2 graph has this dimension static. 
+ if (metadata.obj_val.count("nnei")) { + nnei = metadata["nnei"].as_int(); + } else { + // Fallback: compute from sel array + nnei = 0; + for (const auto& v : metadata["sel"].as_array()) { + nnei += v.as_int(); + } + } + type_map.clear(); for (const auto& v : metadata["type_map"].as_array()) { type_map.push_back(v.as_string()); } - sel.clear(); - for (const auto& v : metadata["sel"].as_array()) { - sel.push_back(v.as_int()); - } - // Parse output keys from metadata output_keys.clear(); for (const auto& v : metadata["output_keys"].as_array()) { @@ -211,6 +223,16 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener, const std::vector& fparam, const std::vector& aparam, const bool atomic) { + // Fail fast before allocating any tensors: refuse to run if the caller + // asked for atomic virial but the .pt2 was exported without it. + if (atomic && !do_atomic_virial) { + throw deepmd::deepmd_exception( + "Atomic virial was requested (e.g. by LAMMPS compute */atom/virial) " + "but this .pt2 model was exported without it (metadata field " + "do_atomic_virial=False). Atomic virial adds ~2.5x inference cost " + "and is off by default for .pt2. To enable it, regenerate with: " + "dp convert-backend --atomic-virial INPUT.pth OUTPUT.pt2"); + } torch::Device device(torch::kCUDA, gpu_id); if (!gpu_enabled) { device = torch::Device(torch::kCPU); @@ -252,40 +274,39 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener, .clone() .to(device); + // LAMMPS sets ago=0 on every nlist rebuild (neighbor rebuild, re-partition, + // atom exchange between subdomains), so `ago > 0` implies the cached + // mapping and nlist tensors are still valid. Rebuild only on ago==0. 
if (ago == 0) { nlist_data.copy_from_nlist(lmp_list, nall - nghost); nlist_data.shuffle_exclude_empty(fwd_map); nlist_data.padding(); - } - // Build type-sorted, sel-limited nlist expected by the .pt2 model - at::Tensor firstneigh_tensor = - buildTypeSortedNlist(nlist_data.jlist, coord_d, datype, sel, nloc, - mixed_types) - .to(device); - // Build mapping tensor. - // NOTE: must .clone() because the local vector goes out of scope before - // run_model is called, and torch::from_blob does not copy the data. - at::Tensor mapping_tensor; - if (lmp_list.mapping) { - std::vector mapping(nall_real); - for (int ii = 0; ii < nall_real; ii++) { - mapping[ii] = fwd_map[lmp_list.mapping[bkw_map[ii]]]; - } - mapping_tensor = - torch::from_blob(mapping.data(), {1, nall_real}, int_option) - .clone() - .to(device); - } else { - // Default identity mapping for local atoms - std::vector mapping(nall_real); - for (int ii = 0; ii < nall_real; ii++) { - mapping[ii] = ii; + // Rebuild mapping tensor + if (lmp_list.mapping) { + std::vector mapping(nall_real); + for (int ii = 0; ii < nall_real; ii++) { + mapping[ii] = fwd_map[lmp_list.mapping[bkw_map[ii]]]; + } + mapping_tensor = + torch::from_blob(mapping.data(), {1, nall_real}, int_option) + .clone() + .to(device); + } else { + // Default identity mapping for local atoms + std::vector mapping(nall_real); + for (int ii = 0; ii < nall_real; ii++) { + mapping[ii] = ii; + } + mapping_tensor = + torch::from_blob(mapping.data(), {1, nall_real}, int_option) + .clone() + .to(device); } - mapping_tensor = - torch::from_blob(mapping.data(), {1, nall_real}, int_option) - .clone() - .to(device); + + // Flatten raw nlist — the .pt2 model sorts by distance on-device. 
+ firstneigh_tensor = + createNlistTensor(nlist_data.jlist, nnei).to(torch::kInt64).to(device); } // Build fparam/aparam tensors (cast to float64 for the model) @@ -434,6 +455,16 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener, const std::vector& fparam, const std::vector& aparam, const bool atomic) { + // Fail fast before allocating any tensors (same check as the nlist + // overload — see its comment). + if (atomic && !do_atomic_virial) { + throw deepmd::deepmd_exception( + "Atomic virial was requested (e.g. by LAMMPS compute */atom/virial) " + "but this .pt2 model was exported without it (metadata field " + "do_atomic_virial=False). Atomic virial adds ~2.5x inference cost " + "and is off by default for .pt2. To enable it, regenerate with: " + "dp convert-backend --atomic-virial INPUT.pth OUTPUT.pt2"); + } int natoms = atype.size(); int nframes = coord.size() / (natoms * 3); if (nframes > 1) { @@ -518,8 +549,6 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener, ncell, ext_stt, ext_end, region, ncell); } - // 3. Build type-sorted, sel-limited nlist (uses double coords for distances) - // 4. Convert to tensors (always float64 for .pt2 model) // NOTE: must .clone() because from_blob does not copy data, and the local // vectors would go out of scope before run_model completes. @@ -532,10 +561,9 @@ void DeepPotPTExpt::compute(ENERGYVTYPE& ener, torch::from_blob(atype_64.data(), {1, nall}, int_options) .clone() .to(device); + // Flatten raw nlist — the .pt2 model sorts by distance on-device. 
at::Tensor nlist_tensor = - buildTypeSortedNlist(nlist_raw, coord_cpy_d, atype_cpy, sel, nloc, - mixed_types) - .to(device); + createNlistTensor(nlist_raw, nnei).to(torch::kInt64).to(device); std::vector mapping_64(mapping_vec.begin(), mapping_vec.end()); at::Tensor mapping_tensor = torch::from_blob(mapping_64.data(), {1, nall}, int_options) diff --git a/source/api_cc/src/DeepSpinPT.cc b/source/api_cc/src/DeepSpinPT.cc index e7fdb0a6a5..5add377045 100644 --- a/source/api_cc/src/DeepSpinPT.cc +++ b/source/api_cc/src/DeepSpinPT.cc @@ -31,22 +31,6 @@ void DeepSpinPT::translate_error(std::function f) { } } -torch::Tensor createNlistTensor2(const std::vector>& data) { - size_t total_size = 0; - for (const auto& row : data) { - total_size += row.size(); - } - std::vector flat_data; - flat_data.reserve(total_size); - for (const auto& row : data) { - flat_data.insert(flat_data.end(), row.begin(), row.end()); - } - - torch::Tensor flat_tensor = torch::tensor(flat_data, torch::kInt32); - int nloc = data.size(); - int nnei = nloc > 0 ? 
total_size / nloc : 0; - return flat_tensor.view({1, nloc, nnei}); -} DeepSpinPT::DeepSpinPT() : inited(false) {} DeepSpinPT::DeepSpinPT(const std::string& model, const int& gpu_rank, @@ -209,7 +193,7 @@ void DeepSpinPT::compute(ENERGYVTYPE& ener, .to(device); } } - at::Tensor firstneigh = createNlistTensor2(nlist_data.jlist); + at::Tensor firstneigh = createNlistTensor(nlist_data.jlist); firstneigh_tensor = firstneigh.to(torch::kInt64).to(device); bool do_atom_virial_tensor = atomic; c10::optional fparam_tensor; diff --git a/source/api_cc/src/DeepSpinPTExpt.cc b/source/api_cc/src/DeepSpinPTExpt.cc index ae4ef423ed..dcd7df55a4 100644 --- a/source/api_cc/src/DeepSpinPTExpt.cc +++ b/source/api_cc/src/DeepSpinPTExpt.cc @@ -12,12 +12,12 @@ #include "SimulationRegion.h" #include "common.h" +#include "commonPT.h" #include "commonPTExpt.h" #include "device.h" #include "errors.h" #include "neighbor_list.h" -using deepmd::ptexpt::buildTypeSortedNlist; using deepmd::ptexpt::parse_json; using deepmd::ptexpt::read_zip_entry; @@ -93,7 +93,6 @@ void DeepSpinPTExpt::init(const std::string& model, ntypes = static_cast(metadata["type_map"].as_array().size()); dfparam = metadata["dim_fparam"].as_int(); daparam = metadata["dim_aparam"].as_int(); - mixed_types = metadata["mixed_types"].as_bool(); aparam_nall = false; // Spin-specific metadata @@ -133,16 +132,29 @@ void DeepSpinPTExpt::init(const std::string& model, } } + if (metadata.obj_val.count("do_atomic_virial")) { + do_atomic_virial = metadata["do_atomic_virial"].as_bool(); + } else { + // Older models without this field were exported with do_atomic_virial=True + do_atomic_virial = true; + } + + // Read expected nnei (= sum(sel)) — the .pt2 graph has this dimension static. 
+ if (metadata.obj_val.count("nnei")) { + nnei = metadata["nnei"].as_int(); + } else { + // Fallback: compute from sel array + nnei = 0; + for (const auto& v : metadata["sel"].as_array()) { + nnei += v.as_int(); + } + } + type_map.clear(); for (const auto& v : metadata["type_map"].as_array()) { type_map.push_back(v.as_string()); } - sel.clear(); - for (const auto& v : metadata["sel"].as_array()) { - sel.push_back(v.as_int()); - } - output_keys.clear(); for (const auto& v : metadata["output_keys"].as_array()) { output_keys.push_back(v.as_string()); @@ -230,6 +242,15 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener, const std::vector& fparam, const std::vector& aparam, const bool atomic) { + // Fail fast before allocating any tensors. + if (atomic && !do_atomic_virial) { + throw deepmd::deepmd_exception( + "Atomic virial was requested (e.g. by LAMMPS compute */atom/virial) " + "but this .pt2 model was exported without it (metadata field " + "do_atomic_virial=False). Atomic virial adds ~2.5x inference cost " + "and is off by default for .pt2. To enable it, regenerate with: " + "dp convert-backend --atomic-virial INPUT.pth OUTPUT.pt2"); + } torch::Device device(torch::kCUDA, gpu_id); if (!gpu_enabled) { device = torch::Device(torch::kCPU); @@ -281,36 +302,38 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener, .clone() .to(device); + // LAMMPS sets ago=0 on every nlist rebuild, so ago>0 implies the cached + // mapping and nlist tensors are still valid — see DeepPotPTExpt.cc for + // the same rationale. 
if (ago == 0) { nlist_data.copy_from_nlist(lmp_list, nall - nghost); nlist_data.shuffle_exclude_empty(fwd_map); nlist_data.padding(); - } - at::Tensor firstneigh_tensor = - buildTypeSortedNlist(nlist_data.jlist, coord_d, datype, sel, nloc, - mixed_types) - .to(device); - // Build mapping tensor - at::Tensor mapping_tensor; - if (lmp_list.mapping) { - std::vector mapping(nall_real); - for (int ii = 0; ii < nall_real; ii++) { - mapping[ii] = fwd_map[lmp_list.mapping[bkw_map[ii]]]; - } - mapping_tensor = - torch::from_blob(mapping.data(), {1, nall_real}, int_option) - .clone() - .to(device); - } else { - std::vector mapping(nall_real); - for (int ii = 0; ii < nall_real; ii++) { - mapping[ii] = ii; + // Rebuild mapping tensor + if (lmp_list.mapping) { + std::vector mapping(nall_real); + for (int ii = 0; ii < nall_real; ii++) { + mapping[ii] = fwd_map[lmp_list.mapping[bkw_map[ii]]]; + } + mapping_tensor = + torch::from_blob(mapping.data(), {1, nall_real}, int_option) + .clone() + .to(device); + } else { + std::vector mapping(nall_real); + for (int ii = 0; ii < nall_real; ii++) { + mapping[ii] = ii; + } + mapping_tensor = + torch::from_blob(mapping.data(), {1, nall_real}, int_option) + .clone() + .to(device); } - mapping_tensor = - torch::from_blob(mapping.data(), {1, nall_real}, int_option) - .clone() - .to(device); + + // Flatten raw nlist — the .pt2 model sorts by distance on-device. + firstneigh_tensor = + createNlistTensor(nlist_data.jlist, nnei).to(torch::kInt64).to(device); } // Build fparam/aparam tensors @@ -475,6 +498,15 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener, const std::vector& fparam, const std::vector& aparam, const bool atomic) { + // Fail fast before allocating any tensors. + if (atomic && !do_atomic_virial) { + throw deepmd::deepmd_exception( + "Atomic virial was requested (e.g. by LAMMPS compute */atom/virial) " + "but this .pt2 model was exported without it (metadata field " + "do_atomic_virial=False). 
Atomic virial adds ~2.5x inference cost " + "and is off by default for .pt2. To enable it, regenerate with: " + "dp convert-backend --atomic-virial INPUT.pth OUTPUT.pt2"); + } int natoms = atype.size(); torch::Device device(torch::kCUDA, gpu_id); @@ -580,10 +612,9 @@ void DeepSpinPTExpt::compute(ENERGYVTYPE& ener, torch::from_blob(atype_64.data(), {1, nall}, int_options) .clone() .to(device); + // Flatten raw nlist — the .pt2 model sorts by distance on-device. at::Tensor nlist_tensor = - buildTypeSortedNlist(nlist_raw, coord_cpy_d, atype_cpy, sel, nloc, - mixed_types) - .to(device); + createNlistTensor(nlist_raw, nnei).to(torch::kInt64).to(device); std::vector mapping_64(mapping_vec.begin(), mapping_vec.end()); at::Tensor mapping_tensor = torch::from_blob(mapping_64.data(), {1, nall}, int_options) diff --git a/source/api_cc/src/DeepSpinTF.cc b/source/api_cc/src/DeepSpinTF.cc index 7f569ea231..091fca64c2 100644 --- a/source/api_cc/src/DeepSpinTF.cc +++ b/source/api_cc/src/DeepSpinTF.cc @@ -1121,7 +1121,9 @@ void DeepSpinTF::extend(int& extend_inum, extend_firstneigh.resize(extend_nloc); extend_numneigh.resize(extend_nloc); for (int ii = 0; ii < extend_nloc; ii++) { - extend_firstneigh[ii] = &extend_neigh[ii][0]; + // `&vec[0]` is undefined behaviour for empty vectors; use + // vector::data() instead. See convert_nlist for the same fix. + extend_firstneigh[ii] = extend_neigh[ii].data(); extend_numneigh[ii] = extend_neigh[ii].size(); } diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index 1ad1a5c97b..7aae726242 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -276,13 +276,21 @@ void deepmd::NeighborListData::copy_from_nlist(const InputNlist& inlist, int inum = natoms >= 0 ? 
natoms : inlist.inum; ilist.resize(inum); jlist.resize(inum); - memcpy(&ilist[0], inlist.ilist, inum * sizeof(int)); + if (inum > 0) { + memcpy(&ilist[0], inlist.ilist, inum * sizeof(int)); + } for (int ii = 0; ii < inum; ++ii) { int jnum = inlist.numneigh[ii]; jlist[ii].resize(jnum); - memcpy(&jlist[ii][0], inlist.firstneigh[ii], jnum * sizeof(int)); - for (int jj = 0; jj < jnum; ++jj) { - jlist[ii][jj] &= inlist.mask; + // Guard against empty jlist[ii]: `&vec[0]` is undefined behaviour for + // empty vectors and libstdc++ debug mode asserts on it. This happens + // when a subdomain's local atoms legitimately have zero neighbours + // within cutoff (e.g. under spatial partitioning). + if (jnum > 0) { + memcpy(&jlist[ii][0], inlist.firstneigh[ii], jnum * sizeof(int)); + for (int jj = 0; jj < jnum; ++jj) { + jlist[ii][jj] &= inlist.mask; + } } } } @@ -295,13 +303,13 @@ void deepmd::NeighborListData::shuffle(const AtomMap& map) { void deepmd::NeighborListData::shuffle(const std::vector& fwd_map) { int nloc = fwd_map.size(); for (unsigned ii = 0; ii < ilist.size(); ++ii) { - if (ilist[ii] < nloc) { + if (ilist[ii] >= 0 && ilist[ii] < nloc) { ilist[ii] = fwd_map[ilist[ii]]; } } for (unsigned ii = 0; ii < jlist.size(); ++ii) { for (unsigned jj = 0; jj < jlist[ii].size(); ++jj) { - if (jlist[ii][jj] < nloc) { + if (jlist[ii][jj] >= 0 && jlist[ii][jj] < nloc) { jlist[ii][jj] = fwd_map[jlist[ii][jj]]; } } @@ -353,12 +361,16 @@ void deepmd::NeighborListData::make_inlist(InputNlist& inlist) { firstneigh.resize(nloc); for (int ii = 0; ii < nloc; ++ii) { numneigh[ii] = jlist[ii].size(); - firstneigh[ii] = &jlist[ii][0]; + // `&vec[0]` is undefined behaviour for empty vectors (libstdc++ + // debug mode asserts on it). When numneigh[ii] is 0 the pointer is + // never dereferenced; use vector::data() which is well-defined for + // empty vectors. Mirrors the fix in convert_nlist. 
+ firstneigh[ii] = jlist[ii].data(); } inlist.inum = nloc; - inlist.ilist = &ilist[0]; - inlist.numneigh = &numneigh[0]; - inlist.firstneigh = &firstneigh[0]; + inlist.ilist = ilist.data(); + inlist.numneigh = numneigh.data(); + inlist.firstneigh = firstneigh.data(); } #ifdef BUILD_TENSORFLOW diff --git a/source/api_cc/src/commonPTExpt.h b/source/api_cc/src/commonPTExpt.h index 7dd02d09a9..ddc8ad5014 100644 --- a/source/api_cc/src/commonPTExpt.h +++ b/source/api_cc/src/commonPTExpt.h @@ -438,101 +438,11 @@ inline std::string read_zip_entry(const std::string& zip_path, } // ============================================================================ -// Build type-sorted, sel-limited neighbor list tensor. +// Create raw neighbor list tensor. +// The .pt2 compiled graph already contains format_nlist (distance sort + +// truncation) and an internal +1 pad that guarantees the sort branch fires. +// The C++ side just flattens the jagged nlist into a rectangular tensor. // ============================================================================ -/** - * @brief Convert a raw neighbor list to the sel-limited format expected by the - * pt_expt model. - * - * For non-mixed-type models (distinguish_types=true): the nlist has shape - * (nframes, nloc, sum(sel)), where the first sel[0] entries are neighbors of - * type 0, the next sel[1] are type 1, etc. Within each type group neighbors - * are sorted by distance (ascending). - * - * For mixed-type models (distinguish_types=false): all neighbors go into a - * single group sorted by distance, truncated to sum(sel). - * - * Missing slots are filled with -1. 
- */ -template -inline torch::Tensor buildTypeSortedNlist( - const std::vector>& raw_nlist, - const std::vector& coord_ext, - const std::vector& atype_ext, - const std::vector& sel, - int nloc, - bool mixed_types) { - int nsel = 0; - for (auto s : sel) { - nsel += s; - } - int ntypes = sel.size(); - std::vector result(static_cast(nloc) * nsel, -1); - - for (int ii = 0; ii < nloc; ++ii) { - const auto& neighbors = raw_nlist[ii]; - VALUETYPE xi = coord_ext[ii * 3 + 0]; - VALUETYPE yi = coord_ext[ii * 3 + 1]; - VALUETYPE zi = coord_ext[ii * 3 + 2]; - int offset = ii * nsel; - - if (mixed_types) { - std::vector> all_neighbors; - for (int jj : neighbors) { - if (jj < 0) { - continue; - } - int jtype = atype_ext[jj]; - if (jtype < 0) { - continue; - } - VALUETYPE dx = coord_ext[jj * 3 + 0] - xi; - VALUETYPE dy = coord_ext[jj * 3 + 1] - yi; - VALUETYPE dz = coord_ext[jj * 3 + 2] - zi; - VALUETYPE rr = dx * dx + dy * dy + dz * dz; - all_neighbors.emplace_back(rr, jj); - } - std::sort(all_neighbors.begin(), all_neighbors.end()); - int count = std::min(static_cast(all_neighbors.size()), nsel); - for (int kk = 0; kk < count; ++kk) { - result[offset + kk] = all_neighbors[kk].second; - } - } else { - std::vector>> by_type(ntypes); - for (int jj : neighbors) { - if (jj < 0) { - continue; - } - int jtype = atype_ext[jj]; - if (jtype < 0 || jtype >= ntypes) { - continue; - } - VALUETYPE dx = coord_ext[jj * 3 + 0] - xi; - VALUETYPE dy = coord_ext[jj * 3 + 1] - yi; - VALUETYPE dz = coord_ext[jj * 3 + 2] - zi; - VALUETYPE rr = dx * dx + dy * dy + dz * dz; - by_type[jtype].emplace_back(rr, jj); - } - int col = 0; - for (int tt = 0; tt < ntypes; ++tt) { - auto& group = by_type[tt]; - std::sort(group.begin(), group.end()); - int count = std::min(static_cast(group.size()), sel[tt]); - for (int kk = 0; kk < count; ++kk) { - result[offset + col + kk] = group[kk].second; - } - col += sel[tt]; - } - } - } - - torch::Tensor tensor = - torch::from_blob(result.data(), {1, nloc, nsel}, - 
torch::TensorOptions().dtype(torch::kInt64)) - .clone(); - return tensor; -} - } // namespace ptexpt } // namespace deepmd diff --git a/source/api_cc/tests/CMakeLists.txt b/source/api_cc/tests/CMakeLists.txt index a3e7d067f7..a812f776fc 100644 --- a/source/api_cc/tests/CMakeLists.txt +++ b/source/api_cc/tests/CMakeLists.txt @@ -11,6 +11,7 @@ if(ENABLE_TENSORFLOW) endif() if(ENABLE_PYTORCH) target_compile_definitions(runUnitTests_cc PRIVATE BUILD_PYTORCH) + target_link_libraries(runUnitTests_cc "${TORCH_LIBRARIES}") endif() if(ENABLE_JAX) target_compile_definitions(runUnitTests_cc PRIVATE BUILD_JAX) diff --git a/source/api_cc/tests/test_deeppot_dpa_ptexpt_spin.cc b/source/api_cc/tests/test_deeppot_dpa_ptexpt_spin.cc index 27c1836aa1..5f5ae7a6b6 100644 --- a/source/api_cc/tests/test_deeppot_dpa_ptexpt_spin.cc +++ b/source/api_cc/tests/test_deeppot_dpa_ptexpt_spin.cc @@ -8,12 +8,13 @@ #include #include "DeepSpin.h" +#include "DeepSpinPTExpt.h" #include "neighbor_list.h" #include "test_utils.h" // Spin models need relaxed epsilon #undef EPSILON -#define EPSILON (std::is_same::value ? 1e-6 : 1e-1) +#define EPSILON (std::is_same::value ? 
1e-10 : 1e-4) // ============================================================================ // PBC test fixture @@ -52,7 +53,7 @@ class TestInferDeepSpinDpaPtExpt : public ::testing::Test { GTEST_SKIP() << "Skipping: " << model_path << " not found."; } } -#ifndef BUILD_PYTORCH +#if !defined(BUILD_PYTORCH) || !BUILD_PT_EXPT_SPIN GTEST_SKIP() << "Skip because PyTorch support is not enabled."; #endif dp.init(model_path); @@ -238,7 +239,7 @@ class TestInferDeepSpinDpaPtExptNopbc : public ::testing::Test { GTEST_SKIP() << "Skipping: " << model_path << " not found."; } } -#ifndef BUILD_PYTORCH +#if !defined(BUILD_PYTORCH) || !BUILD_PT_EXPT_SPIN GTEST_SKIP() << "Skip because PyTorch support is not enabled."; #endif dp.init(model_path); @@ -470,3 +471,103 @@ TYPED_TEST(TestInferDeepSpinDpaPtExptNopbc, cpu_lmp_nlist_atomic) { EXPECT_LT(fabs(atom_vir[ii] - expected_atom_v[ii]), EPSILON); } } + +TYPED_TEST(TestInferDeepSpinDpaPtExptNopbc, cpu_lmp_nlist_oversized) { + using VALUETYPE = TypeParam; + const std::vector& coord = this->coord; + const std::vector& spin = this->spin; + std::vector& atype = this->atype; + std::vector& box = this->box; + std::vector& expected_f = this->expected_f; + std::vector& expected_fm = this->expected_fm; + std::vector& expected_tot_v = this->expected_tot_v; + int& natoms = this->natoms; + double& expected_tot_e = this->expected_tot_e; + deepmd::DeepSpin& dp = this->dp; + double ener; + std::vector force, force_mag, virial; + + std::vector > nlist_data = { + {1, 2, 3, 4, 5}, {0, 2, 3, 4, 5}, {0, 1, 3, 4, 5}, + {0, 1, 2, 4, 5}, {0, 1, 2, 3, 5}, {0, 1, 2, 3, 4}}; + // Pad with extra -1 entries and shuffle to create oversized nlist + std::vector > nlist_oversized; + _pad_shuffle_nlist(nlist_oversized, nlist_data, 50); + std::vector ilist(natoms), numneigh(natoms); + std::vector firstneigh(natoms); + deepmd::InputNlist inlist(natoms, &ilist[0], &numneigh[0], &firstneigh[0]); + convert_nlist(inlist, nlist_oversized); + dp.compute(ener, force, 
force_mag, virial, coord, spin, atype, box, 0, inlist, + 0); + + EXPECT_EQ(force.size(), natoms * 3); + EXPECT_EQ(force_mag.size(), natoms * 3); + EXPECT_LT(fabs(ener - expected_tot_e), EPSILON); + for (int ii = 0; ii < natoms * 3; ++ii) { + EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON); + EXPECT_LT(fabs(force_mag[ii] - expected_fm[ii]), EPSILON); + } + EXPECT_FALSE(virial.empty()) << "Virial should not be empty"; + EXPECT_EQ(virial.size(), 9); + for (int ii = 0; ii < 3 * 3; ++ii) { + EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON); + } +} + +// Sanity check that the shuffle in `_pad_shuffle_nlist` actually redistributes +// real neighbors away from the front of each row — see the non-spin version +// in test_deeppot_ptexpt.cc for the rationale. At least one prediction +// (energy / force / force_mag / virial) must deviate from the reference by +// more than EPSILON when the nlist is truncated to a single column. +TYPED_TEST(TestInferDeepSpinDpaPtExptNopbc, + cpu_lmp_nlist_oversized_shuffle_sanity) { + using VALUETYPE = TypeParam; + const std::vector& coord = this->coord; + const std::vector& spin = this->spin; + std::vector& atype = this->atype; + std::vector& box = this->box; + std::vector& expected_f = this->expected_f; + std::vector& expected_fm = this->expected_fm; + std::vector& expected_tot_v = this->expected_tot_v; + int& natoms = this->natoms; + double& expected_tot_e = this->expected_tot_e; + deepmd::DeepSpin& dp = this->dp; + double ener; + std::vector force, force_mag, virial; + + std::vector > nlist_data = { + {1, 2, 3, 4, 5}, {0, 2, 3, 4, 5}, {0, 1, 3, 4, 5}, + {0, 1, 2, 4, 5}, {0, 1, 2, 3, 5}, {0, 1, 2, 3, 4}}; + std::vector > nlist_oversized; + _pad_shuffle_nlist(nlist_oversized, nlist_data, 50); + for (auto& row : nlist_oversized) { + row.resize(1); + } + std::vector ilist(natoms), numneigh(natoms); + std::vector firstneigh(natoms); + deepmd::InputNlist inlist(natoms, &ilist[0], &numneigh[0], &firstneigh[0]); + 
convert_nlist(inlist, nlist_oversized); + dp.compute(ener, force, force_mag, virial, coord, spin, atype, box, 0, inlist, + 0); + + // See test_deeppot_ptexpt.cc for the rationale of the separate negative- + // direction threshold. For the spin nopbc test the strongest signal + // is in force_mag (max magnitude ~0.97); 1e-3 is well below all + // expected signal magnitudes and well above the float precision floor. + static constexpr double SANITY_MIN_DEV = 1e-3; + bool any_deviates = fabs(ener - expected_tot_e) > SANITY_MIN_DEV; + for (int ii = 0; ii < natoms * 3 && !any_deviates; ++ii) { + any_deviates = fabs(force[ii] - expected_f[ii]) > SANITY_MIN_DEV; + } + for (int ii = 0; ii < natoms * 3 && !any_deviates; ++ii) { + any_deviates = fabs(force_mag[ii] - expected_fm[ii]) > SANITY_MIN_DEV; + } + for (int ii = 0; ii < 9 && !any_deviates; ++ii) { + any_deviates = fabs(virial[ii] - expected_tot_v[ii]) > SANITY_MIN_DEV; + } + EXPECT_TRUE(any_deviates) + << "Every prediction stayed within SANITY_MIN_DEV after single-column " + "truncation of the shuffled oversized nlist — the shuffle appears " + "to have kept real neighbors at the front. 
The oversized test " + "above may be a tautology; increase n_extra in _pad_shuffle_nlist."; +} diff --git a/source/api_cc/tests/test_deeppot_ptexpt.cc b/source/api_cc/tests/test_deeppot_ptexpt.cc index cfde173a82..201724a725 100644 --- a/source/api_cc/tests/test_deeppot_ptexpt.cc +++ b/source/api_cc/tests/test_deeppot_ptexpt.cc @@ -356,6 +356,166 @@ TYPED_TEST(TestInferDeepPotAPtExpt, cpu_lmp_nlist_2rc) { } } +TYPED_TEST(TestInferDeepPotAPtExpt, cpu_lmp_nlist_oversized) { + using VALUETYPE = TypeParam; + std::vector& coord = this->coord; + std::vector& atype = this->atype; + std::vector& box = this->box; + std::vector& expected_f = this->expected_f; + int& natoms = this->natoms; + double& expected_tot_e = this->expected_tot_e; + std::vector& expected_tot_v = this->expected_tot_v; + deepmd::DeepPot& dp = this->dp; + float rc = dp.cutoff(); + int nloc = coord.size() / 3; + std::vector coord_cpy; + std::vector atype_cpy, mapping; + std::vector > nlist_data; + _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord, + atype, box, rc); + // Pad with extra -1 entries and shuffle to create oversized nlist + std::vector > nlist_oversized; + _pad_shuffle_nlist(nlist_oversized, nlist_data, 200); + int nall = coord_cpy.size() / 3; + std::vector ilist(nloc), numneigh(nloc); + std::vector firstneigh(nloc); + deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]); + convert_nlist(inlist, nlist_oversized); + + double ener; + std::vector force_(nall * 3, 0.0), virial(9, 0.0); + dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc, + inlist, 0); + std::vector force; + _fold_back(force, force_, mapping, nloc, nall, 3); + + EXPECT_EQ(force.size(), natoms * 3); + EXPECT_EQ(virial.size(), 9); + + EXPECT_LT(fabs(ener - expected_tot_e), EPSILON); + for (int ii = 0; ii < natoms * 3; ++ii) { + EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON); + } + for (int ii = 0; ii < 3 * 3; ++ii) { + EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), 
EPSILON); + } +} + +// Sanity check for the oversized-nlist test above: the Python counterpart +// asserts that naïvely truncating the shuffled nlist (without distance +// sorting) produces a DIFFERENT prediction, proving that the shuffle did +// move real neighbors past the sum(sel) boundary. The C++ API always +// sorts on-device, so we verify the equivalent property by truncating the +// shuffled nlist to a single column: with the shuffle ratio (~5 real +// entries out of 205), almost every row ends up holding a -1, and the +// model's prediction must deviate from the reference on at least one of +// {energy, force, virial}. If every prediction still matches the +// reference within EPSILON, the shuffle is not meaningful and the +// oversized test above is a tautology. +TYPED_TEST(TestInferDeepPotAPtExpt, cpu_lmp_nlist_oversized_shuffle_sanity) { + using VALUETYPE = TypeParam; + std::vector& coord = this->coord; + std::vector& atype = this->atype; + std::vector& box = this->box; + std::vector& expected_f = this->expected_f; + std::vector& expected_tot_v = this->expected_tot_v; + int& natoms = this->natoms; + double& expected_tot_e = this->expected_tot_e; + deepmd::DeepPot& dp = this->dp; + float rc = dp.cutoff(); + int nloc = coord.size() / 3; + std::vector coord_cpy; + std::vector atype_cpy, mapping; + std::vector > nlist_data; + _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord, + atype, box, rc); + std::vector > nlist_oversized; + _pad_shuffle_nlist(nlist_oversized, nlist_data, 200); + for (auto& row : nlist_oversized) { + row.resize(1); + } + int nall = coord_cpy.size() / 3; + std::vector ilist(nloc), numneigh(nloc); + std::vector firstneigh(nloc); + deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]); + convert_nlist(inlist, nlist_oversized); + + double ener; + std::vector force_(nall * 3, 0.0), virial(9, 0.0); + dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc, + inlist, 0); + std::vector force; + 
_fold_back(force, force_, mapping, nloc, nall, 3); + + // Threshold for the negative-direction check is intentionally separate + // from EPSILON. EPSILON is a numerical-precision tolerance for positive + // assertions ("model matches reference"); here we want a physical-signal + // tolerance ("broken model produces a meaningful deviation"). 1e-3 is + // 100x below the smallest expected force magnitude (~1e-1) and well + // above the float precision noise floor (~1e-5), so it cleanly + // distinguishes "actually broken" from "noise". + static constexpr double SANITY_MIN_DEV = 1e-3; + bool any_deviates = fabs(ener - expected_tot_e) > SANITY_MIN_DEV; + for (int ii = 0; ii < natoms * 3 && !any_deviates; ++ii) { + any_deviates = fabs(force[ii] - expected_f[ii]) > SANITY_MIN_DEV; + } + for (int ii = 0; ii < 9 && !any_deviates; ++ii) { + any_deviates = fabs(virial[ii] - expected_tot_v[ii]) > SANITY_MIN_DEV; + } + EXPECT_TRUE(any_deviates) + << "Every prediction (energy/force/virial) stayed within " + "SANITY_MIN_DEV after single-column truncation of the shuffled " + "oversized nlist — the shuffle appears to have kept real neighbors " + "at the front. The oversized test above may be a tautology; " + "increase n_extra in _pad_shuffle_nlist."; +} + +// Edge case: a subdomain whose every local atom has zero neighbors within +// cutoff (e.g. under aggressive spatial partitioning on a small/sparse +// subdomain, or at the start of a simulation before atoms settle in). +// The C++ side builds an InputNlist with empty rows, which feeds +// `createNlistTensor` with min_nnei=sum(sel); the compiled .pt2 graph must +// then run cleanly on an all-`-1` nlist and produce a sensible (finite, +// interaction-free) result. Verifies both that the code path doesn't +// crash and that the forces/virial collapse to zero (no interactions). 
+TYPED_TEST(TestInferDeepPotAPtExpt, cpu_lmp_nlist_empty_subdomain) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  int& natoms = this->natoms;
+  deepmd::DeepPot& dp = this->dp;
+
+  // Pass coord/atype as-is; the model sees them, but `dp.compute` only
+  // uses the provided InputNlist for neighbor information.
+  int nall = natoms;
+  std::vector<std::vector<int> > nlist_data(natoms);  // every row empty
+  std::vector<int> ilist(natoms), numneigh(natoms);
+  std::vector<int*> firstneigh(natoms);
+  deepmd::InputNlist inlist(natoms, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  double ener;
+  std::vector<VALUETYPE> force(nall * 3, 0.0), virial(9, 0.0);
+  // Must not throw: zero-neighbor input is legal and expected under some
+  // spatial-partitioning configurations.
+  ASSERT_NO_THROW(
+      dp.compute(ener, force, virial, coord, atype, box, 0, inlist, 0));
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+  EXPECT_TRUE(std::isfinite(ener));
+  // With no neighbors, interaction forces and virial must be exactly zero
+  // (the descriptor sees only -1 entries, so pair contributions vanish).
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_TRUE(std::isfinite(force[ii]));
+    EXPECT_LT(fabs(force[ii]), EPSILON);
+  }
+  for (int ii = 0; ii < 9; ++ii) {
+    EXPECT_TRUE(std::isfinite(virial[ii]));
+    EXPECT_LT(fabs(virial[ii]), EPSILON);
+  }
+}
+
 TYPED_TEST(TestInferDeepPotAPtExpt, cpu_lmp_nlist_type_sel) {
   using VALUETYPE = TypeParam;
   std::vector<VALUETYPE>& coord = this->coord;
@@ -421,6 +581,52 @@ TYPED_TEST(TestInferDeepPotAPtExpt, print_summary) {
   dp.print_summary("");
 }
 
+// Regression test for the fail-fast guard hoisted in commit c80db58d.
+// `deeppot_sea_no_atomic_virial.pt2` is a copy of deeppot_sea.pt2 with
+// the do_atomic_virial=false flag patched into its metadata.json.
+// Calling compute() with atomic=true on this model must throw before
+// any tensors are allocated.
+TYPED_TEST(TestInferDeepPotAPtExpt, cpu_atomic_throws_when_disabled) {
+  using VALUETYPE = TypeParam;
+  deepmd::DeepPot dp_no_av;
+  ASSERT_NO_THROW(
+      dp_no_av.init("../../tests/infer/deeppot_sea_no_atomic_virial.pt2"));
+
+  std::vector<VALUETYPE>& coord = this->coord;
+  std::vector<int>& atype = this->atype;
+  std::vector<VALUETYPE>& box = this->box;
+  int& natoms = this->natoms;
+
+  // Build an LMP-style nlist so we exercise the nlist-overload of
+  // compute(); the no-nlist overload has the same guard but is
+  // covered by symmetry.
+  float rc = dp_no_av.cutoff();
+  int nloc = coord.size() / 3;
+  std::vector<VALUETYPE> coord_cpy;
+  std::vector<int> atype_cpy, mapping;
+  std::vector<std::vector<int> > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+               atype, box, rc);
+  int nall = coord_cpy.size() / 3;
+  std::vector<int> ilist(nloc), numneigh(nloc);
+  std::vector<int*> firstneigh(nloc);
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
+  convert_nlist(inlist, nlist_data);
+
+  double ener;
+  std::vector<VALUETYPE> force(nall * 3, 0.0), virial(9, 0.0), atom_ener,
+      atom_vir;
+  // atomic=true => guard must trip and throw deepmd_exception.
+  EXPECT_THROW(
+      dp_no_av.compute(ener, force, virial, atom_ener, atom_vir, coord_cpy,
+                       atype_cpy, box, nall - nloc, inlist, 0),
+      deepmd::deepmd_exception);
+  // atomic=false on the same model must work normally (sanity check
+  // that the guard fires only when actually requested).
+  EXPECT_NO_THROW(dp_no_av.compute(ener, force, virial, coord_cpy, atype_cpy,
+                                   box, nall - nloc, inlist, 0));
+}
+
 template <class VALUETYPE>
 class TestInferDeepPotAPtExptNoPbc : public ::testing::Test {
  protected:
diff --git a/source/api_cc/tests/test_deepspin_model_devi_ptexpt.cc b/source/api_cc/tests/test_deepspin_model_devi_ptexpt.cc
index e58d9c0f78..ada7c2eba7 100644
--- a/source/api_cc/tests/test_deepspin_model_devi_ptexpt.cc
+++ b/source/api_cc/tests/test_deepspin_model_devi_ptexpt.cc
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "DeepSpin.h"
+#include "DeepSpinPTExpt.h"
 #include "neighbor_list.h"
 #include "test_utils.h"
@@ -46,7 +47,7 @@ class TestInferDeepSpinModeDeviPtExpt : public ::testing::Test {
       GTEST_SKIP() << "Skipping: " << model1_path << " not found.";
     }
   }
-#ifndef BUILD_PYTORCH
+#if !defined(BUILD_PYTORCH) || !BUILD_PT_EXPT_SPIN
   GTEST_SKIP() << "Skip because PyTorch support is not enabled.";
 #endif
   dp0.init(model0_path);
diff --git a/source/api_cc/tests/test_neighbor_list_data.cc b/source/api_cc/tests/test_neighbor_list_data.cc
new file mode 100644
index 0000000000..3e5198d30d
--- /dev/null
+++ b/source/api_cc/tests/test_neighbor_list_data.cc
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// Backend-agnostic unit tests for deepmd::NeighborListData and the
+// deepmd::convert_nlist helper. Exercises edge cases (empty rows, empty
+// nlist) that surfaced as latent libstdc++-debug-mode UB assertions in
+// production code paths.
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "common.h"
+#include "neighbor_list.h"
+
+namespace deepmd {
+
+// Build a NeighborListData with @p nloc local atoms and zero neighbors per
+// atom. Realistic under aggressive spatial partitioning where a subdomain's
+// every local atom has no neighbors within cutoff.
+TEST(TestNeighborListData, MakeInlistEmptyRows) {
+  NeighborListData data;
+  const int nloc = 4;
+  data.ilist.resize(nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
+    data.ilist[ii] = ii;
+  }
+  data.jlist.resize(nloc);  // every row default-constructed to empty
+
+  // Must not trigger UB ('&vec[0]' on empty vector) under libstdc++ debug.
+  InputNlist inlist;
+  ASSERT_NO_THROW(data.make_inlist(inlist));
+
+  EXPECT_EQ(inlist.inum, nloc);
+  ASSERT_NE(inlist.numneigh, nullptr);
+  ASSERT_NE(inlist.firstneigh, nullptr);
+  for (int ii = 0; ii < nloc; ++ii) {
+    EXPECT_EQ(inlist.numneigh[ii], 0);
+  }
+}
+
+TEST(TestNeighborListData, MakeInlistMixedEmptyAndNonemptyRows) {
+  NeighborListData data;
+  data.ilist = {0, 1, 2};
+  data.jlist.resize(3);
+  // row 0: empty (legitimate edge case)
+  data.jlist[1] = {7, 8};
+  // row 2: empty
+  InputNlist inlist;
+  ASSERT_NO_THROW(data.make_inlist(inlist));
+  EXPECT_EQ(inlist.numneigh[0], 0);
+  EXPECT_EQ(inlist.numneigh[1], 2);
+  EXPECT_EQ(inlist.numneigh[2], 0);
+  // Only the populated row's firstneigh should be dereferenced.
+  EXPECT_EQ(inlist.firstneigh[1][0], 7);
+  EXPECT_EQ(inlist.firstneigh[1][1], 8);
+}
+
+// convert_nlist(jagged) must not dereference an empty row when populating
+// firstneigh. Regression test for the same `&vec[0]` UB pattern fixed in
+// commit 72f95f87.
+TEST(TestNeighborListData, ConvertNlistEmptyRows) {
+  std::vector<std::vector<int>> input = {{}, {}, {}};  // all rows empty
+  std::vector<int> ilist(input.size()), numneigh(input.size());
+  std::vector<int*> firstneigh(input.size());
+  InputNlist out(static_cast<int>(input.size()), ilist.data(), numneigh.data(),
+                 firstneigh.data());
+  ASSERT_NO_THROW(convert_nlist(out, input));
+  EXPECT_EQ(out.inum, 3);
+  for (int ii = 0; ii < 3; ++ii) {
+    EXPECT_EQ(out.numneigh[ii], 0);
+    // firstneigh[ii] may be vector::data()'s sentinel or nullptr — must not
+    // be dereferenced when numneigh[ii] == 0.
+  }
+}
+
+// copy_from_nlist must not dereference an empty source row even when
+// memcpy size is 0. Regression test for the same UB pattern.
+TEST(TestNeighborListData, CopyFromNlistEmptyRows) {
+  // Build an InputNlist with all empty rows.
+  const int nloc = 4;
+  std::vector<int> src_ilist(nloc), src_numneigh(nloc, 0);
+  std::vector<int*> src_firstneigh(nloc, nullptr);
+  for (int ii = 0; ii < nloc; ++ii) {
+    src_ilist[ii] = ii;
+  }
+  InputNlist src(nloc, src_ilist.data(), src_numneigh.data(),
+                 src_firstneigh.data());
+  src.mask = ~0;  // identity mask, must not be applied to absent neighbors
+
+  NeighborListData data;
+  ASSERT_NO_THROW(data.copy_from_nlist(src));
+  EXPECT_EQ(static_cast<int>(data.ilist.size()), nloc);
+  EXPECT_EQ(static_cast<int>(data.jlist.size()), nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
+    EXPECT_TRUE(data.jlist[ii].empty());
+  }
+}
+
+// copy_from_nlist with an empty source list (inum == 0) must not
+// dereference '&ilist[0]' on the empty target ilist.
+TEST(TestNeighborListData, CopyFromNlistInumZero) {
+  InputNlist src;
+  src.inum = 0;
+  src.ilist = nullptr;
+  src.numneigh = nullptr;
+  src.firstneigh = nullptr;
+  src.mask = ~0;
+
+  NeighborListData data;
+  ASSERT_NO_THROW(data.copy_from_nlist(src));
+  EXPECT_TRUE(data.ilist.empty());
+  EXPECT_TRUE(data.jlist.empty());
+}
+
+// Round-trip: convert_nlist(jagged) → copy_from_nlist → make_inlist
+// must preserve both empty and non-empty rows without UB.
+TEST(TestNeighborListData, RoundTripWithEmptyRows) {
+  std::vector<std::vector<int>> input = {{}, {3, 4}, {}, {5}};
+  std::vector<int> ilist(input.size()), numneigh(input.size());
+  std::vector<int*> firstneigh(input.size());
+  InputNlist src(static_cast<int>(input.size()), ilist.data(), numneigh.data(),
+                 firstneigh.data());
+  ASSERT_NO_THROW(convert_nlist(src, input));
+
+  NeighborListData data;
+  ASSERT_NO_THROW(data.copy_from_nlist(src));
+  EXPECT_EQ(static_cast<int>(data.jlist.size()), 4);
+  EXPECT_TRUE(data.jlist[0].empty());
+  EXPECT_EQ(data.jlist[1], (std::vector<int>{3, 4}));
+  EXPECT_TRUE(data.jlist[2].empty());
+  EXPECT_EQ(data.jlist[3], (std::vector<int>{5}));
+
+  InputNlist out;
+  ASSERT_NO_THROW(data.make_inlist(out));
+  EXPECT_EQ(out.numneigh[0], 0);
+  EXPECT_EQ(out.numneigh[1], 2);
+  EXPECT_EQ(out.numneigh[2], 0);
+  EXPECT_EQ(out.numneigh[3], 1);
+}
+
+}  // namespace deepmd
diff --git a/source/api_cc/tests/test_utils.h b/source/api_cc/tests/test_utils.h
index 51f05f9b54..9d5d5a4815 100644
--- a/source/api_cc/tests/test_utils.h
+++ b/source/api_cc/tests/test_utils.h
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #pragma once
 #include <cmath>
+#include <random>
 
 #include "SimulationRegion.h"
 #include "gtest/gtest.h"
@@ -84,6 +85,34 @@ inline void _build_nlist(std::vector<std::vector<int>>& nlist_data,
   coord_cpy.assign(coord_cpy_.begin(), coord_cpy_.end());
 }
 
+/**
+ * @brief Pad each atom's neighbor list with -1 entries and shuffle.
+ *
+ * Mimics the Python test_oversized_nlist approach: append n_extra
+ * padding entries (-1) to each row, then apply a deterministic
+ * permutation so that real neighbors are no longer at the front.
+ * This exercises the model's internal distance-sort + truncation.
+ */
+inline void _pad_shuffle_nlist(std::vector<std::vector<int>>& nlist_out,
+                               const std::vector<std::vector<int>>& nlist_in,
+                               int n_extra,
+                               unsigned int seed = 42) {
+  nlist_out.resize(nlist_in.size());
+  for (size_t ii = 0; ii < nlist_in.size(); ++ii) {
+    // copy original + pad with -1
+    nlist_out[ii] = nlist_in[ii];
+    for (int jj = 0; jj < n_extra; ++jj) {
+      nlist_out[ii].push_back(-1);
+    }
+    // deterministic shuffle (std::minstd_rand for reproducibility)
+    std::minstd_rand rng(seed + static_cast<unsigned int>(ii));
+    for (size_t jj = nlist_out[ii].size() - 1; jj > 0; --jj) {
+      size_t kk = rng() % (jj + 1);
+      std::swap(nlist_out[ii][jj], nlist_out[ii][kk]);
+    }
+  }
+}
+
 template <class VALUETYPE>
 class EnergyModelTest {
  protected:
diff --git a/source/lib/src/neighbor_list.cc b/source/lib/src/neighbor_list.cc
index 6723e3de66..3a0d8eb122 100644
--- a/source/lib/src/neighbor_list.cc
+++ b/source/lib/src/neighbor_list.cc
@@ -852,7 +852,11 @@ void deepmd::convert_nlist(InputNlist& to_nlist,
   for (int ii = 0; ii < to_nlist.inum; ++ii) {
     to_nlist.ilist[ii] = ii;
     to_nlist.numneigh[ii] = from_nlist[ii].size();
-    to_nlist.firstneigh[ii] = &from_nlist[ii][0];
+    // `&vec[0]` is undefined behaviour for empty vectors (libstdc++ debug
+    // mode asserts on it). When numneigh[ii] is 0 the pointer is never
+    // dereferenced; use vector::data() which is well-defined for empty
+    // vectors (may return nullptr).
+    to_nlist.firstneigh[ii] = from_nlist[ii].data();
   }
 }
 
diff --git a/source/tests/infer/case.py b/source/tests/infer/case.py
index 828974c6e6..84acc5bad5 100644
--- a/source/tests/infer/case.py
+++ b/source/tests/infer/case.py
@@ -173,7 +173,12 @@ def get_model(self, suffix: str, out_file: str | None = None) -> str:
             out_file = tempfile.NamedTemporaryFile(
                 suffix=suffix, dir=tempdir.name, delete=False, prefix=self.key + "_"
             ).name
-        convert_backend(INPUT=self.filename, OUTPUT=out_file)
+        # For .pte/.pt2, export with atomic virial so tests can verify
+        # per-atom virial against reference values.
+ kwargs: dict = {} + if suffix in (".pte", ".pt2"): + kwargs["atomic_virial"] = True + convert_backend(INPUT=self.filename, OUTPUT=out_file, **kwargs) return out_file diff --git a/source/tests/infer/gen_dpa1.py b/source/tests/infer/gen_dpa1.py index 26e8fc4afd..d587d7e534 100644 --- a/source/tests/infer/gen_dpa1.py +++ b/source/tests/infer/gen_dpa1.py @@ -85,7 +85,7 @@ def main(): pt2_path = os.path.join(base_dir, "deeppot_dpa1.pt2") print(f"Exporting to {pt2_path} ...") # noqa: T201 - pt_expt_deserialize_to_file(pt2_path, copy.deepcopy(data)) + pt_expt_deserialize_to_file(pt2_path, copy.deepcopy(data), do_atomic_virial=True) pth_path = os.path.join(base_dir, "deeppot_dpa1.pth") print(f"Exporting to {pth_path} ...") # noqa: T201 diff --git a/source/tests/infer/gen_dpa2.py b/source/tests/infer/gen_dpa2.py index 84b4d314a3..8ce277fcf5 100644 --- a/source/tests/infer/gen_dpa2.py +++ b/source/tests/infer/gen_dpa2.py @@ -108,7 +108,7 @@ def main(): pt2_path = os.path.join(base_dir, "deeppot_dpa2.pt2") print(f"Exporting to {pt2_path} ...") # noqa: T201 - pt_expt_deserialize_to_file(pt2_path, copy.deepcopy(data)) + pt_expt_deserialize_to_file(pt2_path, copy.deepcopy(data), do_atomic_virial=True) pth_path = os.path.join(base_dir, "deeppot_dpa2.pth") print(f"Exporting to {pth_path} ...") # noqa: T201 diff --git a/source/tests/infer/gen_dpa3.py b/source/tests/infer/gen_dpa3.py index 322163462d..e5e2d66579 100644 --- a/source/tests/infer/gen_dpa3.py +++ b/source/tests/infer/gen_dpa3.py @@ -86,7 +86,7 @@ def main(): pt2_path = os.path.join(base_dir, "deeppot_dpa3.pt2") print(f"Exporting to {pt2_path} ...") # noqa: T201 - pt_expt_deserialize_to_file(pt2_path, copy.deepcopy(data)) + pt_expt_deserialize_to_file(pt2_path, copy.deepcopy(data), do_atomic_virial=True) pth_path = os.path.join(base_dir, "deeppot_dpa3.pth") print(f"Exporting to {pth_path} ...") # noqa: T201 diff --git a/source/tests/infer/gen_fparam_aparam.py b/source/tests/infer/gen_fparam_aparam.py index 
f0d80d5764..13dbe4ffcf 100644 --- a/source/tests/infer/gen_fparam_aparam.py +++ b/source/tests/infer/gen_fparam_aparam.py @@ -67,7 +67,9 @@ def main(): } pt2_default_path = os.path.join(base_dir, "fparam_aparam_default.pt2") print(f"Exporting to {pt2_default_path} ...") # noqa: T201 - pt_expt_deserialize_to_file(pt2_default_path, copy.deepcopy(data_default)) + pt_expt_deserialize_to_file( + pt2_default_path, copy.deepcopy(data_default), do_atomic_virial=True + ) # ---- 3. Export fparam_aparam.pt2 and .pth (without default_fparam) ---- config_no_default = copy.deepcopy(config) @@ -84,7 +86,9 @@ def main(): pt2_path = os.path.join(base_dir, "fparam_aparam.pt2") print(f"Exporting to {pt2_path} ...") # noqa: T201 - pt_expt_deserialize_to_file(pt2_path, copy.deepcopy(data_no_default)) + pt_expt_deserialize_to_file( + pt2_path, copy.deepcopy(data_no_default), do_atomic_virial=True + ) pth_path = os.path.join(base_dir, "fparam_aparam.pth") pth_exported = False diff --git a/source/tests/infer/gen_model_devi.py b/source/tests/infer/gen_model_devi.py index bdceaa05aa..7e43add02e 100644 --- a/source/tests/infer/gen_model_devi.py +++ b/source/tests/infer/gen_model_devi.py @@ -99,7 +99,9 @@ def main(): } pt2_path = os.path.join(base_dir, f"model_devi_md{idx}.pt2") print(f"Exporting to {pt2_path} ...") # noqa: T201 - pt_expt_deserialize_to_file(pt2_path, copy.deepcopy(data)) + pt_expt_deserialize_to_file( + pt2_path, copy.deepcopy(data), do_atomic_virial=True + ) models.append(pt2_path) print("Export done.") # noqa: T201 diff --git a/source/tests/infer/gen_sea.py b/source/tests/infer/gen_sea.py index 84b78bbb78..02f4e7ee63 100644 --- a/source/tests/infer/gen_sea.py +++ b/source/tests/infer/gen_sea.py @@ -57,10 +57,52 @@ def main(): } print(f"Exporting to {pt2_path} ...") # noqa: T201 - deserialize_to_file(pt2_path, data) + deserialize_to_file(pt2_path, data, do_atomic_virial=True) + + # Produce a variant for regression-testing the C++ "atomic && + # !do_atomic_virial" throw path 
by copying the .pt2 archive and + # flipping the do_atomic_virial flag in its metadata.json — much + # cheaper than running a second AOTInductor compile. The compiled + # graph itself supports atomic virial; only the C++ guard differs. + import shutil + + pt2_no_aviral = os.path.join(base_dir, "deeppot_sea_no_atomic_virial.pt2") + print(f"Patching to {pt2_no_aviral} ...") # noqa: T201 + shutil.copyfile(pt2_path, pt2_no_aviral) + _patch_no_atomic_virial(pt2_no_aviral) print("Done!") # noqa: T201 +def _patch_no_atomic_virial(pt2_path: str) -> None: + """Flip do_atomic_virial=False in the metadata.json of a .pt2 archive. + + The .pt2 is a ZIP archive; the metadata blob lives at + ``extra/metadata.json``. We rewrite the archive with that one entry + replaced and all other entries preserved verbatim. + """ + import json + import zipfile + + metadata_name = "extra/metadata.json" + tmp_path = pt2_path + ".tmp" + # PyTorch .pt2 archives use ZIP_STORED (uncompressed) so that the C++ + # reader (read_zip_entry in commonPTExpt.h) and torch's mmap-based + # tensor loader can read entries without decompression. Preserve + # that on rewrite — using ZIP_DEFLATED would yield bytes the C++ + # reader treats as raw, resulting in JSON parse errors. 
+ with zipfile.ZipFile(pt2_path, "r") as src: + names = src.namelist() + meta = json.loads(src.read(metadata_name).decode("utf-8")) + meta["do_atomic_virial"] = False + with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_STORED) as dst: + for name in names: + if name == metadata_name: + dst.writestr(name, json.dumps(meta)) + else: + dst.writestr(name, src.read(name)) + os.replace(tmp_path, pt2_path) + + if __name__ == "__main__": main() diff --git a/source/tests/infer/gen_spin.py b/source/tests/infer/gen_spin.py index 3053e0ad4f..d37e3207ff 100644 --- a/source/tests/infer/gen_spin.py +++ b/source/tests/infer/gen_spin.py @@ -112,7 +112,7 @@ def main(): convert_backend(INPUT=yaml_path, OUTPUT=pth_path) print(f"Converting to {pt2_path} ...") # noqa: T201 - convert_backend(INPUT=yaml_path, OUTPUT=pt2_path) + convert_backend(INPUT=yaml_path, OUTPUT=pt2_path, atomic_virial=True) print("Export done.") # noqa: T201 diff --git a/source/tests/infer/gen_spin_model_devi.py b/source/tests/infer/gen_spin_model_devi.py index 3dc5240e14..72792205e2 100644 --- a/source/tests/infer/gen_spin_model_devi.py +++ b/source/tests/infer/gen_spin_model_devi.py @@ -116,7 +116,7 @@ def main(): pt2_path = os.path.join(base_dir, f"deeppot_dpa_spin_md{idx}.pt2") pt2_paths.append(pt2_path) print(f"Converting to {pt2_path} ...") # noqa: T201 - convert_backend(INPUT=yaml_path, OUTPUT=pt2_path) + convert_backend(INPUT=yaml_path, OUTPUT=pt2_path, atomic_virial=True) print("Export done.") # noqa: T201 diff --git a/source/tests/pt_expt/export_helpers.py b/source/tests/pt_expt/export_helpers.py index 453b9d0c01..ae4db82ddb 100644 --- a/source/tests/pt_expt/export_helpers.py +++ b/source/tests/pt_expt/export_helpers.py @@ -213,7 +213,7 @@ def model_forward_lower_export_round_trip( tracing_mode="symbolic", _allow_non_fake_inputs=True, ) - dynamic_shapes = _build_dynamic_shapes(*inputs_2f) + dynamic_shapes = _build_dynamic_shapes(*inputs_2f, model_nnei=sum(md_pt.get_sel())) exported_dyn = torch.export.export( 
traced_sym, inputs_2f, diff --git a/source/tests/pt_expt/infer/test_deep_eval.py b/source/tests/pt_expt/infer/test_deep_eval.py index 6797fa2c03..f96f08ae28 100644 --- a/source/tests/pt_expt/infer/test_deep_eval.py +++ b/source/tests/pt_expt/infer/test_deep_eval.py @@ -63,11 +63,11 @@ def setUpClass(cls) -> None: cls.model = cls.model.to(torch.float64) cls.model.eval() - # Serialize and save to .pte + # Serialize and save to .pte (with atomic virial for test_dynamic_shapes) cls.model_data = {"model": cls.model.serialize()} cls.tmpfile = tempfile.NamedTemporaryFile(suffix=".pte", delete=False) cls.tmpfile.close() - deserialize_to_file(cls.tmpfile.name, cls.model_data) + deserialize_to_file(cls.tmpfile.name, cls.model_data, do_atomic_virial=True) # Create DeepPot for testing cls.dp = DeepPot(cls.tmpfile.name) @@ -273,6 +273,106 @@ def test_dynamic_shapes(self) -> None: err_msg=f"nloc={nloc}, key={key}", ) + def test_oversized_nlist(self) -> None: + """Test that the exported model handles nlist with more neighbors than nnei. + + In LAMMPS, the neighbor list is built with rcut + skin, so atoms + typically have more neighbors than sum(sel). The compiled + format_nlist must sort by distance and truncate correctly. + + The test verifies two things: + + 1. **Correctness**: the exported model with an oversized, shuffled + nlist produces the same results as the eager model (both sort by + distance and keep the closest sum(sel) neighbors). + + 2. **Naive truncation produces wrong results**: simply taking the + first sum(sel) columns of the shuffled nlist (simulating a C++ + implementation that truncates without sorting) gives a different + energy. This proves the distance sort is necessary. 
+ """ + exported = torch.export.load(self.tmpfile.name) + exported_mod = exported.module() + + nnei = sum(self.sel) # model's expected neighbor count + nloc = 5 + ext_coord, ext_atype, nlist_t, mapping_t, fparam, aparam = _make_sample_inputs( + self.model, nloc=nloc + ) + + # Pad nlist with -1 columns, then shuffle column order so real + # neighbors are interspersed with absent ones beyond column sum(sel). + n_extra = nnei # double the nlist width + nlist_padded = torch.cat( + [ + nlist_t, + -torch.ones( + (*nlist_t.shape[:2], n_extra), + dtype=nlist_t.dtype, + device=nlist_t.device, + ), + ], + dim=-1, + ) + # Shuffle columns: move some real neighbors past sum(sel) boundary. + rng = np.random.default_rng(42) + perm = rng.permutation(nlist_padded.shape[-1]) + nlist_shuffled = nlist_padded[:, :, perm] + assert nlist_shuffled.shape[-1] > nnei + + # --- Part 1: exported model sorts correctly --- + # Reference: eager model with shuffled oversized nlist + ec = ext_coord.detach().requires_grad_(True) + ref_ret = self.model.forward_common_lower( + ec, + ext_atype, + nlist_shuffled, + mapping_t, + fparam=fparam, + aparam=aparam, + do_atomic_virial=True, + ) + + # Exported model with same shuffled oversized nlist + pte_ret = exported_mod( + ext_coord, ext_atype, nlist_shuffled, mapping_t, fparam, aparam + ) + + for key in ("energy", "energy_redu", "energy_derv_r", "energy_derv_c"): + if ref_ret[key] is not None and key in pte_ret: + np.testing.assert_allclose( + ref_ret[key].detach().cpu().numpy(), + pte_ret[key].detach().cpu().numpy(), + rtol=1e-10, + atol=1e-10, + err_msg=f"oversized nlist, key={key}", + ) + + # --- Part 2: naive truncation gives wrong results --- + # Simulate the old C++ bug: truncate shuffled nlist to sum(sel) columns + # without distance sorting. Some close neighbors that were shuffled + # beyond column sum(sel) are lost, producing wrong energy. 
+ nlist_truncated = nlist_shuffled[:, :, :nnei] + ec2 = ext_coord.detach().requires_grad_(True) + trunc_ret = self.model.forward_common_lower( + ec2, + ext_atype, + nlist_truncated, + mapping_t, + fparam=fparam, + aparam=aparam, + do_atomic_virial=True, + ) + # The truncated result MUST differ from the correctly sorted result, + # proving that naive truncation discards real neighbors. + e_ref = ref_ret["energy_redu"].detach().cpu().numpy() + e_trunc = trunc_ret["energy_redu"].detach().cpu().numpy() + assert not np.allclose(e_ref, e_trunc, rtol=1e-10, atol=1e-10), ( + "Naive truncation of shuffled nlist should give different energy, " + "but got the same result. The test data may not have enough " + "neighbors shuffled beyond sum(sel) to trigger the bug." + ) + def test_serialize_round_trip(self) -> None: """Test .pte → serialize_from_file → deserialize → model gives same outputs.""" loaded_data = serialize_from_file(self.tmpfile.name) @@ -547,14 +647,14 @@ def setUpClass(cls) -> None: # compilation (tests/pt/__init__.py sets it to "cuda:9999999"). torch.set_default_device(None) try: - deserialize_to_file(cls.tmpfile.name, cls.model_data) + deserialize_to_file(cls.tmpfile.name, cls.model_data, do_atomic_virial=True) finally: torch.set_default_device("cuda:9999999") # Also save to .pte for cross-format comparison cls.pte_tmpfile = tempfile.NamedTemporaryFile(suffix=".pte", delete=False) cls.pte_tmpfile.close() - deserialize_to_file(cls.pte_tmpfile.name, cls.model_data) + deserialize_to_file(cls.pte_tmpfile.name, cls.model_data, do_atomic_virial=True) # Create DeepPot for .pt2 cls.dp = DeepPot(cls.tmpfile.name) @@ -768,6 +868,99 @@ def test_no_pbc(self) -> None: v, ref["virial"].detach().cpu().numpy(), rtol=1e-10, atol=1e-10 ) + def test_oversized_nlist(self) -> None: + """Test that the exported model handles nlist with more neighbors than nnei. + + In LAMMPS, the neighbor list is built with rcut + skin, so atoms + typically have more neighbors than sum(sel). 
The compiled + format_nlist must sort by distance and truncate correctly. + + The test verifies two things: + + 1. **Correctness**: the exported model with an oversized, shuffled + nlist produces the same results as the eager model (both sort by + distance and keep the closest sum(sel) neighbors). + + 2. **Naive truncation produces wrong results**: simply taking the + first sum(sel) columns of the shuffled nlist (simulating a C++ + implementation that truncates without sorting) gives a different + energy. This proves the distance sort is necessary. + """ + exported = torch.export.load(self.pte_tmpfile.name) + exported_mod = exported.module() + + nnei = sum(self.sel) # model's expected neighbor count + nloc = 5 + ext_coord, ext_atype, nlist_t, mapping_t, fparam, aparam = _make_sample_inputs( + self.model, nloc=nloc + ) + + # Pad nlist with -1 columns, then shuffle column order so real + # neighbors are interspersed with absent ones beyond column sum(sel). + n_extra = nnei # double the nlist width + nlist_padded = torch.cat( + [ + nlist_t, + -torch.ones( + (*nlist_t.shape[:2], n_extra), + dtype=nlist_t.dtype, + device=nlist_t.device, + ), + ], + dim=-1, + ) + # Shuffle columns: move some real neighbors past sum(sel) boundary. 
+ rng = np.random.default_rng(42) + perm = rng.permutation(nlist_padded.shape[-1]) + nlist_shuffled = nlist_padded[:, :, perm] + assert nlist_shuffled.shape[-1] > nnei + + # --- Part 1: exported model sorts correctly --- + ec = ext_coord.detach().requires_grad_(True) + ref_ret = self.model.forward_common_lower( + ec, + ext_atype, + nlist_shuffled, + mapping_t, + fparam=fparam, + aparam=aparam, + do_atomic_virial=True, + ) + + pte_ret = exported_mod( + ext_coord, ext_atype, nlist_shuffled, mapping_t, fparam, aparam + ) + + for key in ("energy", "energy_redu", "energy_derv_r", "energy_derv_c"): + if ref_ret[key] is not None and key in pte_ret: + np.testing.assert_allclose( + ref_ret[key].detach().cpu().numpy(), + pte_ret[key].detach().cpu().numpy(), + rtol=1e-10, + atol=1e-10, + err_msg=f"oversized nlist, key={key}", + ) + + # --- Part 2: naive truncation gives wrong results --- + nlist_truncated = nlist_shuffled[:, :, :nnei] + ec2 = ext_coord.detach().requires_grad_(True) + trunc_ret = self.model.forward_common_lower( + ec2, + ext_atype, + nlist_truncated, + mapping_t, + fparam=fparam, + aparam=aparam, + do_atomic_virial=True, + ) + e_ref = ref_ret["energy_redu"].detach().cpu().numpy() + e_trunc = trunc_ret["energy_redu"].detach().cpu().numpy() + assert not np.allclose(e_ref, e_trunc, rtol=1e-10, atol=1e-10), ( + "Naive truncation of shuffled nlist should give different energy, " + "but got the same result. The test data may not have enough " + "neighbors shuffled beyond sum(sel) to trigger the bug." 
+ ) + def test_serialize_round_trip(self) -> None: """Test .pt2 → serialize_from_file → deserialize → model gives same outputs.""" loaded_data = serialize_from_file(self.tmpfile.name) @@ -1070,7 +1263,7 @@ def setUpClass(cls) -> None: cls.model_data = {"model": cls.model.serialize()} cls.tmpfile = tempfile.NamedTemporaryFile(suffix=".pte", delete=False) cls.tmpfile.close() - deserialize_to_file(cls.tmpfile.name, cls.model_data) + deserialize_to_file(cls.tmpfile.name, cls.model_data, do_atomic_virial=True) cls.dp = DeepPot(cls.tmpfile.name) @@ -1187,14 +1380,14 @@ def setUpClass(cls) -> None: cls.tmpfile.close() torch.set_default_device(None) try: - deserialize_to_file(cls.tmpfile.name, cls.model_data) + deserialize_to_file(cls.tmpfile.name, cls.model_data, do_atomic_virial=True) finally: torch.set_default_device("cuda:9999999") # Also save .pte for cross-format comparison cls.pte_tmpfile = tempfile.NamedTemporaryFile(suffix=".pte", delete=False) cls.pte_tmpfile.close() - deserialize_to_file(cls.pte_tmpfile.name, cls.model_data) + deserialize_to_file(cls.pte_tmpfile.name, cls.model_data, do_atomic_virial=True) cls.dp = DeepPot(cls.tmpfile.name) cls.dp_pte = DeepPot(cls.pte_tmpfile.name) diff --git a/source/tests/pt_expt/model/test_ener_model.py b/source/tests/pt_expt/model/test_ener_model.py index cd520b5cc0..b91653a260 100644 --- a/source/tests/pt_expt/model/test_ener_model.py +++ b/source/tests/pt_expt/model/test_ener_model.py @@ -366,7 +366,7 @@ def test_forward_lower_exportable(self) -> None: _allow_non_fake_inputs=True, ) - dynamic_shapes = _build_dynamic_shapes(*inputs_5f) + dynamic_shapes = _build_dynamic_shapes(*inputs_5f, model_nnei=sum(md.get_sel())) exported_dyn = torch.export.export( traced_sym, inputs_5f, diff --git a/source/tests/pt_expt/model/test_export_pipeline.py b/source/tests/pt_expt/model/test_export_pipeline.py index 8bff3d7130..23e0a62a98 100644 --- a/source/tests/pt_expt/model/test_export_pipeline.py +++ 
b/source/tests/pt_expt/model/test_export_pipeline.py @@ -149,7 +149,13 @@ def test_export_pipeline(self, descriptor_type, with_fparam) -> None: # 6. Export with dynamic shapes (same as dp freeze) dynamic_shapes = _build_dynamic_shapes( - ext_coord, ext_atype, nlist_t, mapping_t, fparam, aparam + ext_coord, + ext_atype, + nlist_t, + mapping_t, + fparam, + aparam, + model_nnei=sum(model2.get_sel()), ) exported = torch.export.export( traced, diff --git a/source/tests/pt_expt/model/test_spin_ener_model.py b/source/tests/pt_expt/model/test_spin_ener_model.py index 5dd6dbecf2..f7f96392d3 100644 --- a/source/tests/pt_expt/model/test_spin_ener_model.py +++ b/source/tests/pt_expt/model/test_spin_ener_model.py @@ -359,6 +359,56 @@ def setUp(self) -> None: class TestSpinEnerModelExportable(unittest.TestCase): + @staticmethod + def _build_extended_inputs(model, natoms=6): + """Build extended inputs for spin model export tests.""" + from deepmd.dpmodel.utils import ( + build_neighbor_list, + extend_coord_with_ghosts, + normalize_coord, + ) + + generator = torch.Generator(device="cpu").manual_seed(GLOBAL_SEED) + cell = torch.rand([3, 3], dtype=dtype, device="cpu", generator=generator) + cell = (cell + cell.T) + 5.0 * torch.eye(3, device="cpu") + coord = torch.rand([natoms, 3], dtype=dtype, device="cpu", generator=generator) + coord = torch.matmul(coord, cell) + atype = torch.tensor([0, 0, 1, 0, 1, 1], dtype=torch.int64) + spin = ( + torch.rand([natoms, 3], dtype=dtype, device="cpu", generator=generator) + * 0.5 + ) + + rcut = model.get_rcut() + sel = SPIN_DATA["descriptor"]["sel"] + coord_np = coord.unsqueeze(0).numpy() + atype_np = atype.unsqueeze(0).numpy() + box_np = cell.reshape(1, 9).numpy() + coord_normalized = normalize_coord( + coord_np.reshape(1, natoms, 3), + box_np.reshape(1, 3, 3), + ) + ext_coord, ext_atype, mapping = extend_coord_with_ghosts( + coord_normalized, atype_np, box_np, rcut + ) + nlist = build_neighbor_list( + ext_coord, ext_atype, natoms, rcut, sel, 
distinguish_types=True + ) + ext_coord = ext_coord.reshape(1, -1, 3) + spin_np = spin.unsqueeze(0).numpy() + ext_spin = np.take_along_axis( + spin_np, + np.repeat(mapping[:, :, np.newaxis], 3, axis=2), + axis=1, + ) + + ext_coord_t = torch.tensor(ext_coord, dtype=dtype, device=env.DEVICE) + ext_atype_t = torch.tensor(ext_atype, dtype=torch.int64, device=env.DEVICE) + nlist_t = torch.tensor(nlist, dtype=torch.int64, device=env.DEVICE) + mapping_t = torch.tensor(mapping, dtype=torch.int64, device=env.DEVICE) + ext_spin_t = torch.tensor(ext_spin, dtype=dtype, device=env.DEVICE) + return ext_coord_t, ext_atype_t, ext_spin_t, nlist_t, mapping_t, sel + def test_forward_lower_exportable(self) -> None: """Test that SpinEnergyModel.forward_lower_exportable works with make_fx and torch.export.""" from deepmd.dpmodel.utils import ( @@ -538,6 +588,80 @@ def test_forward_lower_exportable(self) -> None: err_msg=f"loaded vs eager (nf=1): {key}", ) + def test_oversized_nlist(self) -> None: + """Test that the exported spin model handles nlist with more neighbors than nnei. + + Verifies two things: + 1. The exported model with an oversized, shuffled nlist produces the + same results as the eager model. + 2. Naive truncation of the shuffled nlist gives different energy, + proving distance sort is necessary. 
+ """ + model = _make_model() + ext_coord_t, ext_atype_t, ext_spin_t, nlist_t, mapping_t, sel = ( + self._build_extended_inputs(model) + ) + nnei = sum(sel) + + # Pad and shuffle nlist + n_extra = nnei + nlist_padded = torch.cat( + [ + nlist_t, + -torch.ones( + (*nlist_t.shape[:2], n_extra), + dtype=nlist_t.dtype, + device=nlist_t.device, + ), + ], + dim=-1, + ) + rng = np.random.default_rng(42) + perm = rng.permutation(nlist_padded.shape[-1]) + nlist_shuffled = nlist_padded[:, :, perm] + assert nlist_shuffled.shape[-1] > nnei + + output_keys = ("energy", "extended_force", "extended_force_mag", "virial") + + # --- Part 1: exported model sorts correctly --- + traced = model.forward_lower_exportable( + ext_coord_t, + ext_atype_t, + ext_spin_t, + nlist_shuffled, + mapping_t, + ) + ret_traced = traced( + ext_coord_t, ext_atype_t, ext_spin_t, nlist_shuffled, mapping_t, None, None + ) + + ec = ext_coord_t.detach().requires_grad_(True) + ret_eager = model.forward_lower( + ec, ext_atype_t, ext_spin_t, nlist_shuffled, mapping_t + ) + + for key in output_keys: + if ret_eager.get(key) is not None and key in ret_traced: + np.testing.assert_allclose( + ret_eager[key].detach().cpu().numpy(), + ret_traced[key].detach().cpu().numpy(), + rtol=1e-10, + atol=1e-10, + err_msg=f"oversized nlist, key={key}", + ) + + # --- Part 2: naive truncation gives wrong results --- + nlist_truncated = nlist_shuffled[:, :, :nnei] + ec2 = ext_coord_t.detach().requires_grad_(True) + ret_trunc = model.forward_lower( + ec2, ext_atype_t, ext_spin_t, nlist_truncated, mapping_t + ) + e_ref = ret_eager["energy"].detach().cpu().numpy() + e_trunc = ret_trunc["energy"].detach().cpu().numpy() + assert not np.allclose(e_ref, e_trunc, rtol=1e-10, atol=1e-10), ( + "Naive truncation of shuffled nlist should give different energy." + ) + if __name__ == "__main__": unittest.main()