Skip to content

Commit 5450066

Browse files
authored
feat(pt): add hook to last fitting layer output (#4789)
<!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Added the ability to evaluate and retrieve the output of the last hidden layer in fitting neural networks, providing access to intermediate model outputs. - Extended evaluation interfaces to support fetching intermediate fitting outputs for both standard and mixed-type models. - **Improvements** - Enhanced output dictionaries to optionally include intermediate network outputs when enabled, allowing for more detailed inspection during evaluation. - **Tests** - Introduced tests to verify correctness and consistency of fitting last layer evaluations across supported model types. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent 4ad67d5 commit 5450066

10 files changed

Lines changed: 319 additions & 5 deletions

File tree

deepmd/dpmodel/utils/network.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,25 @@ def call(self, x):
642642
x = layer(x)
643643
return x
644644

645+
def call_until_last(self, x):
    """Feed ``x`` through every layer except the final one.

    Parameters
    ----------
    x : np.ndarray
        The input.

    Returns
    -------
    np.ndarray
        The activation produced just before the last layer.
    """
    # Deliberately loop instead of slicing (self.layers[:-1]) so the
    # code stays jit-scriptable.
    last = len(self.layers) - 1
    for idx, layer in enumerate(self.layers):
        if idx < last:
            x = layer(x)
    return x
663+
645664
def clear(self) -> None:
646665
"""Clear the network parameters to zero."""
647666
for layer in self.layers:

deepmd/infer/deep_eval.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,48 @@ def eval_descriptor(
215215
"""
216216
raise NotImplementedError
217217

218+
def eval_fitting_last_layer(
    self,
    coords: np.ndarray,
    cells: Optional[np.ndarray],
    atom_types: np.ndarray,
    fparam: Optional[np.ndarray] = None,
    aparam: Optional[np.ndarray] = None,
    **kwargs: Any,
) -> np.ndarray:
    """Evaluate fitting before last layer by using this DP.

    Backends that support inspecting the fitting network's last hidden
    layer must override this method; the base implementation only
    signals that the feature is unavailable.

    Parameters
    ----------
    coords
        The coordinates of atoms.
        The array should be of size nframes x natoms x 3
    cells
        The cell of the region.
        If None then non-PBC is assumed, otherwise using PBC.
        The array should be of size nframes x 9
    atom_types
        The atom types
        The list should contain natoms ints
    fparam
        The frame parameter.
        The array can be of size :
        - nframes x dim_fparam.
        - dim_fparam. Then all frames are assumed to be provided with the same fparam.
    aparam
        The atomic parameter
        The array can be of size :
        - nframes x natoms x dim_aparam.
        - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
        - dim_aparam. Then all frames and atoms are provided with the same aparam.

    Returns
    -------
    fitting
        Fitting output before last layer.

    Raises
    ------
    NotImplementedError
        Always, unless a backend overrides this method.
    """
    raise NotImplementedError
259+
218260
def eval_typeebd(self) -> np.ndarray:
219261
"""Evaluate output of type embedding network by using this model.
220262
@@ -467,6 +509,73 @@ def eval_descriptor(
467509
)
468510
return descriptor
469511

512+
def eval_fitting_last_layer(
    self,
    coords: np.ndarray,
    cells: Optional[np.ndarray],
    atom_types: np.ndarray,
    fparam: Optional[np.ndarray] = None,
    aparam: Optional[np.ndarray] = None,
    mixed_type: bool = False,
    **kwargs: Any,
) -> np.ndarray:
    """Evaluate fitting before last layer by using this DP.

    Parameters
    ----------
    coords
        The coordinates of atoms.
        The array should be of size nframes x natoms x 3
    cells
        The cell of the region.
        If None then non-PBC is assumed, otherwise using PBC.
        The array should be of size nframes x 9
    atom_types
        The atom types
        The list should contain natoms ints
    fparam
        The frame parameter.
        The array can be of size :
        - nframes x dim_fparam.
        - dim_fparam. Then all frames are assumed to be provided with the same fparam.
    aparam
        The atomic parameter
        The array can be of size :
        - nframes x natoms x dim_aparam.
        - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
        - dim_aparam. Then all frames and atoms are provided with the same aparam.
    mixed_type
        Whether to perform the mixed_type mode.
        If True, the input data has the mixed_type format (see doc/model/train_se_atten.md),
        in which frames in a system may have different natoms_vec(s), with the same nloc.
    **kwargs
        Other parameters forwarded to the backend evaluator.

    Returns
    -------
    fitting
        Fitting output before last layer.
    """
    # NOTE: the previous docstring documented an ``efield`` parameter that
    # does not exist in this signature (copied from eval_descriptor); it
    # has been removed.
    # Normalize/reshape the raw inputs into the backend's expected layout.
    (
        coords,
        cells,
        atom_types,
        fparam,
        aparam,
        nframes,
        natoms,
    ) = self._standard_input(coords, cells, atom_types, fparam, aparam, mixed_type)
    fitting = self.deep_eval.eval_fitting_last_layer(
        coords,
        cells,
        atom_types,
        fparam=fparam,
        aparam=aparam,
        **kwargs,
    )
    return fitting
578+
470579
def eval_typeebd(self) -> np.ndarray:
471580
"""Evaluate output of type embedding network by using this model.
472581

deepmd/pt/infer/deep_eval.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,3 +722,58 @@ def eval_descriptor(
722722
descriptor = model.eval_descriptor()
723723
model.set_eval_descriptor_hook(False)
724724
return to_numpy_array(descriptor)
725+
726+
def eval_fitting_last_layer(
    self,
    coords: np.ndarray,
    cells: Optional[np.ndarray],
    atom_types: np.ndarray,
    fparam: Optional[np.ndarray] = None,
    aparam: Optional[np.ndarray] = None,
    **kwargs: Any,
) -> np.ndarray:
    """Evaluate fitting before last layer by using this DP.

    Parameters
    ----------
    coords
        The coordinates of atoms.
        The array should be of size nframes x natoms x 3
    cells
        The cell of the region.
        If None then non-PBC is assumed, otherwise using PBC.
        The array should be of size nframes x 9
    atom_types
        The atom types
        The list should contain natoms ints
    fparam
        The frame parameter.
        The array can be of size :
        - nframes x dim_fparam.
        - dim_fparam. Then all frames are assumed to be provided with the same fparam.
    aparam
        The atomic parameter
        The array can be of size :
        - nframes x natoms x dim_aparam.
        - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
        - dim_aparam. Then all frames and atoms are provided with the same aparam.

    Returns
    -------
    fitting
        Fitting output before last layer.
    """
    model = self.dp.model["Default"]
    model.set_eval_fitting_last_layer_hook(True)
    try:
        # A regular evaluation; the hook records the fitting net's
        # last-hidden-layer output as a side effect.
        self.eval(
            coords,
            cells,
            atom_types,
            atomic=False,
            fparam=fparam,
            aparam=aparam,
            **kwargs,
        )
        fitting_net = model.eval_fitting_last_layer()
    finally:
        # BUGFIX: disable the hook even when eval() or the cache read
        # raises (e.g. "not supported for this fitting net"); otherwise
        # the hook would stay enabled and pollute later evaluations.
        model.set_eval_fitting_last_layer_hook(False)
    return to_numpy_array(fitting_net)

deepmd/pt/model/atomic_model/dp_atomic_model.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,12 @@ def __init__(
6262
self.fitting_net = fitting
6363
super().init_out_stat()
6464
self.enable_eval_descriptor_hook = False
65+
self.enable_eval_fitting_last_layer_hook = False
6566
self.eval_descriptor_list = []
67+
self.eval_fitting_last_layer_list = []
6668

6769
eval_descriptor_list: list[torch.Tensor]
70+
eval_fitting_last_layer_list: list[torch.Tensor]
6871

6972
def set_eval_descriptor_hook(self, enable: bool) -> None:
7073
"""Set the hook for evaluating descriptor and clear the cache for descriptor list."""
@@ -76,6 +79,17 @@ def eval_descriptor(self) -> torch.Tensor:
7679
"""Evaluate the descriptor."""
7780
return torch.concat(self.eval_descriptor_list)
7881

82+
def set_eval_fitting_last_layer_hook(self, enable: bool) -> None:
    """Toggle caching of the fitting net's last-hidden-layer output.

    Also resets the cached output list so a new evaluation starts clean.
    """
    # Rebinding with `= []` breaks TorchScript; mutate in place (see #4533).
    self.eval_fitting_last_layer_list.clear()
    self.enable_eval_fitting_last_layer_hook = enable
    self.fitting_net.set_return_middle_output(enable)
88+
89+
def eval_fitting_last_layer(self) -> torch.Tensor:
    """Concatenate and return the cached last-hidden-layer outputs."""
    # One tensor per forward call accumulated by the hook; join them.
    return torch.cat(self.eval_fitting_last_layer_list)
92+
7993
@torch.jit.export
8094
def fitting_output_def(self) -> FittingOutputDef:
8195
"""Get the output def of the fitting net."""
@@ -255,6 +269,13 @@ def forward_atomic(
255269
fparam=fparam,
256270
aparam=aparam,
257271
)
272+
if self.enable_eval_fitting_last_layer_hook:
273+
assert "middle_output" in fit_ret, (
274+
"eval_fitting_last_layer not supported for this fitting net!"
275+
)
276+
self.eval_fitting_last_layer_list.append(
277+
fit_ret.pop("middle_output").detach()
278+
)
258279
return fit_ret
259280

260281
def get_out_bias(self) -> torch.Tensor:

deepmd/pt/model/model/dp_model.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,13 @@ def set_eval_descriptor_hook(self, enable: bool) -> None:
6464
def eval_descriptor(self) -> torch.Tensor:
6565
"""Evaluate the descriptor."""
6666
return self.atomic_model.eval_descriptor()
67+
68+
@torch.jit.export
def set_eval_fitting_last_layer_hook(self, enable: bool) -> None:
    """Enable or disable caching of the fitting net's last-hidden-layer
    output, delegating to the wrapped atomic model (which also clears
    its cached output list).
    """
    self.atomic_model.set_eval_fitting_last_layer_hook(enable)

@torch.jit.export
def eval_fitting_last_layer(self) -> torch.Tensor:
    """Return the cached last-hidden-layer output from the atomic model."""
    return self.atomic_model.eval_fitting_last_layer()

deepmd/pt/model/task/fitting.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,8 @@ def __init__(
329329
for param in self.parameters():
330330
param.requires_grad = self.trainable
331331

332+
self.eval_return_middle_output = False
333+
332334
def reinit_exclude(
333335
self,
334336
exclude_types: list[int] = [],
@@ -450,6 +452,9 @@ def set_case_embd(self, case_idx: int):
450452
case_idx
451453
]
452454

455+
def set_return_middle_output(self, return_middle_output: bool = True) -> None:
456+
self.eval_return_middle_output = return_middle_output
457+
453458
def __setitem__(self, key, value) -> None:
454459
if key in ["bias_atom_e"]:
455460
value = value.view([self.ntypes, self._net_out_dim()])
@@ -598,14 +603,37 @@ def _forward_common(
598603
dtype=self.prec,
599604
device=descriptor.device,
600605
) # jit assertion
606+
results = {}
607+
601608
if self.mixed_types:
602609
atom_property = self.filter_layers.networks[0](xx)
610+
if self.eval_return_middle_output:
611+
results["middle_output"] = self.filter_layers.networks[
612+
0
613+
].call_until_last(xx)
603614
if xx_zeros is not None:
604615
atom_property -= self.filter_layers.networks[0](xx_zeros)
605616
outs = (
606617
outs + atom_property + self.bias_atom_e[atype].to(self.prec)
607618
) # Shape is [nframes, natoms[0], net_dim_out]
608619
else:
620+
if self.eval_return_middle_output:
621+
outs_middle = torch.zeros(
622+
(nf, nloc, self.neuron[-1]),
623+
dtype=self.prec,
624+
device=descriptor.device,
625+
) # jit assertion
626+
for type_i, ll in enumerate(self.filter_layers.networks):
627+
mask = (atype == type_i).unsqueeze(-1)
628+
mask = torch.tile(mask, (1, 1, net_dim_out))
629+
middle_output_type = ll.call_until_last(xx)
630+
middle_output_type = torch.where(
631+
torch.tile(mask, (1, 1, self.neuron[-1])),
632+
middle_output_type,
633+
0.0,
634+
)
635+
outs_middle = outs_middle + middle_output_type
636+
results["middle_output"] = outs_middle
609637
for type_i, ll in enumerate(self.filter_layers.networks):
610638
mask = (atype == type_i).unsqueeze(-1)
611639
mask = torch.tile(mask, (1, 1, net_dim_out))
@@ -627,4 +655,5 @@ def _forward_common(
627655
mask = self.emask(atype).to(torch.bool)
628656
# nf x nloc x nod
629657
outs = torch.where(mask[:, :, None], outs, 0.0)
630-
return {self.var_name: outs}
658+
results.update({self.var_name: outs})
659+
return results

deepmd/pt/model/task/invar_fitting.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,10 +181,17 @@ def forward(
181181
-------
182182
- `torch.Tensor`: Total energy with shape [nframes, natoms[0]].
183183
"""
184-
out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)[
185-
self.var_name
186-
]
187-
return {self.var_name: out.to(env.GLOBAL_PT_FLOAT_PRECISION)}
184+
out = self._forward_common(descriptor, atype, gr, g2, h2, fparam, aparam)
185+
result = {self.var_name: out[self.var_name].to(env.GLOBAL_PT_FLOAT_PRECISION)}
186+
if "middle_output" in out:
187+
result.update(
188+
{
189+
"middle_output": out["middle_output"].to(
190+
env.GLOBAL_PT_FLOAT_PRECISION
191+
)
192+
}
193+
)
194+
return result
188195

189196
# make jit happy with torch 2.0.0
190197
exclude_types: list[int]

source/tests/infer/case.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,13 @@ def __init__(self, data: dict) -> None:
125125
else:
126126
self.descriptor = None
127127

128+
if "fit_ll" in data:
129+
self.fit_ll = np.array(data["fit_ll"], dtype=np.float64).reshape(
130+
self.nloc, -1
131+
)
132+
else:
133+
self.fit_ll = None
134+
128135

129136
class Case:
130137
"""Test case.

0 commit comments

Comments
 (0)