Skip to content

Commit e98dc5a

Browse files
fix(finetune): calculate fitting stat when using random fitting in finetuning process (#4928)
In the finetuning process, the computation of the fitting stat was skipped by the previous code. There are two situations: 1. Finetuning from a pretrained model's branch: the pretrained model also has `fparam` or `aparam`, with the same meaning as in the finetuning task. The keys `fparam_avg`/`fparam_inv_std`/`aparam_avg`/`aparam_inv_std` are loaded from the pretrained model, which is correct. 2. Finetuning using a RANDOM fitting: the fitting stat should be calculated in this situation, but its computation was skipped, which is an error. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** - Automatic computation of input statistics is now performed during bias adjustment in "set-by-statistic" mode; public API extended to support computing fitting input statistics. * **Tests** - Training tests now compare additional parameter categories to the random-finetuned baseline. - Added test-only batching configuration for data-statistics and new unit tests validating fitting input-statistics calculation. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent da452d7 commit e98dc5a

File tree

10 files changed

+256
-8
lines changed

10 files changed

+256
-8
lines changed

deepmd/dpmodel/fitting/general_fitting.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
)
55
from typing import (
66
Any,
7+
Callable,
78
Optional,
89
Union,
910
)
@@ -221,6 +222,71 @@ def __init__(
221222
],
222223
)
223224

225+
def compute_input_stats(
    self,
    merged: Union[Callable[[], list[dict]], list[dict]],
    protection: float = 1e-2,
) -> None:
    """
    Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.

    Parameters
    ----------
    merged : Union[Callable[[], list[dict]], list[dict]]
        - list[dict]: A list of data samples from various data systems.
            Each element, `merged[i]`, is a data dictionary containing `keys`: `numpy.ndarray`
            originating from the `i`-th data system.
        - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
            only when needed. Since the sampling process can be slow and memory-intensive,
            the lazy function helps by only sampling once.
    protection : float
        Divided-by-zero protection: stddevs below this value are clamped to it,
        so the stored inverse stddev never exceeds 1/protection.
    """
    if self.numb_fparam == 0 and self.numb_aparam == 0:
        # no fitting input parameters: nothing to accumulate
        return
    # materialize the (possibly expensive) sample at most once
    sampled = merged() if callable(merged) else merged
    # stat fparam: frame parameters are small, so concatenate and reduce directly
    if self.numb_fparam > 0:
        cat_data = np.concatenate([frame["fparam"] for frame in sampled], axis=0)
        cat_data = np.reshape(cat_data, [-1, self.numb_fparam])
        fparam_avg = np.mean(cat_data, axis=0)
        fparam_std = np.std(cat_data, axis=0, ddof=0)  # ddof=0 for population std
        # clamp tiny stddevs so the inverse cannot blow up
        fparam_std = np.where(
            fparam_std < protection,
            np.array(protection, dtype=fparam_std.dtype),
            fparam_std,
        )
        fparam_inv_std = 1.0 / fparam_std
        self.fparam_avg = fparam_avg.astype(self.fparam_avg.dtype)
        self.fparam_inv_std = fparam_inv_std.astype(self.fparam_inv_std.dtype)
    # stat aparam: atomic parameters can be large, so accumulate streaming
    # per-system sums instead of concatenating everything.
    if self.numb_aparam > 0:
        sys_sumv = []
        sys_sumv2 = []
        sys_sumn = []
        for ss_ in [frame["aparam"] for frame in sampled]:
            ss = np.reshape(ss_, [-1, self.numb_aparam])
            sys_sumv.append(np.sum(ss, axis=0))
            sys_sumv2.append(np.sum(ss * ss, axis=0))
            sys_sumn.append(ss.shape[0])
        sumv = np.sum(np.stack(sys_sumv), axis=0)
        sumv2 = np.sum(np.stack(sys_sumv2), axis=0)
        sumn = sum(sys_sumn)
        aparam_avg = sumv / sumn
        # E[x^2] - E[x]^2 can come out slightly negative due to floating-point
        # cancellation; clamp at zero so sqrt never produces NaN (a NaN would
        # silently bypass the protection check below).
        aparam_var = np.maximum(sumv2 / sumn - (sumv / sumn) ** 2, 0.0)
        aparam_std = np.sqrt(aparam_var)
        aparam_std = np.where(
            aparam_std < protection,
            np.array(protection, dtype=aparam_std.dtype),
            aparam_std,
        )
        aparam_inv_std = 1.0 / aparam_std
        self.aparam_avg = aparam_avg.astype(self.aparam_avg.dtype)
        self.aparam_inv_std = aparam_inv_std.astype(self.aparam_inv_std.dtype)
289+
224290
@abstractmethod
225291
def _net_out_dim(self) -> int:
226292
"""Set the FittingNet output dim."""

deepmd/pd/model/atomic_model/base_atomic_model.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,24 @@ def change_out_bias(
515515
else:
516516
raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode)
517517

518+
def compute_fitting_input_stat(
    self,
    sample_merged: Union[Callable[[], list[dict]], list[dict]],
) -> None:
    """Hook for computing input statistics (mean/stddev) of the fitting inputs.

    The base atomic model has no fitting input parameters, so this is a
    no-op; subclasses that own a fitting net with ``fparam``/``aparam``
    override it.

    Parameters
    ----------
    sample_merged : Union[Callable[[], list[dict]], list[dict]]
        Either a pre-sampled list of per-system data dicts whose values are
        `paddle.Tensor`, or a zero-argument callable producing that list
        lazily, so the costly sampling step runs at most once.
    """
    # intentionally empty: overridden where fitting stats are meaningful
    return None
535+
518536
def _get_forward_wrapper_func(self) -> Callable[..., paddle.Tensor]:
519537
"""Get a forward wrapper of the atomic model for output bias calculation."""
520538

deepmd/pd/model/atomic_model/dp_atomic_model.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -397,12 +397,30 @@ def wrapped_sampler():
397397
return sampled
398398

399399
self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
400-
self.fitting_net.compute_input_stats(
401-
wrapped_sampler, protection=self.data_stat_protect
402-
)
400+
self.compute_fitting_input_stat(wrapped_sampler)
403401
if compute_or_load_out_stat:
404402
self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
405403

404+
def compute_fitting_input_stat(
405+
self,
406+
sample_merged: Union[Callable[[], list[dict]], list[dict]],
407+
) -> None:
408+
"""Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
409+
410+
Parameters
411+
----------
412+
sample_merged : Union[Callable[[], list[dict]], list[dict]]
413+
- list[dict]: A list of data samples from various data systems.
414+
Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor`
415+
originating from the `i`-th data system.
416+
- Callable[[], list[dict]]: A lazy function that returns data samples in the above format
417+
only when needed. Since the sampling process can be slow and memory-intensive,
418+
the lazy function helps by only sampling once.
419+
"""
420+
self.fitting_net.compute_input_stats(
421+
sample_merged, protection=self.data_stat_protect
422+
)
423+
406424
def get_dim_fparam(self) -> int:
407425
"""Get the number (dimension) of frame parameters of this atomic model."""
408426
return self.fitting_net.get_dim_fparam()

deepmd/pd/model/model/make_model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ def change_out_bias(
228228
merged,
229229
bias_adjust_mode=bias_adjust_mode,
230230
)
231+
if bias_adjust_mode == "set-by-statistic":
232+
self.atomic_model.compute_fitting_input_stat(merged)
231233

232234
def forward_common_lower(
233235
self,

deepmd/pt/model/atomic_model/base_atomic_model.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,24 @@ def change_out_bias(
493493
else:
494494
raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode)
495495

496+
def compute_fitting_input_stat(
    self,
    sample_merged: Union[Callable[[], list[dict]], list[dict]],
) -> None:
    """Hook for computing input statistics (mean/stddev) of the fitting inputs.

    The base atomic model has no fitting input parameters, so this is a
    no-op; subclasses that own a fitting net with ``fparam``/``aparam``
    override it.

    Parameters
    ----------
    sample_merged : Union[Callable[[], list[dict]], list[dict]]
        Either a pre-sampled list of per-system data dicts whose values are
        `torch.Tensor`, or a zero-argument callable producing that list
        lazily, so the costly sampling step runs at most once.
    """
    # intentionally empty: overridden where fitting stats are meaningful
    return None
513+
496514
def _get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]:
497515
"""Get a forward wrapper of the atomic model for output bias calculation."""
498516

deepmd/pt/model/atomic_model/dp_atomic_model.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
Any,
66
Callable,
77
Optional,
8+
Union,
89
)
910

1011
import torch
@@ -328,12 +329,30 @@ def wrapped_sampler() -> list[dict]:
328329
return sampled
329330

330331
self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
331-
self.fitting_net.compute_input_stats(
332-
wrapped_sampler, protection=self.data_stat_protect
333-
)
332+
self.compute_fitting_input_stat(wrapped_sampler)
334333
if compute_or_load_out_stat:
335334
self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
336335

336+
def compute_fitting_input_stat(
    self,
    sample_merged: Union[Callable[[], list[dict]], list[dict]],
) -> None:
    """Delegate fitting input statistics (mean/stddev) to the fitting net.

    Parameters
    ----------
    sample_merged : Union[Callable[[], list[dict]], list[dict]]
        Either a pre-sampled list of per-system data dicts whose values are
        `torch.Tensor`, or a zero-argument callable producing that list
        lazily, so the costly sampling step runs at most once.
    """
    # forward together with the model-level divide-by-zero protection
    self.fitting_net.compute_input_stats(
        sample_merged,
        protection=self.data_stat_protect,
    )
355+
337356
def get_dim_fparam(self) -> int:
338357
"""Get the number (dimension) of frame parameters of this atomic model."""
339358
return self.fitting_net.get_dim_fparam()

deepmd/pt/model/model/make_model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ def change_out_bias(
232232
merged,
233233
bias_adjust_mode=bias_adjust_mode,
234234
)
235+
if bias_adjust_mode == "set-by-statistic":
236+
self.atomic_model.compute_fitting_input_stat(merged)
235237

236238
def forward_common_lower(
237239
self,
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# SPDX-License-Identifier: LGPL-3.0-or-later
2+
import unittest
3+
4+
import numpy as np
5+
6+
from deepmd.dpmodel.descriptor import (
7+
DescrptSeA,
8+
)
9+
from deepmd.dpmodel.fitting import (
10+
EnergyFittingNet,
11+
)
12+
13+
14+
def _make_fake_data_pt(sys_natoms, sys_nframes, avgs, stds):
15+
merged_output_stat = []
16+
nsys = len(sys_natoms)
17+
ndof = len(avgs)
18+
for ii in range(nsys):
19+
sys_dict = {}
20+
tmp_data_f = []
21+
tmp_data_a = []
22+
for jj in range(ndof):
23+
rng = np.random.default_rng(2025 * ii + 220 * jj)
24+
tmp_data_f.append(
25+
rng.normal(loc=avgs[jj], scale=stds[jj], size=(sys_nframes[ii], 1))
26+
)
27+
rng = np.random.default_rng(220 * ii + 1636 * jj)
28+
tmp_data_a.append(
29+
rng.normal(
30+
loc=avgs[jj], scale=stds[jj], size=(sys_nframes[ii], sys_natoms[ii])
31+
)
32+
)
33+
tmp_data_f = np.transpose(tmp_data_f, (1, 2, 0))
34+
tmp_data_a = np.transpose(tmp_data_a, (1, 2, 0))
35+
sys_dict["fparam"] = tmp_data_f
36+
sys_dict["aparam"] = tmp_data_a
37+
merged_output_stat.append(sys_dict)
38+
return merged_output_stat
39+
40+
41+
def _brute_fparam_pt(data, ndim):
42+
adata = [ii["fparam"] for ii in data]
43+
all_data = []
44+
for ii in adata:
45+
tmp = np.reshape(ii, [-1, ndim])
46+
if len(all_data) == 0:
47+
all_data = np.array(tmp)
48+
else:
49+
all_data = np.concatenate((all_data, tmp), axis=0)
50+
avg = np.average(all_data, axis=0)
51+
std = np.std(all_data, axis=0)
52+
return avg, std
53+
54+
55+
def _brute_aparam_pt(data, ndim):
56+
adata = [ii["aparam"] for ii in data]
57+
all_data = []
58+
for ii in adata:
59+
tmp = np.reshape(ii, [-1, ndim])
60+
if len(all_data) == 0:
61+
all_data = np.array(tmp)
62+
else:
63+
all_data = np.concatenate((all_data, tmp), axis=0)
64+
avg = np.average(all_data, axis=0)
65+
std = np.std(all_data, axis=0)
66+
return avg, std
67+
68+
69+
class TestEnerFittingStat(unittest.TestCase):
    def test(self) -> None:
        # statistics computed by the fitting must match the brute-force reference
        descriptor = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
        fitting = EnergyFittingNet(
            descriptor.get_ntypes(),
            descriptor.get_dim_out(),
            neuron=[240, 240, 240],
            resnet_dt=True,
            numb_fparam=3,
            numb_aparam=3,
        )
        means = [0, 10, 100]
        sigmas = [2, 0.4, 0.00001]
        natoms_per_sys = [10, 100]
        nframes_per_sys = [5, 2]
        samples = _make_fake_data_pt(natoms_per_sys, nframes_per_sys, means, sigmas)
        ref_favg, ref_fstd = _brute_fparam_pt(samples, len(means))
        ref_aavg, ref_astd = _brute_aparam_pt(samples, len(means))
        fitting.compute_input_stats(samples, protection=1e-2)
        # the fitting stores 1/std, clipped at 1/protection = 100
        expect_finv = np.minimum(1.0 / ref_fstd, 100.0)
        expect_ainv = np.minimum(1.0 / ref_astd, 100.0)
        np.testing.assert_almost_equal(ref_favg, fitting.fparam_avg)
        np.testing.assert_almost_equal(expect_finv, fitting.fparam_inv_std)
        np.testing.assert_almost_equal(ref_aavg, fitting.aparam_avg)
        np.testing.assert_almost_equal(expect_ainv, fitting.aparam_inv_std)

source/tests/pd/test_training.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,11 @@ def test_dp_train(self) -> None:
8989
state_dict_trained[state_key].numpy(),
9090
state_dict_finetuned_empty[state_key].numpy(),
9191
)
92-
if "fitting_net" not in state_key:
92+
if (
93+
("fitting_net" not in state_key)
94+
or ("fparam" in state_key)
95+
or ("aparam" in state_key)
96+
):
9397
np.testing.assert_allclose(
9498
state_dict_trained[state_key].numpy(),
9599
state_dict_finetuned_random[state_key].numpy(),
@@ -190,6 +194,7 @@ def setUp(self) -> None:
190194
self.config["training"]["save_freq"] = 1
191195
self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000"
192196
shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
197+
self.config["model"]["data_stat_nbatch"] = 100
193198

194199
def tearDown(self) -> None:
195200
(self.set_path / "fparam.npy").unlink(missing_ok=True)

source/tests/pt/test_training.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,11 @@ def test_dp_train(self) -> None:
9292
state_dict_trained[state_key],
9393
state_dict_finetuned_empty[state_key],
9494
)
95-
if "fitting_net" not in state_key:
95+
if (
96+
("fitting_net" not in state_key)
97+
or ("fparam" in state_key)
98+
or ("aparam" in state_key)
99+
):
96100
torch.testing.assert_close(
97101
state_dict_trained[state_key],
98102
state_dict_finetuned_random[state_key],
@@ -256,6 +260,7 @@ def setUp(self) -> None:
256260
self.config["training"]["save_freq"] = 1
257261
self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000"
258262
shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
263+
self.config["model"]["data_stat_nbatch"] = 100
259264

260265
def tearDown(self) -> None:
261266
(self.set_path / "fparam.npy").unlink(missing_ok=True)

0 commit comments

Comments
 (0)