Merge branch 'devel' into 1108_default_fparam_stat

Chengqian-Zhang · web-flow · commit 07483a7cbb37 · 2025-11-23T17:01:21.000+08:00
Signed-off-by: Chenqqian Zhang <100290172+Chengqian-Zhang@users.noreply.github.com>
diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py
@@ -4,6 +4,7 @@
 )
 from typing import (
     Any,
+    Callable,
     Optional,
     Union,
 )
@@ -221,6 +222,71 @@ def __init__(
             ],
         )
 
+    def compute_input_stats(
+        self,
+        merged: Union[Callable[[], list[dict]], list[dict]],
+        protection: float = 1e-2,
+    ) -> None:
+        """
+        Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
+
+        Parameters
+        ----------
+        merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `merged[i]`, is a data dictionary containing `keys`: `numpy.ndarray`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        protection : float
+            Divided-by-zero protection
+        """
+        if self.numb_fparam == 0 and self.numb_aparam == 0:
+            # skip data statistics
+            return
+        if callable(merged):
+            sampled = merged()
+        else:
+            sampled = merged
+        # stat fparam
+        if self.numb_fparam > 0:
+            cat_data = np.concatenate([frame["fparam"] for frame in sampled], axis=0)
+            cat_data = np.reshape(cat_data, [-1, self.numb_fparam])
+            fparam_avg = np.mean(cat_data, axis=0)
+            fparam_std = np.std(cat_data, axis=0, ddof=0)  # ddof=0 for population std
+            fparam_std = np.where(
+                fparam_std < protection,
+                np.array(protection, dtype=fparam_std.dtype),
+                fparam_std,
+            )
+            fparam_inv_std = 1.0 / fparam_std
+            self.fparam_avg = fparam_avg.astype(self.fparam_avg.dtype)
+            self.fparam_inv_std = fparam_inv_std.astype(self.fparam_inv_std.dtype)
+        # stat aparam
+        if self.numb_aparam > 0:
+            sys_sumv = []
+            sys_sumv2 = []
+            sys_sumn = []
+            for ss_ in [frame["aparam"] for frame in sampled]:
+                ss = np.reshape(ss_, [-1, self.numb_aparam])
+                sys_sumv.append(np.sum(ss, axis=0))
+                sys_sumv2.append(np.sum(ss * ss, axis=0))
+                sys_sumn.append(ss.shape[0])
+            sumv = np.sum(np.stack(sys_sumv), axis=0)
+            sumv2 = np.sum(np.stack(sys_sumv2), axis=0)
+            sumn = sum(sys_sumn)
+            aparam_avg = sumv / sumn
+            aparam_std = np.sqrt(sumv2 / sumn - (sumv / sumn) ** 2)
+            aparam_std = np.where(
+                aparam_std < protection,
+                np.array(protection, dtype=aparam_std.dtype),
+                aparam_std,
+            )
+            aparam_inv_std = 1.0 / aparam_std
+            self.aparam_avg = aparam_avg.astype(self.aparam_avg.dtype)
+            self.aparam_inv_std = aparam_inv_std.astype(self.aparam_inv_std.dtype)
+
     @abstractmethod
     def _net_out_dim(self) -> int:
         """Set the FittingNet output dim."""
diff --git a/deepmd/pd/model/atomic_model/base_atomic_model.py b/deepmd/pd/model/atomic_model/base_atomic_model.py
@@ -515,6 +515,24 @@ def change_out_bias(
         else:
             raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode)
 
+    def compute_fitting_input_stat(
+        self,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
+    ) -> None:
+        """Compute the input statistics (e.g. mean and stddev) for the atomic model from packed data.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `sample_merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        """
+        pass
+
     def _get_forward_wrapper_func(self) -> Callable[..., paddle.Tensor]:
         """Get a forward wrapper of the atomic model for output bias calculation."""
 
diff --git a/deepmd/pd/model/atomic_model/dp_atomic_model.py b/deepmd/pd/model/atomic_model/dp_atomic_model.py
@@ -397,12 +397,30 @@ def wrapped_sampler():
             return sampled
 
         self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
-        self.fitting_net.compute_input_stats(
-            wrapped_sampler, protection=self.data_stat_protect
-        )
+        self.compute_fitting_input_stat(wrapped_sampler)
         if compute_or_load_out_stat:
             self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
+    def compute_fitting_input_stat(
+        self,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
+    ) -> None:
+        """Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `sample_merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        """
+        self.fitting_net.compute_input_stats(
+            sample_merged, protection=self.data_stat_protect
+        )
+
     def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this atomic model."""
         return self.fitting_net.get_dim_fparam()
diff --git a/deepmd/pd/model/model/make_model.py b/deepmd/pd/model/model/make_model.py
@@ -228,6 +228,8 @@ def change_out_bias(
                 merged,
                 bias_adjust_mode=bias_adjust_mode,
             )
+            if bias_adjust_mode == "set-by-statistic":
+                self.atomic_model.compute_fitting_input_stat(merged)
 
         def forward_common_lower(
             self,
diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py
@@ -493,6 +493,24 @@ def change_out_bias(
         else:
             raise RuntimeError("Unknown bias_adjust_mode mode: " + bias_adjust_mode)
 
+    def compute_fitting_input_stat(
+        self,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
+    ) -> None:
+        """Compute the input statistics (e.g. mean and stddev) for the atomic model from packed data.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `sample_merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        """
+        pass
+
     def _get_forward_wrapper_func(self) -> Callable[..., torch.Tensor]:
         """Get a forward wrapper of the atomic model for output bias calculation."""
 
diff --git a/deepmd/pt/model/atomic_model/dp_atomic_model.py b/deepmd/pt/model/atomic_model/dp_atomic_model.py
@@ -5,6 +5,7 @@
     Any,
     Callable,
     Optional,
+    Union,
 )
 
 import torch
@@ -337,14 +338,30 @@ def wrapped_sampler() -> list[dict]:
             return sampled
 
         self.descriptor.compute_input_stats(wrapped_sampler, stat_file_path)
-        self.fitting_net.compute_input_stats(
-            wrapped_sampler,
-            protection=self.data_stat_protect,
-            stat_file_path=stat_file_path,
-        )
+        self.compute_fitting_input_stat(wrapped_sampler)
         if compute_or_load_out_stat:
             self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
+    def compute_fitting_input_stat(
+        self,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
+    ) -> None:
+        """Compute the input statistics (e.g. mean and stddev) for the fittings from packed data.
+
+        Parameters
+        ----------
+        sample_merged : Union[Callable[[], list[dict]], list[dict]]
+            - list[dict]: A list of data samples from various data systems.
+                Each element, `sample_merged[i]`, is a data dictionary containing `keys`: `torch.Tensor`
+                originating from the `i`-th data system.
+            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
+                only when needed. Since the sampling process can be slow and memory-intensive,
+                the lazy function helps by only sampling once.
+        """
+        self.fitting_net.compute_input_stats(
+            sample_merged, protection=self.data_stat_protect
+        )
+
     def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this atomic model."""
         return self.fitting_net.get_dim_fparam()
diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py
@@ -232,6 +232,8 @@ def change_out_bias(
                 merged,
                 bias_adjust_mode=bias_adjust_mode,
             )
+            if bias_adjust_mode == "set-by-statistic":
+                self.atomic_model.compute_fitting_input_stat(merged)
 
         def forward_common_lower(
             self,
diff --git a/source/tests/common/dpmodel/test_fitting_stat.py b/source/tests/common/dpmodel/test_fitting_stat.py
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import unittest
+
+import numpy as np
+
+from deepmd.dpmodel.descriptor import (
+    DescrptSeA,
+)
+from deepmd.dpmodel.fitting import (
+    EnergyFittingNet,
+)
+
+
+def _make_fake_data_pt(sys_natoms, sys_nframes, avgs, stds):
+    merged_output_stat = []
+    nsys = len(sys_natoms)
+    ndof = len(avgs)
+    for ii in range(nsys):
+        sys_dict = {}
+        tmp_data_f = []
+        tmp_data_a = []
+        for jj in range(ndof):
+            rng = np.random.default_rng(2025 * ii + 220 * jj)
+            tmp_data_f.append(
+                rng.normal(loc=avgs[jj], scale=stds[jj], size=(sys_nframes[ii], 1))
+            )
+            rng = np.random.default_rng(220 * ii + 1636 * jj)
+            tmp_data_a.append(
+                rng.normal(
+                    loc=avgs[jj], scale=stds[jj], size=(sys_nframes[ii], sys_natoms[ii])
+                )
+            )
+        tmp_data_f = np.transpose(tmp_data_f, (1, 2, 0))
+        tmp_data_a = np.transpose(tmp_data_a, (1, 2, 0))
+        sys_dict["fparam"] = tmp_data_f
+        sys_dict["aparam"] = tmp_data_a
+        merged_output_stat.append(sys_dict)
+    return merged_output_stat
+
+
+def _brute_fparam_pt(data, ndim):
+    adata = [ii["fparam"] for ii in data]
+    all_data = []
+    for ii in adata:
+        tmp = np.reshape(ii, [-1, ndim])
+        if len(all_data) == 0:
+            all_data = np.array(tmp)
+        else:
+            all_data = np.concatenate((all_data, tmp), axis=0)
+    avg = np.average(all_data, axis=0)
+    std = np.std(all_data, axis=0)
+    return avg, std
+
+
+def _brute_aparam_pt(data, ndim):
+    adata = [ii["aparam"] for ii in data]
+    all_data = []
+    for ii in adata:
+        tmp = np.reshape(ii, [-1, ndim])
+        if len(all_data) == 0:
+            all_data = np.array(tmp)
+        else:
+            all_data = np.concatenate((all_data, tmp), axis=0)
+    avg = np.average(all_data, axis=0)
+    std = np.std(all_data, axis=0)
+    return avg, std
+
+
+class TestEnerFittingStat(unittest.TestCase):
+    def test(self) -> None:
+        descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
+        fitting = EnergyFittingNet(
+            descrpt.get_ntypes(),
+            descrpt.get_dim_out(),
+            neuron=[240, 240, 240],
+            resnet_dt=True,
+            numb_fparam=3,
+            numb_aparam=3,
+        )
+        avgs = [0, 10, 100]
+        stds = [2, 0.4, 0.00001]
+        sys_natoms = [10, 100]
+        sys_nframes = [5, 2]
+        all_data = _make_fake_data_pt(sys_natoms, sys_nframes, avgs, stds)
+        frefa, frefs = _brute_fparam_pt(all_data, len(avgs))
+        arefa, arefs = _brute_aparam_pt(all_data, len(avgs))
+        fitting.compute_input_stats(all_data, protection=1e-2)
+        frefs_inv = 1.0 / frefs
+        arefs_inv = 1.0 / arefs
+        frefs_inv[frefs_inv > 100] = 100
+        arefs_inv[arefs_inv > 100] = 100
+        np.testing.assert_almost_equal(frefa, fitting.fparam_avg)
+        np.testing.assert_almost_equal(frefs_inv, fitting.fparam_inv_std)
+        np.testing.assert_almost_equal(arefa, fitting.aparam_avg)
+        np.testing.assert_almost_equal(arefs_inv, fitting.aparam_inv_std)
diff --git a/source/tests/pd/test_training.py b/source/tests/pd/test_training.py
@@ -89,7 +89,11 @@ def test_dp_train(self) -> None:
                     state_dict_trained[state_key].numpy(),
                     state_dict_finetuned_empty[state_key].numpy(),
                 )
-                if "fitting_net" not in state_key:
+                if (
+                    ("fitting_net" not in state_key)
+                    or ("fparam" in state_key)
+                    or ("aparam" in state_key)
+                ):
                     np.testing.assert_allclose(
                         state_dict_trained[state_key].numpy(),
                         state_dict_finetuned_random[state_key].numpy(),
@@ -190,6 +194,7 @@ def setUp(self) -> None:
         self.config["training"]["save_freq"] = 1
         self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000"
         shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
+        self.config["model"]["data_stat_nbatch"] = 100
 
     def tearDown(self) -> None:
         (self.set_path / "fparam.npy").unlink(missing_ok=True)
diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
@@ -92,7 +92,11 @@ def test_dp_train(self) -> None:
                     state_dict_trained[state_key],
                     state_dict_finetuned_empty[state_key],
                 )
-                if "fitting_net" not in state_key:
+                if (
+                    ("fitting_net" not in state_key)
+                    or ("fparam" in state_key)
+                    or ("aparam" in state_key)
+                ):
                     torch.testing.assert_close(
                         state_dict_trained[state_key],
                         state_dict_finetuned_random[state_key],
@@ -256,6 +260,7 @@ def setUp(self) -> None:
         self.config["training"]["save_freq"] = 1
         self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000"
         shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy")
+        self.config["model"]["data_stat_nbatch"] = 100
 
     def tearDown(self) -> None:
         (self.set_path / "fparam.npy").unlink(missing_ok=True)

Original file line number	Diff line number	Diff line change
`@@ -228,6 +228,8 @@ def change_out_bias(`
`228`	`228`	`merged,`
`229`	`229`	`bias_adjust_mode=bias_adjust_mode,`
`230`	`230`	`)`
	`231`	`+ if bias_adjust_mode == "set-by-statistic":`
	`232`	`+ self.atomic_model.compute_fitting_input_stat(merged)`
`231`	`233`
`232`	`234`	`def forward_common_lower(`
`233`	`235`	`self,`
Original file line number	Diff line number	Diff line change
`@@ -232,6 +232,8 @@ def change_out_bias(`
`232`	`232`	`merged,`
`233`	`233`	`bias_adjust_mode=bias_adjust_mode,`
`234`	`234`	`)`
	`235`	`+ if bias_adjust_mode == "set-by-statistic":`
	`236`	`+ self.atomic_model.compute_fitting_input_stat(merged)`
`235`	`237`
`236`	`238`	`def forward_common_lower(`
`237`	`239`	`self,`