Skip to content

Commit 7cd0d19

Browse files
feat(property): Support element type-wise bias in property fitting (#5322)
### Summary Introduced the `distinguish_types` option for property fitting to support more flexible atom contribution calculations. - `distinguish_types=True` (default) has behavior consistent with energy fitting (if the property is extensive), calculating individual atom contributions based on each specific atom type. - `distinguish_types=False` treats atom contributions as element-agnostic. This is particularly useful for element extrapolation scenarios where the model needs to generalize across different chemical species. ### Key changes - `deepmd/pt/model/task/property.py`: - Add the `distinguish_types` parameter. - Change the serialization version from 5 to 6. - `deepmd/pt/utils/stat.py`: - Refactored `output_std`: replaced the constant "ones" padding with actual standard deviation values (required by the function `apply_out_stat` in `deepmd/pt/model/atomic_model/property_atomic_model.py`). - Optimized the RMSE logging/printing logic for better clarity. - `deepmd/utils/out_stat.py`: - Updated `compute_stats_from_redu` to include the `intensive` parameter. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Added a distinguish_types option for property fitting and an intensive mode for statistics computation. * **Bug Fixes** * Output statistics can be applied per atom type when enabled; global standard deviations are now correctly broadcast to per-type shapes. * Normalization and RMSE reporting adjusted to handle intensive vs per-atom modes. * **Tests** * Added/updated tests covering intensive statistics and updated expected statistic values. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 0828604 commit 7cd0d19

File tree

17 files changed

+143
-57
lines changed

17 files changed

+143
-57
lines changed

deepmd/dpmodel/atomic_model/property_atomic_model.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def __init__(
2727

2828
def get_compute_stats_distinguish_types(self) -> bool:
2929
"""Get whether the fitting net computes stats which are not distinguished between different types of atoms."""
30-
return False
30+
return self.fitting_net.get_distinguish_types()
3131

3232
def get_intensive(self) -> bool:
3333
"""Whether the fitting property is intensive."""
@@ -51,6 +51,10 @@ def apply_out_stat(
5151
5252
"""
5353
out_bias, out_std = self._fetch_out_stat(self.bias_keys)
54-
for kk in self.bias_keys:
55-
ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0]
54+
if self.get_compute_stats_distinguish_types():
55+
for kk in self.bias_keys:
56+
ret[kk] = ret[kk] * out_std[kk][atype] + out_bias[kk][atype]
57+
else:
58+
for kk in self.bias_keys:
59+
ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0]
5660
return ret

deepmd/dpmodel/fitting/property_fitting.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ class PropertyFittingNet(InvarFitting):
6565
default_fparam: list[float], optional
6666
The default frame parameter. If set, when `fparam.npy` files are not included in the data system,
6767
this value will be used as the default value for the frame parameter in the fitting net.
68+
distinguish_types : bool
69+
Whether to distinguish atom types when computing output statistics.
6870
"""
6971

7072
def __init__(
@@ -88,11 +90,13 @@ def __init__(
8890
exclude_types: list[int] = [],
8991
type_map: list[str] | None = None,
9092
default_fparam: list | None = None,
93+
distinguish_types: bool = True,
9194
# not used
9295
seed: int | None = None,
9396
) -> None:
9497
self.task_dim = task_dim
9598
self.intensive = intensive
99+
self.distinguish_types = distinguish_types
96100
super().__init__(
97101
var_name=property_name,
98102
ntypes=ntypes,
@@ -131,7 +135,8 @@ def output_def(self) -> FittingOutputDef:
131135
@classmethod
132136
def deserialize(cls, data: dict) -> "PropertyFittingNet":
133137
data = data.copy()
134-
check_version_compatibility(data.pop("@version"), 5, 1)
138+
check_version_compatibility(data.pop("@version"), 6, 1)
139+
data.setdefault("distinguish_types", False)
135140
data.pop("dim_out")
136141
data["property_name"] = data.pop("var_name")
137142
data.pop("tot_ener_zero")
@@ -150,7 +155,12 @@ def serialize(self) -> dict:
150155
"type": "property",
151156
"task_dim": self.task_dim,
152157
"intensive": self.intensive,
158+
"distinguish_types": self.distinguish_types,
153159
}
154-
dd["@version"] = 5
160+
dd["@version"] = 6
155161

156162
return dd
163+
164+
def get_distinguish_types(self) -> bool:
165+
"""Get whether the fitting net computes stats which are distinguished between different types of atoms."""
166+
return self.distinguish_types

deepmd/dpmodel/utils/stat.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def _post_process_stat(
135135
"""Post process the statistics.
136136
137137
For global statistics, we do not have the std for each type of atoms,
138-
thus fake the output std by ones for all the types.
138+
thus broadcast the global std to all the types.
139139
If the shape of out_std is already the same as out_bias,
140140
we do not need to do anything.
141141
"""
@@ -144,7 +144,9 @@ def _post_process_stat(
144144
if vv.shape == out_std[kk].shape:
145145
new_std[kk] = out_std[kk]
146146
else:
147-
new_std[kk] = np.ones_like(vv)
147+
ntypes = vv.shape[0]
148+
reps = [ntypes] + [1] * (vv.ndim - 1)
149+
new_std[kk] = np.tile(out_std[kk], reps)
148150
return out_bias, new_std
149151

150152

@@ -481,6 +483,7 @@ def _compute_output_stats_global(
481483
merged_natoms[kk],
482484
assigned_bias=assigned_atom_ener[kk],
483485
rcond=rcond,
486+
intensive=intensive,
484487
)
485488
else:
486489
# this key does not have global labels, skip it.
@@ -491,26 +494,25 @@ def _compute_output_stats_global(
491494
def rmse(x: np.ndarray) -> float:
492495
return np.sqrt(np.mean(np.square(x)))
493496

494-
if model_pred is None:
495-
unbias_e = {
496-
kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1)
497-
for kk in bias_atom_e.keys()
498-
}
499-
else:
500-
unbias_e = {
501-
kk: model_pred[kk].reshape(nf[kk], -1)
502-
+ merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1)
503-
for kk in bias_atom_e.keys()
504-
}
505-
atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in bias_atom_e.keys()}
497+
unbias_e = {}
498+
for kk in bias_atom_e.keys():
499+
coeffs = merged_natoms[kk]
500+
if intensive:
501+
total_atoms = coeffs.sum(axis=1, keepdims=True)
502+
coeffs = coeffs / total_atoms
503+
recon = coeffs @ bias_atom_e[kk].reshape(ntypes, -1)
504+
if model_pred is not None:
505+
recon += model_pred[kk].reshape(nf[kk], -1)
506+
unbias_e[kk] = recon
506507

507508
for kk in bias_atom_e.keys():
508-
rmse_ae = rmse(
509-
(unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1))
510-
/ atom_numbs[kk][:, None]
511-
)
509+
diff = unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)
510+
if not intensive:
511+
diff /= merged_natoms[kk].sum(axis=-1, keepdims=True)
512+
rmse_ae = rmse(diff)
513+
stat_type = "per atom " if not intensive else ""
512514
log.info(
513-
f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}."
515+
f"RMSE of {kk} {stat_type}after linear regression is: {rmse_ae} in the unit of {kk}."
514516
)
515517
return bias_atom_e, std_atom_e
516518

deepmd/pd/utils/stat.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def _post_process_stat(
144144
"""Post process the statistics.
145145
146146
For global statistics, we do not have the std for each type of atoms,
147-
thus fake the output std by ones for all the types.
147+
thus broadcast the global std to all the types.
148148
If the shape of out_std is already the same as out_bias,
149149
we do not need to do anything.
150150
@@ -154,7 +154,9 @@ def _post_process_stat(
154154
if vv.shape == out_std[kk].shape:
155155
new_std[kk] = out_std[kk]
156156
else:
157-
new_std[kk] = np.ones_like(vv)
157+
ntypes = vv.shape[0]
158+
reps = [ntypes] + [1] * (vv.ndim - 1)
159+
new_std[kk] = np.tile(out_std[kk], reps)
158160
return out_bias, new_std
159161

160162

deepmd/pt/model/atomic_model/property_atomic_model.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def __init__(
2626

2727
def get_compute_stats_distinguish_types(self) -> bool:
2828
"""Get whether the fitting net computes stats which are not distinguished between different types of atoms."""
29-
return False
29+
return self.fitting_net.get_distinguish_types()
3030

3131
def get_intensive(self) -> bool:
3232
"""Whether the fitting property is intensive."""
@@ -49,6 +49,10 @@ def apply_out_stat(
4949
5050
"""
5151
out_bias, out_std = self._fetch_out_stat(self.bias_keys)
52-
for kk in self.bias_keys:
53-
ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0]
52+
if self.get_compute_stats_distinguish_types():
53+
for kk in self.bias_keys:
54+
ret[kk] = ret[kk] * out_std[kk][atype] + out_bias[kk][atype]
55+
else:
56+
for kk in self.bias_keys:
57+
ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0]
5458
return ret

deepmd/pt/model/task/property.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ class PropertyFittingNet(InvarFitting):
7070
different fitting nets for different atom types.
7171
seed : int, optional
7272
Random seed.
73+
distinguish_types : bool
74+
Whether to distinguish atom types when computing output statistics.
7375
"""
7476

7577
def __init__(
@@ -91,10 +93,12 @@ def __init__(
9193
trainable: bool | list[bool] = True,
9294
seed: int | None = None,
9395
default_fparam: list | None = None,
96+
distinguish_types: bool = True,
9497
**kwargs: Any,
9598
) -> None:
9699
self.task_dim = task_dim
97100
self.intensive = intensive
101+
self.distinguish_types = distinguish_types
98102
super().__init__(
99103
var_name=property_name,
100104
ntypes=ntypes,
@@ -133,10 +137,15 @@ def get_intensive(self) -> bool:
133137
"""Whether the fitting property is intensive."""
134138
return self.intensive
135139

140+
def get_distinguish_types(self) -> bool:
141+
"""Get whether to distinguish atom types when computing output statistics."""
142+
return self.distinguish_types
143+
136144
@classmethod
137145
def deserialize(cls, data: dict) -> "PropertyFittingNet":
138146
data = data.copy()
139-
check_version_compatibility(data.pop("@version", 1), 5, 1)
147+
check_version_compatibility(data.pop("@version", 1), 6, 1)
148+
data.setdefault("distinguish_types", False)
140149
data.pop("dim_out")
141150
data["property_name"] = data.pop("var_name")
142151
obj = super().deserialize(data)
@@ -150,8 +159,9 @@ def serialize(self) -> dict:
150159
"type": "property",
151160
"task_dim": self.task_dim,
152161
"intensive": self.intensive,
162+
"distinguish_types": self.distinguish_types,
153163
}
154-
dd["@version"] = 5
164+
dd["@version"] = 6
155165

156166
return dd
157167

deepmd/pt/utils/stat.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def _post_process_stat(
157157
"""Post process the statistics.
158158
159159
For global statistics, we do not have the std for each type of atoms,
160-
thus fake the output std by ones for all the types.
160+
thus broadcast the global std to all the types.
161161
If the shape of out_std is already the same as out_bias,
162162
we do not need to do anything.
163163
@@ -167,7 +167,9 @@ def _post_process_stat(
167167
if vv.shape == out_std[kk].shape:
168168
new_std[kk] = out_std[kk]
169169
else:
170-
new_std[kk] = np.ones_like(vv)
170+
ntypes = vv.shape[0]
171+
reps = [ntypes] + [1] * (vv.ndim - 1)
172+
new_std[kk] = np.tile(out_std[kk], reps)
171173
return out_bias, new_std
172174

173175

@@ -517,6 +519,7 @@ def _compute_output_stats_global(
517519
merged_natoms[kk],
518520
assigned_bias=assigned_atom_ener[kk],
519521
rcond=rcond,
522+
intensive=intensive,
520523
)
521524
else:
522525
# this key does not have global labels, skip it.
@@ -525,29 +528,28 @@ def _compute_output_stats_global(
525528

526529
# unbias_e is only used for print rmse
527530

528-
if model_pred is None:
529-
unbias_e = {
530-
kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1)
531-
for kk in bias_atom_e.keys()
532-
}
533-
else:
534-
unbias_e = {
535-
kk: model_pred[kk].reshape(nf[kk], -1)
536-
+ merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1)
537-
for kk in bias_atom_e.keys()
538-
}
539-
atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in bias_atom_e.keys()}
531+
unbias_e = {}
532+
for kk in bias_atom_e.keys():
533+
coeffs = merged_natoms[kk]
534+
if intensive:
535+
total_atoms = coeffs.sum(axis=1, keepdims=True)
536+
coeffs = coeffs / total_atoms
537+
recon = coeffs @ bias_atom_e[kk].reshape(ntypes, -1)
538+
if model_pred is not None:
539+
recon += model_pred[kk].reshape(nf[kk], -1)
540+
unbias_e[kk] = recon
540541

541542
def rmse(x: np.ndarray) -> float:
542543
return np.sqrt(np.mean(np.square(x)))
543544

544545
for kk in bias_atom_e.keys():
545-
rmse_ae = rmse(
546-
(unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1))
547-
/ atom_numbs[kk][:, None]
548-
)
546+
diff = unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)
547+
if not intensive:
548+
diff /= merged_natoms[kk].sum(axis=-1, keepdims=True)
549+
rmse_ae = rmse(diff)
550+
stat_type = "per atom " if not intensive else ""
549551
log.info(
550-
f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}."
552+
f"RMSE of {kk} {stat_type}after linear regression is: {rmse_ae} in the unit of {kk}."
551553
)
552554
return bias_atom_e, std_atom_e
553555

deepmd/utils/argcheck.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1925,6 +1925,9 @@ def fitting_property() -> list[Argument]:
19251925
doc_seed = "Random seed for parameter initialization of the fitting net"
19261926
doc_task_dim = "The dimension of outputs of fitting net"
19271927
doc_intensive = "Whether the fitting property is intensive"
1928+
doc_distinguish_types = (
1929+
"Whether to distinguish atom types when computing output statistics."
1930+
)
19281931
doc_property_name = "The names of fitting property, which should be consistent with the property name in the dataset."
19291932
doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
19301933
- bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
@@ -1966,6 +1969,13 @@ def fitting_property() -> list[Argument]:
19661969
Argument("seed", [int, None], optional=True, doc=doc_seed),
19671970
Argument("task_dim", int, optional=True, default=1, doc=doc_task_dim),
19681971
Argument("intensive", bool, optional=True, default=False, doc=doc_intensive),
1972+
Argument(
1973+
"distinguish_types",
1974+
bool,
1975+
optional=True,
1976+
default=True,
1977+
doc=doc_distinguish_types,
1978+
),
19691979
Argument(
19701980
"property_name",
19711981
str,

deepmd/utils/out_stat.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def compute_stats_from_redu(
1313
natoms: np.ndarray,
1414
assigned_bias: np.ndarray | None = None,
1515
rcond: float | None = None,
16+
intensive: bool = False,
1617
) -> tuple[np.ndarray, np.ndarray]:
1718
"""Compute the output statistics.
1819
@@ -31,6 +32,8 @@ def compute_stats_from_redu(
3132
of the type is not assigned.
3233
rcond
3334
Cut-off ratio for small singular values of a.
35+
intensive
36+
Whether the output is intensive or extensive.
3437
3538
Returns
3639
-------
@@ -44,6 +47,8 @@ def compute_stats_from_redu(
4447
output_redu = np.array(output_redu)
4548
var_shape = list(output_redu.shape[1:])
4649
output_redu = output_redu.reshape(nf, -1)
50+
if intensive:
51+
natoms = natoms / np.sum(natoms, axis=1, keepdims=True)
4752
# check shape
4853
assert output_redu.ndim == 2
4954
assert natoms.ndim == 2

source/tests/common/dpmodel/test_atomic_model_atomic_stat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ def test_output_stat(self) -> None:
208208
expected_std[0, :, :1] = np.array([0.0, 0.816496]).reshape(
209209
2, 1
210210
) # updating std for foo based on [5.0, 5.0, 5.0], [5.0, 6.0, 7.0]]
211+
expected_std[1, :, :] = np.zeros([2, 2])
211212
np.testing.assert_almost_equal(md0.out_std, expected_std, decimal=4)
212213
# nt x odim
213214
foo_bias = np.array([5.0, 6.0]).reshape(2, 1)

0 commit comments

Comments
 (0)