Support type-wise bias in property fitting

Chengqian-Zhang · Chengqian-Zhang · commit 546e6620284b · 2026-03-17T07:47:45.000Z
diff --git a/deepmd/pt/model/atomic_model/property_atomic_model.py b/deepmd/pt/model/atomic_model/property_atomic_model.py
@@ -26,7 +26,7 @@ def __init__(
 
     def get_compute_stats_distinguish_types(self) -> bool:
         """Get whether the fitting net computes stats which are not distinguished between different types of atoms."""
-        return False
+        return True
 
     def get_intensive(self) -> bool:
         """Whether the fitting property is intensive."""
@@ -49,6 +49,10 @@ def apply_out_stat(
 
         """
         out_bias, out_std = self._fetch_out_stat(self.bias_keys)
-        for kk in self.bias_keys:
-            ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0]
+        # Call the getter: a bare bound method is always truthy, so the flag was never consulted.
+        per_type = self.get_compute_stats_distinguish_types()
+        for kk in self.bias_keys:
+            # Per-type stats: index the bias by atype; otherwise use the row-0 bias for every atom.
+            bias = out_bias[kk][atype] if per_type else out_bias[kk][0]
+            ret[kk] = ret[kk] * out_std[kk][0] + bias
         return ret
diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py
@@ -157,7 +157,7 @@ def _post_process_stat(
     """Post process the statistics.
 
     For global statistics, we do not have the std for each type of atoms,
-    thus fake the output std by ones for all the types.
+    thus broadcast the global std to all the types.
     If the shape of out_std is already the same as out_bias,
     we do not need to do anything.
 
@@ -167,7 +167,9 @@ def _post_process_stat(
         if vv.shape == out_std[kk].shape:
             new_std[kk] = out_std[kk]
         else:
-            new_std[kk] = np.ones_like(vv)
+            ntypes = vv.shape[0]
+            reps = [ntypes] + [1] * (vv.ndim - 1)
+            new_std[kk] = np.tile(out_std[kk], reps)
     return out_bias, new_std
 
 
@@ -517,6 +519,7 @@ def _compute_output_stats_global(
                     merged_natoms[kk],
                     assigned_bias=assigned_atom_ener[kk],
                     rcond=rcond,
+                    intensive=intensive,
                 )
         else:
             # this key does not have global labels, skip it.
@@ -525,29 +528,28 @@ def _compute_output_stats_global(
 
     # unbias_e is only used for print rmse
 
-    if model_pred is None:
-        unbias_e = {
-            kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1)
-            for kk in bias_atom_e.keys()
-        }
-    else:
-        unbias_e = {
-            kk: model_pred[kk].reshape(nf[kk], -1)
-            + merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1)
-            for kk in bias_atom_e.keys()
-        }
-    atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in bias_atom_e.keys()}
+    unbias_e = {}
+    for kk in bias_atom_e.keys():
+        coeffs = merged_natoms[kk]
+        if intensive:
+            total_atoms = coeffs.sum(axis=1, keepdims=True)
+            coeffs = coeffs / total_atoms
+        recon = coeffs @ bias_atom_e[kk].reshape(ntypes, -1)
+        if model_pred is not None:
+            recon += model_pred[kk].reshape(nf[kk], -1)
+        unbias_e[kk] = recon
 
     def rmse(x: np.ndarray) -> float:
         return np.sqrt(np.mean(np.square(x)))
 
     for kk in bias_atom_e.keys():
-        rmse_ae = rmse(
-            (unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1))
-            / atom_numbs[kk][:, None]
-        )
+        diff = unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)
+        if not intensive:
+            diff /= merged_natoms[kk].sum(axis=-1, keepdims=True)
+        rmse_ae = rmse(diff)
+        stat_type = "per atom " if not intensive else ""
         log.info(
-            f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}."
+            f"RMSE of {kk} {stat_type}after linear regression is: {rmse_ae} in the unit of {kk}."
         )
     return bias_atom_e, std_atom_e
 
diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py
@@ -13,6 +13,7 @@ def compute_stats_from_redu(
     natoms: np.ndarray,
     assigned_bias: np.ndarray | None = None,
     rcond: float | None = None,
+    intensive: bool = False,
 ) -> tuple[np.ndarray, np.ndarray]:
     """Compute the output statistics.
 
@@ -31,6 +32,8 @@ def compute_stats_from_redu(
         of the type is not assigned.
     rcond
         Cut-off ratio for small singular values of a.
+    intensive
+        If True, treat the output as intensive: each row of natoms is divided by its sum.
 
     Returns
     -------
@@ -44,6 +47,8 @@ def compute_stats_from_redu(
     output_redu = np.array(output_redu)
     var_shape = list(output_redu.shape[1:])
     output_redu = output_redu.reshape(nf, -1)
+    if intensive:
+        natoms = natoms / np.sum(natoms, axis=1, keepdims=True)
     # check shape
     assert output_redu.ndim == 2
     assert natoms.ndim == 2