deepmodeling
diff --git a/deepmd/dpmodel/utils/env_mat_stat.py b/deepmd/dpmodel/utils/env_mat_stat.py
Lines changed: 1 addition & 2 deletions
diff --git a/deepmd/dpmodel/utils/stat.py b/deepmd/dpmodel/utils/stat.py
Lines changed: 23 additions & 15 deletions
diff --git a/deepmd/pd/utils/env_mat_stat.py b/deepmd/pd/utils/env_mat_stat.py
Lines changed: 1 addition & 2 deletions
diff --git a/deepmd/pd/utils/stat.py b/deepmd/pd/utils/stat.py
Lines changed: 48 additions & 54 deletions
@@ -128,11 +128,10 @@ def iter(
             device=array_api_compat.device(data[0]["coord"]),
         )
         for system in data:
-            coord, atype, box, natoms = (
+            coord, atype, box = (
                 system["coord"],
                 system["atype"],
                 system["box"],
-                system["natoms"],
             )
             (
                 extended_coord,
 
@@ -14,6 +14,9 @@
 from deepmd.dpmodel.common import (
     to_numpy_array,
 )
+from deepmd.dpmodel.utils.exclude_mask import (
+    AtomExcludeMask,
+)
 from deepmd.utils.out_stat import (
     compute_stats_do_not_distinguish_types,
     compute_stats_from_atomic,
@@ -245,10 +248,8 @@ def compute_output_stats(
                     system["find_atom_" + kk] > 0.0
                 ):
                     atomic_sampled_idx[kk].append(idx)
-                elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
+                if (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
                     global_sampled_idx[kk].append(idx)
-                else:
-                    continue
 
         # use index to gather model predictions for the corresponding systems.
         model_pred_g = (
@@ -291,7 +292,7 @@ def compute_output_stats(
         )
 
         # compute stat
-        bias_atom_g, std_atom_g = compute_output_stats_global(
+        bias_atom_g, std_atom_g = _compute_output_stats_global(
             sampled,
             ntypes,
             keys,
@@ -302,7 +303,7 @@ def compute_output_stats(
             intensive,
             model_pred_g,
         )
-        bias_atom_a, std_atom_a = compute_output_stats_atomic(
+        bias_atom_a, std_atom_a = _compute_output_stats_atomic(
             sampled,
             ntypes,
             keys,
@@ -335,7 +336,7 @@ def compute_output_stats(
     return bias_atom_e, std_atom_e
 
 
-def compute_output_stats_global(
+def _compute_output_stats_global(
     sampled: list[dict],
     ntypes: int,
     keys: list[str],
@@ -359,14 +360,21 @@ def compute_output_stats_global(
         for kk in keys
     }
 
-    natoms_key = "natoms"
-    input_natoms = {
-        kk: [
-            to_numpy_array(sampled[idx][natoms_key])
-            for idx in global_sampled_idx.get(kk, [])
-        ]
-        for kk in keys
-    }
+    data_mixed_type = "real_natoms_vec" in sampled[0]
+    natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec"
+    input_natoms = {}
+    for kk in keys:
+        kk_natoms = []
+        for idx in global_sampled_idx.get(kk, []):
+            nn = to_numpy_array(sampled[idx][natoms_key])
+            if "atom_exclude_types" in sampled[idx]:
+                nn = nn.copy()
+                type_mask = AtomExcludeMask(
+                    ntypes, sampled[idx]["atom_exclude_types"]
+                ).get_type_mask()
+                nn[:, 2:] *= type_mask.reshape(1, -1)
+            kk_natoms.append(nn)
+        input_natoms[kk] = kk_natoms
 
     # shape: (nframes, ndim)
     merged_output = {
@@ -453,7 +461,7 @@ def rmse(x: np.ndarray) -> float:
     return bias_atom_e, std_atom_e
 
 
-def compute_output_stats_atomic(
+def _compute_output_stats_atomic(
     sampled: list[dict],
     ntypes: int,
     keys: list[str],
 
@@ -107,11 +107,10 @@ def iter(
                 "last_dim should be 1 for raial-only or 4 for full descriptor."
             )
         for system in data:
-            coord, atype, box, natoms = (
+            coord, atype, box = (
                 system["coord"],
                 system["atype"],
                 system["box"],
-                system["natoms"],
             )
             (
                 extended_coord,
 
@@ -167,11 +167,10 @@ def _compute_model_predict(
     model_predict = {kk: [] for kk in keys}
     for system in sampled:
         nframes = system["coord"].shape[0]
-        coord, atype, box, natoms = (
+        coord, atype, box = (
             system["coord"],
             system["atype"],
             system["box"],
-            system["natoms"],
         )
         fparam = system.get("fparam", None)
         aparam = system.get("aparam", None)
@@ -324,12 +323,9 @@ def compute_output_stats(
                     system["find_atom_" + kk] > 0.0
                 ):
                     atomic_sampled_idx[kk].append(idx)
-                elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
+                if (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
                     global_sampled_idx[kk].append(idx)
 
-                else:
-                    continue
-
         # use index to gather model predictions for the corresponding systems.
 
         model_pred_g = (
@@ -372,20 +368,22 @@ def compute_output_stats(
         )
 
         # compute stat
-        bias_atom_g, std_atom_g = compute_output_stats_global(
+        bias_atom_g, std_atom_g = _compute_output_stats_global(
             sampled,
             ntypes,
             keys,
             rcond,
             preset_bias,
-            model_pred_g,
+            global_sampled_idx,
             stats_distinguish_types,
             intensive,
+            model_pred_g,
         )
-        bias_atom_a, std_atom_a = compute_output_stats_atomic(
+        bias_atom_a, std_atom_a = _compute_output_stats_atomic(
             sampled,
             ntypes,
             keys,
+            atomic_sampled_idx,
             model_pred_a,
         )
 
@@ -416,58 +414,52 @@ def compute_output_stats(
     return bias_atom_e, std_atom_e
 
 
-def compute_output_stats_global(
+def _compute_output_stats_global(
     sampled: list[dict],
     ntypes: int,
     keys: list[str],
     rcond: float | None = None,
     preset_bias: dict[str, list[paddle.Tensor | None]] | None = None,
-    model_pred: dict[str, np.ndarray] | None = None,
+    global_sampled_idx: dict | None = None,
     stats_distinguish_types: bool = True,
     intensive: bool = False,
+    model_pred: dict[str, np.ndarray] | None = None,
 ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
     """This function only handle stat computation from reduced global labels."""
-    # return directly if model predict is empty for global
-    if model_pred == {}:
+    # return directly if no global samples
+    if global_sampled_idx is None or all(
+        len(v) == 0 for v in global_sampled_idx.values()
+    ):
         return {}, {}
 
     # get label dict from sample; for each key, only picking the system with global labels.
     outputs = {
-        kk: [
-            system[kk]
-            for system in sampled
-            if kk in system and system.get(f"find_{kk}", 0) > 0
-        ]
+        kk: [to_numpy_array(sampled[idx][kk]) for idx in global_sampled_idx.get(kk, [])]
         for kk in keys
     }
 
     data_mixed_type = "real_natoms_vec" in sampled[0]
     natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec"
-    for system in sampled:
-        if "atom_exclude_types" in system:
-            type_mask = AtomExcludeMask(
-                ntypes, system["atom_exclude_types"]
-            ).get_type_mask()
-            system[natoms_key][:, 2:] *= type_mask.unsqueeze(0)
-
-    input_natoms = {
-        kk: [
-            item[natoms_key]
-            for item in sampled
-            if kk in item and item.get(f"find_{kk}", 0) > 0
-        ]
-        for kk in keys
-    }
+    input_natoms = {}
+    for kk in keys:
+        kk_natoms = []
+        for idx in global_sampled_idx.get(kk, []):
+            nn = to_numpy_array(sampled[idx][natoms_key])
+            if "atom_exclude_types" in sampled[idx]:
+                nn = nn.copy()
+                type_mask = AtomExcludeMask(
+                    ntypes, sampled[idx]["atom_exclude_types"]
+                ).get_type_mask()
+                nn[:, 2:] *= to_numpy_array(type_mask).reshape(1, -1)
+            kk_natoms.append(nn)
+        input_natoms[kk] = kk_natoms
     # shape: (nframes, ndim)
     merged_output = {
-        kk: to_numpy_array(paddle.concat(outputs[kk]))
-        for kk in keys
-        if len(outputs[kk]) > 0
+        kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0
     }
     # shape: (nframes, ntypes)
-
     merged_natoms = {
-        kk: to_numpy_array(paddle.concat(input_natoms[kk])[:, 2:])
+        kk: np.concatenate(input_natoms[kk])[:, 2:]
         for kk in keys
         if len(input_natoms[kk]) > 0
     }
@@ -550,53 +542,55 @@ def rmse(x: np.ndarray) -> float:
     return bias_atom_e, std_atom_e
 
 
-def compute_output_stats_atomic(
+def _compute_output_stats_atomic(
     sampled: list[dict],
     ntypes: int,
     keys: list[str],
+    atomic_sampled_idx: dict | None = None,
     model_pred: dict[str, np.ndarray] | None = None,
 ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+    """Compute output statistics from atomic labels."""
+    # return directly if no atomic samples
+    if atomic_sampled_idx is None or all(
+        len(v) == 0 for v in atomic_sampled_idx.values()
+    ):
+        return {}, {}
+
     # get label dict from sample; for each key, only picking the system with atomic labels.
     outputs = {
         kk: [
-            system["atom_" + kk]
-            for system in sampled
-            if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0
+            to_numpy_array(sampled[idx]["atom_" + kk])
+            for idx in atomic_sampled_idx.get(kk, [])
         ]
         for kk in keys
     }
     natoms = {
         kk: [
-            system["atype"]
-            for system in sampled
-            if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0
+            to_numpy_array(sampled[idx]["atype"])
+            for idx in atomic_sampled_idx.get(kk, [])
         ]
         for kk in keys
     }
     # reshape outputs [nframes, nloc * ndim] --> reshape to [nframes * nloc, 1, ndim] for concatenation
     # reshape natoms [nframes, nloc] --> reshape to [nframes * nolc, 1] for concatenation
-    natoms = {k: [sys_v.reshape([-1, 1]) for sys_v in v] for k, v in natoms.items()}
+    natoms = {k: [sys_v.reshape(-1, 1) for sys_v in v] for k, v in natoms.items()}
     outputs = {
         k: [
-            sys.reshape([natoms[k][sys_idx].shape[0], 1, -1])
+            sys.reshape(natoms[k][sys_idx].shape[0], 1, -1)
             for sys_idx, sys in enumerate(v)
         ]
         for k, v in outputs.items()
     }
 
     merged_output = {
-        kk: to_numpy_array(paddle.concat(outputs[kk]))
-        for kk in keys
-        if len(outputs[kk]) > 0
+        kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0
     }
     merged_natoms = {
-        kk: to_numpy_array(paddle.concat(natoms[kk]))
-        for kk in keys
-        if len(natoms[kk]) > 0
+        kk: np.concatenate(natoms[kk]) for kk in keys if len(natoms[kk]) > 0
     }
     # reshape merged data to [nf, nloc, ndim]
     merged_output = {
-        kk: merged_output[kk].reshape([*merged_natoms[kk].shape, -1])
+        kk: merged_output[kk].reshape((*merged_natoms[kk].shape, -1))
         for kk in merged_output
     }
Original file line number	Diff line number	Diff line change
`@@ -128,11 +128,10 @@ def iter(`
`128`	`128`	`device=array_api_compat.device(data[0]["coord"]),`
`129`	`129`	`)`
`130`	`130`	`for system in data:`
`131`		`- coord, atype, box, natoms = (`
	`131`	`+ coord, atype, box = (`
`132`	`132`	`system["coord"],`
`133`	`133`	`system["atype"],`
`134`	`134`	`system["box"],`
`135`		`- system["natoms"],`
`136`	`135`	`)`
`137`	`136`	`(`
`138`	`137`	`extended_coord,`
Original file line number	Diff line number	Diff line change
`@@ -107,11 +107,10 @@ def iter(`
`107`	`107`	`"last_dim should be 1 for raial-only or 4 for full descriptor."`
`108`	`108`	`)`
`109`	`109`	`for system in data:`
`110`		`- coord, atype, box, natoms = (`
	`110`	`+ coord, atype, box = (`
`111`	`111`	`system["coord"],`
`112`	`112`	`system["atype"],`
`113`	`113`	`system["box"],`
`114`		`- system["natoms"],`
`115`	`114`	`)`
`116`	`115`	`(`
`117`	`116`	`extended_coord,`