deepmodeling
diff --git a/‎deepmd/dpmodel/atomic_model/base_atomic_model.py‎
Lines changed: 62 additions & 0 deletions b/‎deepmd/dpmodel/atomic_model/base_atomic_model.py‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎deepmd/dpmodel/atomic_model/dp_atomic_model.py‎
Lines changed: 5 additions & 0 deletions b/‎deepmd/dpmodel/atomic_model/dp_atomic_model.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎deepmd/dpmodel/atomic_model/linear_atomic_model.py‎
Lines changed: 14 additions & 1 deletion b/‎deepmd/dpmodel/atomic_model/linear_atomic_model.py‎
Lines changed: 14 additions & 1 deletion
diff --git a/‎deepmd/dpmodel/atomic_model/pairtab_atomic_model.py‎
Lines changed: 10 additions & 0 deletions b/‎deepmd/dpmodel/atomic_model/pairtab_atomic_model.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎deepmd/dpmodel/infer/deep_eval.py‎
Lines changed: 28 additions & 0 deletions b/‎deepmd/dpmodel/infer/deep_eval.py‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎deepmd/dpmodel/model/make_model.py‎
Lines changed: 8 additions & 1 deletion b/‎deepmd/dpmodel/model/make_model.py‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎deepmd/dpmodel/utils/stat.py‎
Lines changed: 56 additions & 0 deletions b/‎deepmd/dpmodel/utils/stat.py‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎deepmd/entrypoints/show.py‎
Lines changed: 8 additions & 1 deletion b/‎deepmd/entrypoints/show.py‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎deepmd/pt/infer/deep_eval.py‎
Lines changed: 8 additions & 0 deletions b/‎deepmd/pt/infer/deep_eval.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎deepmd/pt/model/atomic_model/base_atomic_model.py‎
Lines changed: 40 additions & 0 deletions b/‎deepmd/pt/model/atomic_model/base_atomic_model.py‎
Lines changed: 40 additions & 0 deletions
@@ -62,6 +62,45 @@ def __init__(
         self.rcond = rcond
         self.preset_out_bias = preset_out_bias
         self.data_stat_protect = data_stat_protect
+        self._observed_type: list[str] | None = None
+
+    @property
+    def observed_type(self) -> list[str] | None:
+        """Get the observed element type list from data statistics.
+
+        Returns
+        -------
+        list[str] | None
+            The value stored in ``self._observed_type``. It is ``None`` until
+            ``_collect_and_set_observed_type`` has assigned a list.
+        """
+        return self._observed_type
+
+    def _collect_and_set_observed_type(
+        self,
+        sampled_func: Callable[[], list[dict]],
+        stat_file_path: DPPath | None,
+        preset_observed_type: list[str] | None,
+    ) -> None:
+        """Resolve the observed types and store them on ``self._observed_type``.
+
+        Sources are tried in order: the preset list, then the stat file, then
+        a fresh computation from sampled data (which is written back to the
+        stat file).
+
+        Parameters
+        ----------
+        sampled_func
+            The lazy sampled function to get data frames. Only called when
+            neither the preset nor the stat file provides a value.
+        stat_file_path
+            The path to the statistics files (should already include type_map suffix).
+        preset_observed_type
+            User-specified observed types that take highest priority.
+        """
+        from deepmd.dpmodel.utils.stat import (
+            _restore_observed_type_from_file,
+            _save_observed_type_to_file,
+            collect_observed_types,
+        )
+
+        # 1) An explicit preset wins outright.
+        if preset_observed_type is not None:
+            self._observed_type = preset_observed_type
+            return
+
+        # 2) Reuse a value stored in the stat file, if any.
+        cached = _restore_observed_type_from_file(stat_file_path)
+        if cached is not None:
+            self._observed_type = cached
+            return
+
+        # 3) Compute from sampled data and persist for later runs.
+        computed = collect_observed_types(sampled_func(), self.type_map)
+        _save_observed_type_to_file(stat_file_path, computed)
+        self._observed_type = computed
 
     def init_out_stat(self) -> None:
         """Initialize the output bias."""
@@ -271,6 +310,29 @@ def get_compute_stats_distinguish_types(self) -> bool:
         """Get whether the fitting net computes stats which are not distinguished between different types of atoms."""
         return True
 
+    def compute_or_load_stat(
+        self,
+        sampled_func: Callable[[], list[dict]],
+        stat_file_path: DPPath | None = None,
+        compute_or_load_out_stat: bool = True,
+        preset_observed_type: list[str] | None = None,
+    ) -> None:
+        """Compute or load the statistics parameters of the model,
+        such as mean and standard deviation of descriptors or the energy bias of the fitting net.
+
+        This base implementation is a placeholder that always raises
+        ``NotImplementedError``.
+
+        Parameters
+        ----------
+        sampled_func
+            The lazy sampled function to get data frames from different data systems.
+        stat_file_path
+            The path to the stat file.
+        compute_or_load_out_stat : bool
+            Whether to compute the output statistics.
+            If False, it will only compute the input statistics
+            (e.g. mean and standard deviation of descriptors).
+        preset_observed_type
+            User-specified observed types that take highest priority.
+
+        Raises
+        ------
+        NotImplementedError
+            Always, in this base implementation.
+        """
+        raise NotImplementedError
+
     def compute_or_load_out_stat(
         self,
         merged: Callable[[], list[dict]] | list[dict],
 
@@ -201,6 +201,7 @@ def compute_or_load_stat(
         sampled_func: Callable[[], list[dict]],
         stat_file_path: DPPath | None = None,
         compute_or_load_out_stat: bool = True,
+        preset_observed_type: list[str] | None = None,
     ) -> None:
         """Compute or load the statistics parameters of the model,
         such as mean and standard deviation of descriptors or the energy bias of the fitting net.
@@ -227,6 +228,10 @@ def compute_or_load_stat(
         if compute_or_load_out_stat:
             self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
+        self._collect_and_set_observed_type(
+            wrapped_sampler, stat_file_path, preset_observed_type
+        )
+
     def change_type_map(
         self, type_map: list[str], model_with_new_type_stat: Any | None = None
     ) -> None:
 
@@ -349,6 +349,7 @@ def compute_or_load_stat(
         sampled_func: Callable[[], list[dict]],
         stat_file_path: DPPath | None = None,
         compute_or_load_out_stat: bool = True,
+        preset_observed_type: list[str] | None = None,
     ) -> None:
         """Compute or load the statistics parameters of the model.
 
@@ -364,9 +365,21 @@ def compute_or_load_stat(
         compute_or_load_out_stat : bool
             Whether to compute the output statistics.
         """
+        # Compute observed type once at parent level, then propagate to
+        # sub-models via preset_observed_type to avoid redundant computation.
+        obs_stat_path = stat_file_path
+        if obs_stat_path is not None and self.type_map is not None:
+            obs_stat_path = obs_stat_path / " ".join(self.type_map)
+        self._collect_and_set_observed_type(
+            sampled_func, obs_stat_path, preset_observed_type
+        )
+
         for md in self.models:
             md.compute_or_load_stat(
-                sampled_func, stat_file_path, compute_or_load_out_stat=False
+                sampled_func,
+                stat_file_path,
+                compute_or_load_out_stat=False,
+                preset_observed_type=self._observed_type,
             )
 
         if stat_file_path is not None and self.type_map is not None:
 
@@ -216,6 +216,7 @@ def compute_or_load_stat(
         sampled_func: Callable[[], list[dict]],
         stat_file_path: DPPath | None = None,
         compute_or_load_out_stat: bool = True,
+        preset_observed_type: list[str] | None = None,
     ) -> None:
         """Compute or load the statistics parameters of the model.
 
@@ -235,6 +236,15 @@ def compute_or_load_stat(
             wrapped_sampler = self._make_wrapped_sampler(sampled_func)
             self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
+        if stat_file_path is not None and self.type_map is not None:
+            stat_file_path /= " ".join(self.type_map)
+
+        self._collect_and_set_observed_type(
+            sampled_func if callable(sampled_func) else lambda: sampled_func,
+            stat_file_path,
+            preset_observed_type,
+        )
+
     def forward_atomic(
         self,
         extended_coord: Array,
 
@@ -50,6 +50,9 @@
 from deepmd.infer.deep_wfc import (
     DeepWFC,
 )
+from deepmd.utils.econf_embd import (
+    sort_element_type,
+)
 
 if TYPE_CHECKING:
     import ase.neighborlist
@@ -403,6 +406,31 @@ def get_model_def_script(self) -> dict:
         """Get model definition script."""
         return json.loads(self.dp.get_model_def_script())
 
+    def get_observed_types(self) -> dict:
+        """Report which element types the model observed during data statistics.
+
+        The list recorded under ``info.observed_type`` in the model definition
+        script is preferred; when it is absent, the list returned by
+        ``self.dp.get_observed_type_list()`` is used instead.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the information of observed type in the model:
+            - 'type_num': the total number of observed types in this model.
+            - 'observed_type': a list of the observed types in this model.
+        """
+        recorded = self.get_model_def_script().get("info", {}).get("observed_type")
+        if recorded is None:
+            # Fallback: bias-based approach for old models
+            fallback = self.dp.get_observed_type_list()
+            count, types = len(fallback), sort_element_type(fallback)
+        else:
+            # Metadata from model_def_script is returned as stored
+            count, types = len(recorded), recorded
+        return {"type_num": count, "observed_type": types}
+
     def get_model(self) -> "BaseModel":
         """Get the dpmodel BaseModel.
 
 
@@ -381,6 +381,8 @@ def get_out_bias(self) -> Array:
         def get_observed_type_list(self) -> list[str]:
             """Get observed types (elements) of the model during data statistics.
 
+            Bias-based fallback for old models without metadata.
+
             Returns
             -------
             list[str]
@@ -718,6 +720,7 @@ def compute_or_load_stat(
             self,
             sampled_func: Callable[[], Any],
             stat_file_path: DPPath | None = None,
+            preset_observed_type: list[str] | None = None,
         ) -> None:
             """Compute or load the statistics parameters of the model.
 
@@ -728,8 +731,12 @@ def compute_or_load_stat(
                 data systems.
             stat_file_path
                 The path to the stat file.
+            preset_observed_type
+                User-specified observed types that take highest priority.
             """
-            self.atomic_model.compute_or_load_stat(sampled_func, stat_file_path)
+            self.atomic_model.compute_or_load_stat(
+                sampled_func, stat_file_path, preset_observed_type=preset_observed_type
+            )
 
         def get_model_def_script(self) -> str:
             """Get the model definition script."""
 
@@ -29,6 +29,62 @@
 log = logging.getLogger(__name__)
 
 
+def collect_observed_types(sampled: list[dict], type_map: list[str]) -> list[str]:
+    """Collect observed element types from sampled training data.
+
+    Parameters
+    ----------
+    sampled : list[dict]
+        Sampled data from different data systems. Each dict must contain
+        ``"atype"`` with shape ``[nframes, natoms]``.
+    type_map : list[str]
+        Mapping from type index to element symbol.
+
+    Returns
+    -------
+    list[str]
+        Observed element symbols, ordered by ``sort_element_type``. Type
+        indices outside ``[0, len(type_map))`` are ignored.
+    """
+    from deepmd.utils.econf_embd import (
+        sort_element_type,
+    )
+
+    observed_indices: set[int] = set()
+    for system in sampled:
+        atype = to_numpy_array(system["atype"])  # shape: [nframes, natoms]
+        observed_indices.update(np.unique(atype).tolist())
+    # The lower bound matters: a negative index would wrap around in
+    # ``type_map[i]`` and report an element that was never observed.
+    ntypes = len(type_map)
+    observed_types = [
+        type_map[i] for i in sorted(observed_indices) if 0 <= i < ntypes
+    ]
+    return sort_element_type(observed_types)
+
+
+def _restore_observed_type_from_file(
+    stat_file_path: DPPath | None,
+) -> list[str] | None:
+    """Read a previously stored observed-type list, if one exists.
+
+    Returns ``None`` when ``stat_file_path`` is ``None`` or when there is no
+    ``observed_type`` file under it.
+    """
+    if stat_file_path is None:
+        return None
+    target = stat_file_path / "observed_type"
+    if not target.is_file():
+        return None
+    restored = []
+    for item in target.load_numpy().tolist():
+        # Entries may be stored as bytes (for h5py compatibility); decode them.
+        restored.append(item.decode() if isinstance(item, bytes) else item)
+    return restored
+
+
+def _save_observed_type_to_file(
+    stat_file_path: DPPath | None, observed_type: list[str]
+) -> None:
+    """Write the observed-type list under the stat directory.
+
+    Does nothing when ``stat_file_path`` is ``None``.
+    """
+    if stat_file_path is not None:
+        stat_file_path.mkdir(exist_ok=True, parents=True)
+        target = stat_file_path / "observed_type"
+        # Use bytes dtype for h5py compatibility
+        # (h5py cannot store Unicode strings).
+        encoded = np.array(observed_type, dtype="S")
+        target.save_numpy(encoded)
+
+
 def _restore_from_file(
     stat_file_path: DPPath,
     keys: list[str],
 
@@ -126,6 +126,13 @@ def show(
             )
         else:
             log.info("The observed types for this model: ")
-            observed_types = model.get_observed_types()
+            observed_type_list = model_params.get("info", {}).get("observed_type")
+            if observed_type_list is not None:
+                observed_types = {
+                    "type_num": len(observed_type_list),
+                    "observed_type": observed_type_list,
+                }
+            else:
+                observed_types = model.get_observed_types()
             log.info(f"Number of observed types: {observed_types['type_num']} ")
             log.info(f"Observed types: {observed_types['observed_type']} ")
@@ -736,6 +736,14 @@ def get_observed_types(self) -> dict:
             - 'type_num': the total number of observed types in this model.
             - 'observed_type': a list of the observed types in this model.
         """
+        # Try metadata first (from model_def_script, already a dict)
+        observed_type_list = self.model_def_script.get("info", {}).get("observed_type")
+        if observed_type_list is not None:
+            return {
+                "type_num": len(observed_type_list),
+                "observed_type": observed_type_list,
+            }
+        # Fallback: bias-based approach for old models
         observed_type_list = self.dp.model["Default"].get_observed_type_list()
         return {
             "type_num": len(observed_type_list),
 
@@ -90,6 +90,45 @@ def __init__(
         self.rcond = rcond
         self.preset_out_bias = preset_out_bias
         self.data_stat_protect = data_stat_protect
+        self._observed_type: list[str] | None = None
+
+    @property
+    def observed_type(self) -> list[str] | None:
+        """Get the observed element type list from data statistics.
+
+        Returns
+        -------
+        list[str] | None
+            The value stored in ``self._observed_type``. It is ``None`` until
+            ``_collect_and_set_observed_type`` has assigned a list.
+        """
+        return self._observed_type
+
+    def _collect_and_set_observed_type(
+        self,
+        sampled_func: Callable[[], list[dict]],
+        stat_file_path: "DPPath | None",
+        preset_observed_type: list[str] | None,
+    ) -> None:
+        """Resolve the observed types and store them on ``self._observed_type``.
+
+        Sources are tried in order: the preset list, then the stat file, then
+        a fresh computation from sampled data (which is written back to the
+        stat file).
+
+        Parameters
+        ----------
+        sampled_func
+            The lazy sampled function to get data frames. Only called when
+            neither the preset nor the stat file provides a value.
+        stat_file_path
+            The path to the statistics files (should already include type_map suffix).
+        preset_observed_type
+            User-specified observed types that take highest priority.
+        """
+        from deepmd.dpmodel.utils.stat import (
+            _restore_observed_type_from_file,
+            _save_observed_type_to_file,
+            collect_observed_types,
+        )
+
+        # Highest priority: the caller-provided list.
+        if preset_observed_type is not None:
+            self._observed_type = preset_observed_type
+            return
+
+        # Next: a value already stored alongside the other statistics.
+        from_file = _restore_observed_type_from_file(stat_file_path)
+        if from_file is not None:
+            self._observed_type = from_file
+            return
+
+        # Last resort: sample the data, derive the types, and persist them.
+        derived = collect_observed_types(sampled_func(), self.type_map)
+        _save_observed_type_to_file(stat_file_path, derived)
+        self._observed_type = derived
 
     def init_out_stat(self) -> None:
         """Initialize the output bias."""
@@ -376,6 +415,7 @@ def compute_or_load_stat(
         merged: Callable[[], list[dict]] | list[dict],
         stat_file_path: DPPath | None = None,
         compute_or_load_out_stat: bool = True,
+        preset_observed_type: list[str] | None = None,
     ) -> NoReturn:
         """
         Compute or load the statistics parameters of the model,