deepmodeling
diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py
Lines changed: 30 additions & 14 deletions
diff --git a/deepmd/pt/loss/xas.py b/deepmd/pt/loss/xas.py
Lines changed: 200 additions & 20 deletions
diff --git a/deepmd/pt/model/atomic_model/__init__.py b/deepmd/pt/model/atomic_model/__init__.py
Lines changed: 2 additions & 0 deletions
@@ -887,17 +887,20 @@ def test_property(
             high_prec=True,
         )
 
+    is_xas = var_name == "xas"
+
     if dp.get_dim_fparam() > 0:
         data.add(
             "fparam", dp.get_dim_fparam(), atomic=False, must=True, high_prec=False
         )
     if dp.get_dim_aparam() > 0:
         data.add("aparam", dp.get_dim_aparam(), atomic=True, must=True, high_prec=False)
 
-    # sel_type: optional per-frame type index for element-wise mean reduction (XAS)
-    data.add(
-        "sel_type", 1, atomic=False, must=False, high_prec=False, default=float(-1)
-    )
+    # XAS requires sel_type.npy (per-frame absorbing element type index)
+    if is_xas:
+        data.add(
+            "sel_type", 1, atomic=False, must=True, high_prec=False
+        )
 
     test_data = data.get_test()
     mixed_type = data.mixed_type
@@ -923,12 +926,8 @@ def test_property(
     else:
         aparam = None
 
-    # detect whether this system provides sel_type (XAS-style reduction)
-    sel_type_raw = test_data["sel_type"][:numb_test, 0]  # [numb_test]
-    has_sel_type = bool((sel_type_raw >= 0).all())
-
-    # for sel_type reduction we need per-atom outputs
-    eval_atomic = has_atom_property or has_sel_type
+    # XAS: per-atom outputs are needed to average over absorbing-element atoms
+    eval_atomic = has_atom_property or is_xas
     ret = dp.eval(
         coord,
         box,
@@ -939,27 +938,44 @@ def test_property(
         mixed_type=mixed_type,
     )
 
-    if has_sel_type:
+    if is_xas:
         # ret[1]: per-atom property [numb_test, natoms, task_dim]
         atom_prop = ret[1].reshape([numb_test, natoms, dp.task_dim])
-        # atype for all frames
         if mixed_type:
             atype_frames = atype  # [numb_test, natoms]
         else:
             atype_frames = np.tile(atype, (numb_test, 1))  # [numb_test, natoms]
-        sel_type_int = sel_type_raw.astype(int)
+        sel_type_int = test_data["sel_type"][:numb_test, 0].astype(int)
         property = np.zeros([numb_test, dp.task_dim], dtype=atom_prop.dtype)
         for i in range(numb_test):
             t = sel_type_int[i]
             mask = atype_frames[i] == t  # [natoms]
             count = max(mask.sum(), 1)
             property[i] = atom_prop[i][mask].sum(axis=0) / count
+
+        # Add back the per-(type, edge) energy reference so output is in
+        # absolute eV (matching label format).  xas_e_ref is saved in the
+        # model checkpoint by XASLoss.compute_output_stats.
+        try:
+            xas_e_ref = dp.dp.model["Default"].atomic_model.xas_e_ref
+        except AttributeError:
+            xas_e_ref = None
+        if xas_e_ref is not None and fparam is not None:
+            import torch as _torch
+            edge_idx_all = _torch.tensor(
+                fparam.reshape(numb_test, -1)
+            ).argmax(dim=-1).numpy()
+            e_ref_np = xas_e_ref.cpu().numpy()  # [ntypes, nfparam, 2]
+            for i in range(numb_test):
+                t = sel_type_int[i]
+                e = int(edge_idx_all[i])
+                property[i, :2] += e_ref_np[t, e]
     else:
         property = ret[0]
 
     property = property.reshape([numb_test, dp.task_dim])
 
-    if has_atom_property:
+    if has_atom_property and not is_xas:
         aproperty = ret[1]
         aproperty = aproperty.reshape([numb_test, natoms * dp.task_dim])
 
 
@@ -1,21 +1,15 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
 import logging
-from typing import (
-    Any,
-)
+from collections import defaultdict
+from typing import Any
 
+import numpy as np
 import torch
 import torch.nn.functional as F
 
-from deepmd.pt.loss.loss import (
-    TaskLoss,
-)
-from deepmd.pt.utils import (
-    env,
-)
-from deepmd.utils.data import (
-    DataRequirementItem,
-)
+from deepmd.pt.loss.loss import TaskLoss
+from deepmd.pt.utils import env
+from deepmd.utils.data import DataRequirementItem
 
 log = logging.getLogger(__name__)
 
@@ -28,10 +22,31 @@ class XASLoss(TaskLoss):
     in each training system) and takes their mean, then computes a loss against
     the per-frame XAS label.
 
+    Energy normalization
+    --------------------
+    XAS labels contain absolute edge energies (E_min, E_max in eV) that vary
+    enormously across element-edge pairs (H_K ~14 eV, Th_K ~110000 eV).
+    Training directly on absolute values causes gradient instability because
+    the energy dimensions dwarf the intensity dimensions.
+
+    ``compute_output_stats`` computes a reference energy ``e_ref[t, e]`` for
+    every ``(absorbing_type t, edge_index e)`` combination from the training
+    data and stores it as a registered buffer.  During training, ``forward``
+    normalises labels and predictions by subtracting the per-frame reference
+    so that the loss is computed on chemical shifts (±few eV) and normalised
+    intensities—quantities of comparable magnitude.
+
+    The buffer is saved in the model checkpoint, eliminating any need for
+    external normalisation files.
+
     Parameters
     ----------
     task_dim : int
         Output dimension of the fitting net (e.g. 102 = E_min + E_max + 100 pts).
+    ntypes : int
+        Number of atom types in the model.
+    nfparam : int
+        Length of the fparam one-hot vector (= number of edge types).
     var_name : str
         Property name, must match ``property_name`` in the fitting config.
     loss_func : str
@@ -45,6 +60,8 @@ class XASLoss(TaskLoss):
     def __init__(
         self,
         task_dim: int,
+        ntypes: int,
+        nfparam: int,
         var_name: str = "xas",
         loss_func: str = "smooth_mae",
         metric: list[str] = ["mae"],
@@ -53,11 +70,141 @@ def __init__(
     ) -> None:
         super().__init__()
         self.task_dim = task_dim
+        self.ntypes = ntypes
+        self.nfparam = nfparam
         self.var_name = var_name
         self.loss_func = loss_func
         self.metric = metric
         self.beta = beta
 
+        # e_ref[sel_type_idx, edge_idx, 0] = mean E_min  (eV)
+        # e_ref[sel_type_idx, edge_idx, 1] = mean E_max  (eV)
+        # Shape: [ntypes, nfparam, 2]. Filled by compute_output_stats; zero until then.
+        self.register_buffer(
+            "e_ref",
+            torch.zeros(ntypes, nfparam, 2, dtype=env.GLOBAL_PT_FLOAT_PRECISION),
+        )
+
+    # ------------------------------------------------------------------
+    # Stat phase: compute per-(absorbing_type, edge) reference energies
+    # ------------------------------------------------------------------
+    def compute_output_stats(
+        self,
+        sampled: list[dict],
+        model: "torch.nn.Module | None" = None,
+    ) -> None:
+        """Compute ``e_ref`` and reset the model's energy-dim bias/std.
+
+        Called once before training starts.  Frames that lack any of
+        ``self.var_name``, ``sel_type`` or ``fparam`` are ignored.  If no
+        usable frame is found, a warning is logged and the method returns
+        without touching ``e_ref`` or ``model``.
+
+        For every ``(sel_type, edge_index)`` pair seen in ``sampled`` the mean
+        of the first two label columns (E_min, E_max) is stored in
+        ``self.e_ref[sel_type, edge_index]``; every other entry is set to
+        zero.  ``edge_index`` is the argmax of the frame's ``fparam`` row.
+
+        Parameters
+        ----------
+        sampled : list[dict]
+            List of data batches from ``make_stat_input``.
+        model : nn.Module, optional
+            The full DeePMD model.  When given, ``e_ref`` is copied into
+            ``model.atomic_model.xas_e_ref`` (if that buffer exists) and the
+            ``out_bias`` / ``out_std`` entries of the two energy dimensions
+            are reset to 0 / 1, so the network output for those dimensions is
+            trained as a *chemical shift* (label − e_ref) rather than an
+            absolute energy.  Any exception raised while updating the model
+            is logged as a warning and not propagated.
+        """
+        # (sel_type, edge_idx) -> list of float64 [E_min, E_max] rows
+        accum: dict[tuple[int, int], list] = defaultdict(list)
+        n_skipped = 0
+
+        for frame in sampled:
+            if (
+                self.var_name not in frame
+                or "sel_type" not in frame
+                or "fparam" not in frame
+            ):
+                continue
+
+            # flatten to [nf, task_dim], [nf], [nf, nfparam]
+            xas = frame[self.var_name].reshape(-1, self.task_dim)
+            sel_type = frame["sel_type"].reshape(-1).long()
+            fparam = frame["fparam"].reshape(-1, self.nfparam)
+
+            # Move each batch to the host once instead of once per frame, and
+            # promote to float64 so that averaging large absolute energies
+            # does not lose precision when labels are stored in float32.
+            energies = (
+                xas[:, :2].detach().to(device="cpu", dtype=torch.float64).numpy()
+            )
+            types = sel_type.tolist()
+            edges = fparam.argmax(dim=-1).tolist()
+
+            for i in range(xas.shape[0]):
+                t, e = types[i], edges[i]
+                if 0 <= t < self.ntypes and 0 <= e < self.nfparam:
+                    accum[(t, e)].append(energies[i])
+                else:
+                    n_skipped += 1
+
+        if n_skipped:
+            log.warning(
+                f"XASLoss.compute_output_stats: skipped {n_skipped} frame(s) whose "
+                "sel_type or edge index is out of range."
+            )
+
+        if not accum:
+            log.warning(
+                "XASLoss.compute_output_stats: no frames with xas+sel_type+fparam found; "
+                "e_ref remains zero. Training may be unstable."
+            )
+            return
+
+        e_ref = torch.zeros(
+            self.ntypes, self.nfparam, 2, dtype=env.GLOBAL_PT_FLOAT_PRECISION
+        )
+        for (t, e), vals in accum.items():
+            e_ref[t, e] = torch.tensor(
+                np.mean(vals, axis=0), dtype=env.GLOBAL_PT_FLOAT_PRECISION
+            )
+            log.info(
+                f"XASLoss e_ref: type={t}, edge={e} -> "
+                f"E_min_ref={float(e_ref[t,e,0]):.2f} eV, "
+                f"E_max_ref={float(e_ref[t,e,1]):.2f} eV  "
+                f"(n={len(vals)})"
+            )
+
+        self.e_ref.copy_(e_ref)
+        log.info(
+            f"XASLoss: e_ref computed for {len(accum)} (sel_type, edge) combinations."
+        )
+
+        if model is None:
+            return
+
+        # Best-effort update of the model: a failure here is reported but
+        # does not abort training set-up.
+        try:
+            am = model.atomic_model
+
+            # 1. Copy e_ref into the model's own buffer so the reference
+            #    travels with the model and can be added back at inference
+            #    time without an external reference file.
+            if getattr(am, "xas_e_ref", None) is not None:
+                am.xas_e_ref.copy_(e_ref.to(am.xas_e_ref.dtype))
+                log.info("XASLoss: copied e_ref → model.atomic_model.xas_e_ref.")
+            else:
+                log.warning(
+                    "XASLoss: model.atomic_model has no xas_e_ref buffer; "
+                    "e_ref is kept in the loss only."
+                )
+
+            # 2. Reset energy-dim out_bias/out_std so the NN predicts
+            #    chemical shifts instead of absolute energies.
+            #
+            #    The model output for an energy dim is
+            #      NN_raw * out_std + out_bias,
+            #    and the model stat phase fills out_bias/out_std from the
+            #    global label statistics (the original author reports
+            #    ≈19 000 eV and ≈26 000 eV for the energy dims).  An
+            #    optimiser step of size ~lr on NN_raw then moves the
+            #    physical output by ~lr × out_std, i.e. a few eV for
+            #    lr ~ 1e-4.  With out_std=1 / out_bias=0 the raw output is
+            #    used directly as the chemical shift
+            #    (target = label − e_ref).
+            key_idx = am.bias_keys.index(self.var_name)
+            with torch.no_grad():
+                am.out_bias[key_idx, :, :2] = 0.0
+                am.out_std[key_idx, :, :2] = 1.0
+            log.info(
+                "XASLoss: reset out_bias[:,:2]=0 and out_std[:,:2]=1 "
+                "for energy dims (model predicts chemical shifts; "
+                "xas_e_ref restores absolute energies at inference)."
+            )
+        except Exception as exc:
+            log.warning(f"XASLoss: could not update model energy-dim stats: {exc}")
+
+    # ------------------------------------------------------------------
+    # Forward
+    # ------------------------------------------------------------------
     def forward(
         self,
         input_dict: dict[str, torch.Tensor],
@@ -76,7 +223,7 @@ def forward(
         # sel_type from label: [nf, 1] float → [nf] int
         sel_type = label["sel_type"][:, 0].long()
 
-        # element-wise mean: for each frame average over atoms of sel_type
+        # element-wise mean: average atom_prop over atoms of sel_type per frame
         nf, nloc, td = atom_prop.shape
         pred = torch.zeros(nf, td, dtype=atom_prop.dtype, device=atom_prop.device)
         for i in range(nf):
@@ -87,27 +234,60 @@ def forward(
 
         label_xas = label[self.var_name]  # [nf, task_dim]
 
+        # --- per-frame reference energy lookup ---
+        # edge_idx = argmax of one-hot fparam
+        fparam = input_dict.get("fparam")
+        if fparam is not None and fparam.numel() > 0:
+            edge_idx = fparam.reshape(nf, -1).argmax(dim=-1).clamp(0, self.nfparam - 1)
+        else:
+            edge_idx = torch.zeros(nf, dtype=torch.long, device=pred.device)
+
+        # e_ref_frame: [nf, 2]  (E_min_ref, E_max_ref for each frame)
+        e_ref_frame = self.e_ref[sel_type, edge_idx]  # [nf, 2]
+
+        # Shift the energy-dim TARGETS only.
+        #
+        # After compute_output_stats has reset out_bias[:,:2]=0 / out_std[:,:2]=1,
+        # the model outputs raw NN values ≈ 0 for dims 0,1.  We train those
+        # dims against (label − e_ref), i.e. the chemical shift (±few eV),
+        # keeping gradient magnitudes O(1).  Intensity dims (2:) are trained
+        # against the original label values unchanged.
+        #
+        # At inference, we add e_ref back to get the absolute edge energy.
+        label_shifted = label_xas.clone()
+        label_shifted[:, :2] = label_xas[:, :2] - e_ref_frame
+
+        # --- loss ---
         loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0]
         if self.loss_func == "smooth_mae":
-            loss += F.smooth_l1_loss(pred, label_xas, reduction="sum", beta=self.beta)
+            loss += F.smooth_l1_loss(
+                pred, label_shifted, reduction="sum", beta=self.beta
+            )
         elif self.loss_func == "mae":
-            loss += F.l1_loss(pred, label_xas, reduction="sum")
+            loss += F.l1_loss(pred, label_shifted, reduction="sum")
         elif self.loss_func == "mse":
-            loss += F.mse_loss(pred, label_xas, reduction="sum")
+            loss += F.mse_loss(pred, label_shifted, reduction="sum")
         elif self.loss_func == "rmse":
-            loss += torch.sqrt(F.mse_loss(pred, label_xas, reduction="mean"))
+            loss += torch.sqrt(F.mse_loss(pred, label_shifted, reduction="mean"))
         else:
             raise RuntimeError(f"Unknown loss function: {self.loss_func}")
 
+        # --- metrics ---
         more_loss: dict[str, torch.Tensor] = {}
         if "mae" in self.metric:
-            more_loss["mae"] = F.l1_loss(pred, label_xas, reduction="mean").detach()
+            more_loss["mae"] = F.l1_loss(
+                pred, label_shifted, reduction="mean"
+            ).detach()
         if "rmse" in self.metric:
             more_loss["rmse"] = torch.sqrt(
-                F.mse_loss(pred, label_xas, reduction="mean")
+                F.mse_loss(pred, label_shifted, reduction="mean")
             ).detach()
 
-        model_pred[self.var_name] = pred
+        # Absolute prediction: add e_ref back to energy dims for eval / output
+        pred_abs = pred.clone()
+        pred_abs[:, :2] = pred[:, :2] + e_ref_frame
+        model_pred[self.var_name] = pred_abs
+
         return model_pred, loss, more_loss
 
     @property
 
@@ -41,6 +41,7 @@
 )
 from .property_atomic_model import (
     DPPropertyAtomicModel,
+    DPXASAtomicModel,
 )
 
 __all__ = [
@@ -51,6 +52,7 @@
     "DPEnergyAtomicModel",
     "DPPolarAtomicModel",
     "DPPropertyAtomicModel",
+    "DPXASAtomicModel",
     "DPZBLLinearEnergyAtomicModel",
     "LinearEnergyAtomicModel",
     "PairTabAtomicModel",
Original file line number	Diff line number	Diff line change
`@@ -41,6 +41,7 @@`
`41`	`41`	`)`
`42`	`42`	`from .property_atomic_model import (`
`43`	`43`	`DPPropertyAtomicModel,`
	`44`	`+ DPXASAtomicModel,`
`44`	`45`	`)`
`45`	`46`
`46`	`47`	`__all__ = [`
`@@ -51,6 +52,7 @@`
`51`	`52`	`"DPEnergyAtomicModel",`
`52`	`53`	`"DPPolarAtomicModel",`
`53`	`54`	`"DPPropertyAtomicModel",`
	`55`	`+ "DPXASAtomicModel",`
`54`	`56`	`"DPZBLLinearEnergyAtomicModel",`
`55`	`57`	`"LinearEnergyAtomicModel",`
`56`	`58`	`"PairTabAtomicModel",`