Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions deepmd/dpmodel/loss/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,13 @@ class EnergyLoss(Loss):
If true, use L2 norm of force vectors for loss calculation when loss_func='mae' or use_huber is True.
Instead of computing loss on force components, computes loss on ||F_pred - F_label||_2.
This treats the force vector as a whole rather than three independent components.
intensive : bool
If true, the non-Huber MSE energy and virial terms are normalized by the square
of the number of atoms (1/N^2) instead of the legacy 1/N. This makes those terms
independent of system size and consistent with per-atom RMSE reporting.
MAE terms keep their 1/N normalization, and the Huber path is not switched by this
flag: it always scales the residuals by 1/N before applying the Huber function.
If false (default), the legacy 1/N normalization is used for the affected terms, which
may cause them to scale with system size. The default is false for backward
compatibility with models trained using deepmd-kit <= 3.0.1.
Comment on lines +94 to +99
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intensive parameter doc says losses are normalized by 1/N² when enabled, but in code it only affects the non-Huber MSE energy/virial branches; MAE remains 1/N and the Huber path scales residuals by 1/N before applying Huber. Please narrow the wording so it accurately reflects which loss modes are impacted.

Suggested change
If true, energy and virial losses are computed as intensive quantities,
normalized by the square of the number of atoms (1/N^2). This ensures the loss
value is independent of system size and consistent with per-atom RMSE reporting.
If false (default), uses the legacy normalization (1/N), which may cause the loss to scale
with system size. The default is false for backward compatibility with models trained
using deepmd-kit <= 3.0.1.
If true, the non-Huber MSE energy and virial losses use intensive normalization,
i.e. a 1/N^2 factor instead of the legacy 1/N scaling. This matches per-atom
RMSE-style normalization for those terms. MAE and Huber modes use different
scaling and are not affected in the same way by this flag.
If false (default), the legacy normalization is used for the affected terms.
The default is false for backward compatibility with models trained using
deepmd-kit <= 3.0.1.

Copilot uses AI. Check for mistakes.
**kwargs
Other keyword arguments.
"""
Expand All @@ -116,6 +123,7 @@ def __init__(
huber_delta: float | list[float] = 0.01,
loss_func: str = "mse",
f_use_norm: bool = False,
intensive: bool = False,
**kwargs: Any,
) -> None:
# Validate loss_func
Expand Down Expand Up @@ -155,6 +163,7 @@ def __init__(
self.use_huber = use_huber
self.huber_delta = huber_delta
self.f_use_norm = f_use_norm
self.intensive = intensive
if self.f_use_norm and not (self.use_huber or self.loss_func == "mae"):
raise RuntimeError(
"f_use_norm can only be True when use_huber or loss_func='mae'."
Expand Down Expand Up @@ -256,11 +265,15 @@ def call(

loss = 0
more_loss = {}
# Normalization exponent for the non-Huber MSE energy and virial terms
# (the Huber branch scales residuals by 1/N itself and does not use norm_exp):
# - norm_exp=2 (intensive=True): terms use 1/N² scaling, making them independent of system size
# - norm_exp=1 (intensive=False, legacy): terms use 1/N scaling, which varies with system size
norm_exp = 2 if self.intensive else 1
if self.has_e:
if self.loss_func == "mse":
l2_ener_loss = xp.mean(xp.square(energy - energy_hat))
if not self.use_huber:
loss += atom_norm_ener * (pref_e * l2_ener_loss)
loss += atom_norm_ener**norm_exp * (pref_e * l2_ener_loss)
Comment on lines +268 to +276
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are existing cross-backend loss consistency tests (e.g. source/tests/consistent/loss/test_ener.py) but they don’t exercise the new intensive normalization. Please add test coverage that toggles intensive=True and verifies the expected 1/N² scaling for the non-Huber MSE energy/virial terms (and that serialization round-trips preserve the flag).

Copilot uses AI. Check for mistakes.
else:
l_huber_loss = custom_huber_loss(
atom_norm_ener * energy,
Expand Down Expand Up @@ -335,7 +348,7 @@ def call(
xp.square(virial_hat_reshape - virial_reshape),
)
if not self.use_huber:
loss += atom_norm * (pref_v * l2_virial_loss)
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
else:
l_huber_loss = custom_huber_loss(
atom_norm * virial_reshape,
Expand Down Expand Up @@ -525,7 +538,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "EnergyLoss",
"@version": 2,
"@version": 3,
"starter_learning_rate": self.starter_learning_rate,
"start_pref_e": self.start_pref_e,
"limit_pref_e": self.limit_pref_e,
Expand All @@ -546,6 +559,7 @@ def serialize(self) -> dict:
"huber_delta": self.huber_delta,
"loss_func": self.loss_func,
"f_use_norm": self.f_use_norm,
"intensive": self.intensive,
}

@classmethod
Expand All @@ -563,6 +577,10 @@ def deserialize(cls, data: dict) -> "Loss":
The deserialized loss module
"""
data = data.copy()
check_version_compatibility(data.pop("@version"), 2, 1)
version = data.pop("@version")
check_version_compatibility(version, 3, 1)
data.pop("@class")
# Backward compatibility: version 1-2 used legacy normalization
if version < 3:
data.setdefault("intensive", False)
return cls(**data)
26 changes: 22 additions & 4 deletions deepmd/dpmodel/loss/ener_spin.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ class EnergySpinLoss(Loss):
if true, the energy will be computed as \sum_i c_i E_i
loss_func : str
Loss function type: 'mse' or 'mae'.
intensive : bool
If true, the MSE energy and virial terms are normalized by the square of the
number of atoms (1/N^2) instead of the legacy 1/N. This makes those terms
independent of system size and consistent with per-atom RMSE reporting.
This flag does not change the MAE formulation (loss_func='mae'), which is handled separately.
If false (default), the legacy 1/N normalization is used for the affected terms, which
may cause them to scale with system size. The default is false for backward
compatibility with models trained using deepmd-kit <= 3.0.1.
Comment on lines +54 to +59
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EnergySpinLoss supports MAE, but the intensive doc currently claims 1/N² normalization generally. In the code, intensive only changes the MSE energy/virial normalization via atom_norm**norm_exp; MAE is handled differently. Please update the docstring to state explicitly which loss functions/terms are affected.

Suggested change
If true, energy and virial losses are computed as intensive quantities,
normalized by the square of the number of atoms (1/N^2). This ensures the loss
value is independent of system size and consistent with per-atom RMSE reporting.
If false (default), uses the legacy normalization (1/N), which may cause the loss to scale
with system size. The default is false for backward compatibility with models trained
using deepmd-kit <= 3.0.1.
If true, the MSE energy and virial terms use intensive normalization,
i.e. an additional normalization by the square of the number of atoms
(1/N^2) instead of the legacy (1/N) behavior. This keeps those MSE loss
terms consistent with per-atom RMSE reporting and less dependent on
system size. This option does not change the MAE formulation, which is
handled separately. The default is false for backward compatibility with
models trained using deepmd-kit <= 3.0.1.

Copilot uses AI. Check for mistakes.
**kwargs
Other keyword arguments.
"""
Expand All @@ -69,6 +76,7 @@ def __init__(
limit_pref_ae: float = 0.0,
enable_atom_ener_coeff: bool = False,
loss_func: str = "mse",
intensive: bool = False,
**kwargs: Any,
) -> None:
valid_loss_funcs = ["mse", "mae"]
Expand All @@ -89,6 +97,7 @@ def __init__(
self.start_pref_ae = start_pref_ae
self.limit_pref_ae = limit_pref_ae
self.enable_atom_ener_coeff = enable_atom_ener_coeff
self.intensive = intensive
self.has_e = self.start_pref_e != 0.0 or self.limit_pref_e != 0.0
self.has_fr = self.start_pref_fr != 0.0 or self.limit_pref_fr != 0.0
self.has_fm = self.start_pref_fm != 0.0 or self.limit_pref_fm != 0.0
Expand Down Expand Up @@ -117,6 +126,10 @@ def call(
loss = 0
more_loss = {}
atom_norm = 1.0 / natoms
# Normalization exponent for the MSE energy and virial terms:
# - norm_exp=2 (intensive=True): terms use 1/N² scaling, making them independent of system size
# - norm_exp=1 (intensive=False, legacy): terms use 1/N scaling, which varies with system size
norm_exp = 2 if self.intensive else 1

if self.has_e:
energy_pred = model_dict["energy"]
Expand All @@ -130,7 +143,7 @@ def call(
energy_pred = xp.sum(atom_ener_coeff * atom_ener_pred, axis=1)
if self.loss_func == "mse":
l2_ener_loss = xp.mean(xp.square(energy_pred - energy_label))
loss += atom_norm * (pref_e * l2_ener_loss)
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
Comment on lines 128 to +146
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Existing loss tests (e.g. source/tests/consistent/loss/test_ener_spin.py) don’t cover the new intensive option. Please extend tests to run with intensive=True and validate the expected 1/N² scaling for MSE energy/virial (and confirm serialize()/deserialize() preserve the flag/version bump).

Copilot uses AI. Check for mistakes.
more_loss["rmse_e"] = self.display_if_exist(
xp.sqrt(l2_ener_loss) * atom_norm, find_energy
)
Expand Down Expand Up @@ -238,7 +251,7 @@ def call(
diff_v = virial_label - virial_pred
if self.loss_func == "mse":
l2_virial_loss = xp.mean(xp.square(diff_v))
loss += atom_norm * (pref_v * l2_virial_loss)
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
more_loss["rmse_v"] = self.display_if_exist(
xp.sqrt(l2_virial_loss) * atom_norm, find_virial
)
Expand Down Expand Up @@ -326,7 +339,7 @@ def serialize(self) -> dict:
"""Serialize the loss module."""
return {
"@class": "EnergySpinLoss",
"@version": 1,
"@version": 2,
"starter_learning_rate": self.starter_learning_rate,
"start_pref_e": self.start_pref_e,
"limit_pref_e": self.limit_pref_e,
Expand All @@ -340,12 +353,17 @@ def serialize(self) -> dict:
"limit_pref_ae": self.limit_pref_ae,
"enable_atom_ener_coeff": self.enable_atom_ener_coeff,
"loss_func": self.loss_func,
"intensive": self.intensive,
}

@classmethod
def deserialize(cls, data: dict) -> "EnergySpinLoss":
"""Deserialize the loss module."""
data = data.copy()
check_version_compatibility(data.pop("@version"), 1, 1)
version = data.pop("@version")
check_version_compatibility(version, 2, 1)
data.pop("@class")
# Backward compatibility: version 1 used legacy normalization
if version < 2:
data.setdefault("intensive", False)
return cls(**data)
26 changes: 22 additions & 4 deletions deepmd/pd/loss/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
use_huber: bool = False,
huber_delta: float | list[float] = 0.01,
f_use_norm: bool = False,
intensive: bool = False,
**kwargs: Any,
) -> None:
r"""Construct a layer to compute loss on energy, force and virial.
Expand Down Expand Up @@ -119,6 +120,13 @@ def __init__(
f_use_norm : bool
If True, use L2 norm of force vectors for loss calculation.
Not implemented in PD backend, only for serialization compatibility.
intensive : bool
If true, the non-Huber MSE energy and virial terms are normalized by the square
of the number of atoms (1/N^2) instead of the legacy 1/N. This makes those terms
independent of system size and consistent with per-atom RMSE reporting.
MAE terms keep their 1/N normalization, and the Huber path is not switched by this
flag: it always scales the residuals by 1/N before applying the Huber function.
If false (default), the legacy 1/N normalization is used for the affected terms, which
may cause them to scale with system size. The default is false for backward
compatibility with models trained using deepmd-kit <= 3.0.1.
Comment on lines +124 to +129
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intensive docstring currently presents normalization as a simple 1/N² vs 1/N toggle. In practice here, the new exponent is only used for non-Huber MSE energy/virial; MAE remains 1/N and the Huber branch already works on per-atom residuals (scaled by 1/N) regardless. Please update the doc to reflect these specifics so users know when intensive actually changes the loss.

Suggested change
If true, energy and virial losses are computed as intensive quantities,
normalized by the square of the number of atoms (1/N^2). This ensures the loss
value is independent of system size and consistent with per-atom RMSE reporting.
If false (default), uses the legacy normalization (1/N), which may cause the loss to scale
with system size. The default is false for backward compatibility with models trained
using deepmd-kit <= 3.0.1.
Controls the normalization used for energy and virial losses in the
non-Huber MSE path. If true, those terms use intensive normalization
(1/N^2) instead of the legacy normalization (1/N), where N is the
number of atoms. This makes the corresponding MSE loss consistent with
per-atom RMSE reporting.
This flag does not change MAE normalization, which remains 1/N, and it
does not affect the Huber branch, which already operates on per-atom
residuals scaled by 1/N regardless of this setting. The default is false
for backward compatibility with models trained using deepmd-kit <= 3.0.1.

Copilot uses AI. Check for mistakes.
**kwargs
Other keyword arguments.
"""
Expand Down Expand Up @@ -161,6 +169,7 @@ def __init__(
self.inference = inference
self.use_huber = use_huber
self.huber_delta = huber_delta
self.intensive = intensive
(
self._huber_delta_energy,
self._huber_delta_force,
Expand Down Expand Up @@ -218,6 +227,10 @@ def forward(
# more_loss['log_keys'] = [] # showed when validation on the fly
# more_loss['test_keys'] = [] # showed when doing dp test
atom_norm = 1.0 / natoms
# Normalization exponent for the non-Huber MSE energy and virial terms
# (the Huber branch scales residuals by 1/N itself and does not use norm_exp):
# - norm_exp=2 (intensive=True): terms use 1/N² scaling, making them independent of system size
# - norm_exp=1 (intensive=False, legacy): terms use 1/N scaling, which varies with system size
norm_exp = 2 if self.intensive else 1
if self.has_e and "energy" in model_pred and "energy" in label:
energy_pred = model_pred["energy"]
energy_label = label["energy"]
Expand All @@ -243,7 +256,7 @@ def forward(
l2_ener_loss.detach(), find_energy
)
if not self.use_huber:
loss += atom_norm * (pref_e * l2_ener_loss)
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
else:
l_huber_loss = custom_huber_loss(
atom_norm * energy_pred,
Expand Down Expand Up @@ -414,7 +427,7 @@ def forward(
l2_virial_loss.detach(), find_virial
)
if not self.use_huber:
loss += atom_norm * (pref_v * l2_virial_loss)
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
else:
l_huber_loss = custom_huber_loss(
atom_norm * model_pred["virial"].reshape([-1]),
Expand Down Expand Up @@ -564,7 +577,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "EnergyLoss",
"@version": 2,
"@version": 3,
"starter_learning_rate": self.starter_learning_rate,
"start_pref_e": self.start_pref_e,
"limit_pref_e": self.limit_pref_e,
Expand All @@ -585,6 +598,7 @@ def serialize(self) -> dict:
"huber_delta": self.huber_delta,
"loss_func": self.loss_func,
"f_use_norm": self.f_use_norm,
"intensive": self.intensive,
}

@classmethod
Expand All @@ -602,8 +616,12 @@ def deserialize(cls, data: dict) -> "TaskLoss":
The deserialized loss module
"""
data = data.copy()
check_version_compatibility(data.pop("@version"), 2, 1)
version = data.pop("@version")
check_version_compatibility(version, 3, 1)
data.pop("@class")
# Handle backward compatibility for older versions without intensive
if version < 3:
data.setdefault("intensive", False)
return cls(**data)


Expand Down
26 changes: 22 additions & 4 deletions deepmd/pt/loss/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
use_huber: bool = False,
f_use_norm: bool = False,
huber_delta: float | list[float] = 0.01,
intensive: bool = False,
**kwargs: Any,
) -> None:
r"""Construct a layer to compute loss on energy, force and virial.
Expand Down Expand Up @@ -120,6 +121,13 @@ def __init__(
The threshold delta (D) used for Huber loss, controlling transition between
L2 and L1 loss. It can be either one float shared by all terms or a list of
three values ordered as [energy, force, virial].
intensive : bool
If true, the non-Huber MSE energy and virial terms are normalized by the square
of the number of atoms (1/N^2) instead of the legacy 1/N. This makes those terms
independent of system size and consistent with per-atom RMSE reporting.
MAE terms keep their 1/N normalization, and the Huber path is not switched by this
flag: it always scales the residuals by 1/N before applying the Huber function.
If false (default), the legacy 1/N normalization is used for the affected terms, which
may cause them to scale with system size. The default is false for backward
compatibility with models trained using deepmd-kit <= 3.0.1.
Comment on lines +125 to +130
Copy link

Copilot AI Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intensive docstring says energy/virial loss is normalized by 1/N² when enabled. In this implementation, 1/N² scaling is applied to the non-Huber MSE energy/virial terms; MAE remains 1/N and the Huber path normalizes residuals by 1/N before applying Huber (so intensive does not provide a pure 1/N vs 1/N² toggle there). Please clarify the docstring accordingly.

Suggested change
If true, energy and virial losses are computed as intensive quantities,
normalized by the square of the number of atoms (1/N^2). This ensures the loss
value is independent of system size and consistent with per-atom RMSE reporting.
If false (default), uses the legacy normalization (1/N), which may cause the loss to scale
with system size. The default is false for backward compatibility with models trained
using deepmd-kit <= 3.0.1.
Controls size normalization for energy and virial loss terms. For the non-Huber
MSE path, setting this to true applies 1/N^2 scaling, while false uses the legacy
1/N scaling. For MAE, the normalization remains 1/N. For Huber loss, residuals are
first normalized by 1/N before applying the Huber formula, so this option does not
provide a pure 1/N versus 1/N^2 toggle in that path. The default is false for
backward compatibility with models trained using deepmd-kit <= 3.0.1.

Copilot uses AI. Check for mistakes.
**kwargs
Other keyword arguments.
"""
Expand Down Expand Up @@ -163,6 +171,7 @@ def __init__(
self.inference = inference
self.use_huber = use_huber
self.f_use_norm = f_use_norm
self.intensive = intensive
if self.f_use_norm and not (self.use_huber or self.loss_func == "mae"):
raise RuntimeError(
"f_use_norm can only be True when use_huber or loss_func='mae'."
Expand Down Expand Up @@ -225,6 +234,10 @@ def forward(
# more_loss['log_keys'] = [] # showed when validation on the fly
# more_loss['test_keys'] = [] # showed when doing dp test
atom_norm = 1.0 / natoms
# Normalization exponent for the non-Huber MSE energy and virial terms
# (the Huber branch scales residuals by 1/N itself and does not use norm_exp):
# - norm_exp=2 (intensive=True): terms use 1/N² scaling, making them independent of system size
# - norm_exp=1 (intensive=False, legacy): terms use 1/N scaling, which varies with system size
norm_exp = 2 if self.intensive else 1
if self.has_e and "energy" in model_pred and "energy" in label:
energy_pred = model_pred["energy"]
energy_label = label["energy"]
Expand All @@ -250,7 +263,7 @@ def forward(
l2_ener_loss.detach(), find_energy
)
if not self.use_huber:
loss += atom_norm * (pref_e * l2_ener_loss)
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
else:
Comment on lines 265 to 267
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are tests comparing losses across backends, but they don’t assert the intended natoms-scaling explicitly. Since this change fixes a subtle normalization bug (MSE energy/virial terms should scale with 1/natoms^2), consider adding a small regression test that checks the loss contribution scales correctly when natoms changes, so future refactors can’t accidentally revert to 1/natoms.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot apply changes based on this feedback

l_huber_loss = custom_huber_loss(
atom_norm * energy_pred,
Expand Down Expand Up @@ -432,7 +445,7 @@ def forward(
l2_virial_loss.detach(), find_virial
)
if not self.use_huber:
loss += atom_norm * (pref_v * l2_virial_loss)
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
else:
l_huber_loss = custom_huber_loss(
atom_norm * model_pred["virial"].reshape(-1),
Expand Down Expand Up @@ -599,7 +612,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "EnergyLoss",
"@version": 2,
"@version": 3,
"starter_learning_rate": self.starter_learning_rate,
"start_pref_e": self.start_pref_e,
"limit_pref_e": self.limit_pref_e,
Expand All @@ -620,6 +633,7 @@ def serialize(self) -> dict:
"huber_delta": self.huber_delta,
"loss_func": self.loss_func,
"f_use_norm": self.f_use_norm,
"intensive": self.intensive,
}

@classmethod
Expand All @@ -637,8 +651,12 @@ def deserialize(cls, data: dict) -> "TaskLoss":
The deserialized loss module
"""
data = data.copy()
check_version_compatibility(data.pop("@version"), 2, 1)
version = data.pop("@version")
check_version_compatibility(version, 3, 1)
data.pop("@class")
# Handle backward compatibility for older versions without intensive
if version < 3:
data.setdefault("intensive", False)
return cls(**data)


Expand Down
Loading
Loading