Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f261151
fix: Square atom_norm in non-Huber energy and virial loss calculations.
anyangml Mar 23, 2026
247f053
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 23, 2026
1bea859
Merge branch 'master' into fix/rmse-loss-normalization
iProzd Apr 13, 2026
11be908
feat(loss): add intensive parameter for backward-compatible RMSE norm…
Copilot Apr 16, 2026
e8331ab
docs(loss): improve comments explaining intensive normalization behavior
Copilot Apr 16, 2026
601b2ef
fix(loss): change intensive default to False for backward compatibility
Copilot Apr 16, 2026
e86952b
fix(loss): add intensive key to serialization for all backends
Copilot Apr 16, 2026
38c97ff
fix(loss): add intensive parameter to TF EnerSpinLoss class
Copilot Apr 17, 2026
60c5ff9
Merge pull request #4 from anyangml/copilot/add-switch-rmse-normaliza…
anyangml Apr 17, 2026
b097404
Merge branch 'master' into fix/rmse-loss-normalization
iProzd Apr 17, 2026
736e731
fix(loss): update intensive parameter documentation and add test cove…
Copilot Apr 19, 2026
45cffad
Merge pull request #5 from anyangml/copilot/update-code-based-on-revi…
anyangml Apr 19, 2026
2e9e600
fix: address PR review comments for intensive normalization
Copilot Apr 20, 2026
dc49c1f
Merge pull request #6 from anyangml/copilot/apply-comments-from-threa…
anyangml Apr 20, 2026
2ce4029
Update deepmd/utils/argcheck.py
anyangml Apr 20, 2026
0bdd46f
doc: add explaination
anyangml Apr 20, 2026
c856d72
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 20, 2026
e329579
Merge branch 'deepmodeling:master' into fix/rmse-loss-normalization
anyangml Apr 21, 2026
29a7833
chore: typo fix
anyangml Apr 21, 2026
a22767f
Update default version for backward compatibility
anyangml Apr 23, 2026
be51371
chore: rename param
anyangml Apr 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions deepmd/dpmodel/loss/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ class EnergyLoss(Loss):
If true, use L2 norm of force vectors for loss calculation when loss_func='mae' or use_huber is True.
Instead of computing loss on force components, computes loss on ||F_pred - F_label||_2.
This treats the force vector as a whole rather than three independent components.
intensive_ener_virial : bool
If true, the non-Huber MSE energy and virial losses use intensive normalization,
i.e. a 1/N^2 factor instead of the legacy 1/N scaling. This matches per-atom
RMSE-style normalization for those terms. MAE and Huber modes are normalized
differently, so this flag does not act as a 1/N versus 1/N^2 switch for them.
If false (default), the legacy normalization is used for the affected terms.
The default is false for backward compatibility with models trained using
deepmd-kit <= 3.1.3.
**kwargs
Other keyword arguments.
"""
Expand All @@ -116,6 +124,7 @@ def __init__(
huber_delta: float | list[float] = 0.01,
loss_func: str = "mse",
f_use_norm: bool = False,
intensive_ener_virial: bool = False,
**kwargs: Any,
) -> None:
# Validate loss_func
Expand Down Expand Up @@ -155,6 +164,7 @@ def __init__(
self.use_huber = use_huber
self.huber_delta = huber_delta
self.f_use_norm = f_use_norm
self.intensive_ener_virial = intensive_ener_virial
if self.f_use_norm and not (self.use_huber or self.loss_func == "mae"):
raise RuntimeError(
"f_use_norm can only be True when use_huber or loss_func='mae'."
Expand Down Expand Up @@ -256,11 +266,15 @@ def call(

loss = 0
more_loss = {}
# Normalization exponent controls loss scaling with system size:
# - norm_exp=2 (intensive_ener_virial=True): loss uses 1/N² scaling, making it independent of system size
# - norm_exp=1 (intensive_ener_virial=False, legacy): loss uses 1/N scaling, which varies with system size
norm_exp = 2 if self.intensive_ener_virial else 1
if self.has_e:
if self.loss_func == "mse":
l2_ener_loss = xp.mean(xp.square(energy - energy_hat))
if not self.use_huber:
loss += atom_norm_ener * (pref_e * l2_ener_loss)
loss += atom_norm_ener**norm_exp * (pref_e * l2_ener_loss)
Comment thread
anyangml marked this conversation as resolved.
else:
l_huber_loss = custom_huber_loss(
atom_norm_ener * energy,
Expand Down Expand Up @@ -335,7 +349,7 @@ def call(
xp.square(virial_hat_reshape - virial_reshape),
)
if not self.use_huber:
loss += atom_norm * (pref_v * l2_virial_loss)
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
else:
l_huber_loss = custom_huber_loss(
atom_norm * virial_reshape,
Expand Down Expand Up @@ -525,7 +539,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "EnergyLoss",
"@version": 2,
"@version": 3,
"starter_learning_rate": self.starter_learning_rate,
"start_pref_e": self.start_pref_e,
"limit_pref_e": self.limit_pref_e,
Expand All @@ -546,6 +560,7 @@ def serialize(self) -> dict:
"huber_delta": self.huber_delta,
"loss_func": self.loss_func,
"f_use_norm": self.f_use_norm,
"intensive_ener_virial": self.intensive_ener_virial,
}

@classmethod
Expand All @@ -563,6 +578,10 @@ def deserialize(cls, data: dict) -> "Loss":
The deserialized loss module
"""
data = data.copy()
check_version_compatibility(data.pop("@version"), 2, 1)
version = data.pop("@version")
check_version_compatibility(version, 3, 1)
data.pop("@class")
# Backward compatibility: version 1-2 used legacy normalization
if version < 3:
data.setdefault("intensive_ener_virial", False)
return cls(**data)
27 changes: 23 additions & 4 deletions deepmd/dpmodel/loss/ener_spin.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ class EnergySpinLoss(Loss):
if true, the energy will be computed as \sum_i c_i E_i
loss_func : str
Loss function type: 'mse' or 'mae'.
intensive_ener_virial : bool
If true, the MSE energy and virial terms use intensive normalization,
i.e. they are scaled by the inverse square of the number of atoms
(1/N^2) instead of the legacy 1/N factor. This keeps those MSE loss
terms consistent with per-atom RMSE reporting and less dependent on
system size. This option does not change the MAE formulation, which is
handled separately. The default is false for backward compatibility with
models trained using deepmd-kit <= 3.1.3.
**kwargs
Other keyword arguments.
"""
Expand All @@ -69,6 +77,7 @@ def __init__(
limit_pref_ae: float = 0.0,
enable_atom_ener_coeff: bool = False,
loss_func: str = "mse",
intensive_ener_virial: bool = False,
**kwargs: Any,
) -> None:
valid_loss_funcs = ["mse", "mae"]
Expand All @@ -89,6 +98,7 @@ def __init__(
self.start_pref_ae = start_pref_ae
self.limit_pref_ae = limit_pref_ae
self.enable_atom_ener_coeff = enable_atom_ener_coeff
self.intensive_ener_virial = intensive_ener_virial
self.has_e = self.start_pref_e != 0.0 or self.limit_pref_e != 0.0
self.has_fr = self.start_pref_fr != 0.0 or self.limit_pref_fr != 0.0
self.has_fm = self.start_pref_fm != 0.0 or self.limit_pref_fm != 0.0
Expand Down Expand Up @@ -117,6 +127,10 @@ def call(
loss = 0
more_loss = {}
atom_norm = 1.0 / natoms
# Normalization exponent controls loss scaling with system size:
# - norm_exp=2 (intensive_ener_virial=True): loss uses 1/N² scaling, making it independent of system size
# - norm_exp=1 (intensive_ener_virial=False, legacy): loss uses 1/N scaling, which varies with system size
norm_exp = 2 if self.intensive_ener_virial else 1

if self.has_e:
energy_pred = model_dict["energy"]
Expand All @@ -130,7 +144,7 @@ def call(
energy_pred = xp.sum(atom_ener_coeff * atom_ener_pred, axis=1)
if self.loss_func == "mse":
l2_ener_loss = xp.mean(xp.square(energy_pred - energy_label))
loss += atom_norm * (pref_e * l2_ener_loss)
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
Comment thread
anyangml marked this conversation as resolved.
more_loss["rmse_e"] = self.display_if_exist(
xp.sqrt(l2_ener_loss) * atom_norm, find_energy
)
Expand Down Expand Up @@ -238,7 +252,7 @@ def call(
diff_v = virial_label - virial_pred
if self.loss_func == "mse":
l2_virial_loss = xp.mean(xp.square(diff_v))
loss += atom_norm * (pref_v * l2_virial_loss)
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
more_loss["rmse_v"] = self.display_if_exist(
xp.sqrt(l2_virial_loss) * atom_norm, find_virial
)
Expand Down Expand Up @@ -326,7 +340,7 @@ def serialize(self) -> dict:
"""Serialize the loss module."""
return {
"@class": "EnergySpinLoss",
"@version": 1,
"@version": 2,
"starter_learning_rate": self.starter_learning_rate,
"start_pref_e": self.start_pref_e,
"limit_pref_e": self.limit_pref_e,
Expand All @@ -340,12 +354,17 @@ def serialize(self) -> dict:
"limit_pref_ae": self.limit_pref_ae,
"enable_atom_ener_coeff": self.enable_atom_ener_coeff,
"loss_func": self.loss_func,
"intensive_ener_virial": self.intensive_ener_virial,
}

@classmethod
def deserialize(cls, data: dict) -> "EnergySpinLoss":
"""Deserialize the loss module."""
data = data.copy()
check_version_compatibility(data.pop("@version"), 1, 1)
version = data.pop("@version")
check_version_compatibility(version, 2, 1)
data.pop("@class")
# Backward compatibility: version 1 used legacy normalization
if version < 2:
data.setdefault("intensive_ener_virial", False)
return cls(**data)
26 changes: 22 additions & 4 deletions deepmd/pd/loss/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
use_huber: bool = False,
huber_delta: float | list[float] = 0.01,
f_use_norm: bool = False,
intensive_ener_virial: bool = False,
**kwargs: Any,
) -> None:
r"""Construct a layer to compute loss on energy, force and virial.
Expand Down Expand Up @@ -119,6 +120,13 @@ def __init__(
f_use_norm : bool
If True, use L2 norm of force vectors for loss calculation.
Not implemented in PD backend, only for serialization compatibility.
intensive_ener_virial : bool
Controls size normalization for energy and virial loss terms. For the non-Huber
MSE path, setting this to true applies 1/N^2 scaling, while false uses the legacy
1/N scaling. For MAE, the normalization remains 1/N. For Huber loss, residuals are
first normalized by 1/N before applying the Huber formula, so this option does not
provide a pure 1/N versus 1/N^2 toggle in that path. The default is false for
backward compatibility with models trained using deepmd-kit <= 3.1.3.
**kwargs
Other keyword arguments.
"""
Expand Down Expand Up @@ -161,6 +169,7 @@ def __init__(
self.inference = inference
self.use_huber = use_huber
self.huber_delta = huber_delta
self.intensive_ener_virial = intensive_ener_virial
(
self._huber_delta_energy,
self._huber_delta_force,
Expand Down Expand Up @@ -218,6 +227,10 @@ def forward(
# more_loss['log_keys'] = [] # showed when validation on the fly
# more_loss['test_keys'] = [] # showed when doing dp test
atom_norm = 1.0 / natoms
# Normalization exponent controls loss scaling with system size:
# - norm_exp=2 (intensive_ener_virial=True): loss uses 1/N² scaling, making it independent of system size
# - norm_exp=1 (intensive_ener_virial=False, legacy): loss uses 1/N scaling, which varies with system size
norm_exp = 2 if self.intensive_ener_virial else 1
if self.has_e and "energy" in model_pred and "energy" in label:
energy_pred = model_pred["energy"]
energy_label = label["energy"]
Expand All @@ -243,7 +256,7 @@ def forward(
l2_ener_loss.detach(), find_energy
)
if not self.use_huber:
loss += atom_norm * (pref_e * l2_ener_loss)
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
else:
l_huber_loss = custom_huber_loss(
atom_norm * energy_pred,
Expand Down Expand Up @@ -414,7 +427,7 @@ def forward(
l2_virial_loss.detach(), find_virial
)
if not self.use_huber:
loss += atom_norm * (pref_v * l2_virial_loss)
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
else:
l_huber_loss = custom_huber_loss(
atom_norm * model_pred["virial"].reshape([-1]),
Expand Down Expand Up @@ -564,7 +577,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "EnergyLoss",
"@version": 2,
"@version": 3,
"starter_learning_rate": self.starter_learning_rate,
"start_pref_e": self.start_pref_e,
"limit_pref_e": self.limit_pref_e,
Expand All @@ -585,6 +598,7 @@ def serialize(self) -> dict:
"huber_delta": self.huber_delta,
"loss_func": self.loss_func,
"f_use_norm": self.f_use_norm,
"intensive_ener_virial": self.intensive_ener_virial,
}

@classmethod
Expand All @@ -602,8 +616,12 @@ def deserialize(cls, data: dict) -> "TaskLoss":
The deserialized loss module
"""
data = data.copy()
check_version_compatibility(data.pop("@version"), 2, 1)
version = data.pop("@version")
check_version_compatibility(version, 3, 1)
data.pop("@class")
# Handle backward compatibility for older versions without intensive_ener_virial
if version < 3:
data.setdefault("intensive_ener_virial", False)
return cls(**data)


Expand Down
26 changes: 22 additions & 4 deletions deepmd/pt/loss/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
use_huber: bool = False,
f_use_norm: bool = False,
huber_delta: float | list[float] = 0.01,
intensive_ener_virial: bool = False,
**kwargs: Any,
) -> None:
r"""Construct a layer to compute loss on energy, force and virial.
Expand Down Expand Up @@ -120,6 +121,13 @@ def __init__(
The threshold delta (D) used for Huber loss, controlling transition between
L2 and L1 loss. It can be either one float shared by all terms or a list of
three values ordered as [energy, force, virial].
intensive_ener_virial : bool
Controls size normalization for energy and virial loss terms. For the non-Huber
MSE path, setting this to true applies 1/N^2 scaling, while false uses the legacy
1/N scaling. For MAE, the normalization remains 1/N. For Huber loss, residuals are
first normalized by 1/N before applying the Huber formula, so this option does not
provide a pure 1/N versus 1/N^2 toggle in that path. The default is false for
backward compatibility with models trained using deepmd-kit <= 3.1.3.
**kwargs
Other keyword arguments.
"""
Expand Down Expand Up @@ -163,6 +171,7 @@ def __init__(
self.inference = inference
self.use_huber = use_huber
self.f_use_norm = f_use_norm
self.intensive_ener_virial = intensive_ener_virial
if self.f_use_norm and not (self.use_huber or self.loss_func == "mae"):
raise RuntimeError(
"f_use_norm can only be True when use_huber or loss_func='mae'."
Expand Down Expand Up @@ -225,6 +234,10 @@ def forward(
# more_loss['log_keys'] = [] # showed when validation on the fly
# more_loss['test_keys'] = [] # showed when doing dp test
atom_norm = 1.0 / natoms
# Normalization exponent controls loss scaling with system size:
# - norm_exp=2 (intensive_ener_virial=True): loss uses 1/N² scaling, making it independent of system size
# - norm_exp=1 (intensive_ener_virial=False, legacy): loss uses 1/N scaling, which varies with system size
norm_exp = 2 if self.intensive_ener_virial else 1
if self.has_e and "energy" in model_pred and "energy" in label:
energy_pred = model_pred["energy"]
energy_label = label["energy"]
Expand All @@ -250,7 +263,7 @@ def forward(
l2_ener_loss.detach(), find_energy
)
if not self.use_huber:
loss += atom_norm * (pref_e * l2_ener_loss)
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
else:
Comment thread
anyangml marked this conversation as resolved.
l_huber_loss = custom_huber_loss(
atom_norm * energy_pred,
Expand Down Expand Up @@ -432,7 +445,7 @@ def forward(
l2_virial_loss.detach(), find_virial
)
if not self.use_huber:
loss += atom_norm * (pref_v * l2_virial_loss)
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
else:
l_huber_loss = custom_huber_loss(
atom_norm * model_pred["virial"].reshape(-1),
Expand Down Expand Up @@ -599,7 +612,7 @@ def serialize(self) -> dict:
"""
return {
"@class": "EnergyLoss",
"@version": 2,
"@version": 3,
"starter_learning_rate": self.starter_learning_rate,
"start_pref_e": self.start_pref_e,
"limit_pref_e": self.limit_pref_e,
Expand All @@ -620,6 +633,7 @@ def serialize(self) -> dict:
"huber_delta": self.huber_delta,
"loss_func": self.loss_func,
"f_use_norm": self.f_use_norm,
"intensive_ener_virial": self.intensive_ener_virial,
}

@classmethod
Expand All @@ -637,8 +651,12 @@ def deserialize(cls, data: dict) -> "TaskLoss":
The deserialized loss module
"""
data = data.copy()
check_version_compatibility(data.pop("@version"), 2, 1)
version = data.pop("@version")
check_version_compatibility(version, 3, 1)
data.pop("@class")
# Handle backward compatibility for older versions without intensive_ener_virial
if version < 3:
data.setdefault("intensive_ener_virial", False)
return cls(**data)


Expand Down
Loading
Loading