Skip to content

Commit 60c5ff9

Browse files
authored
Merge pull request #4 from anyangml/copilot/add-switch-rmse-normalization-fix
feat(loss): add intensive parameter for backward-compatible RMSE normalization
2 parents 1bea859 + 38c97ff commit 60c5ff9

7 files changed

Lines changed: 227 additions & 26 deletions

File tree

deepmd/dpmodel/loss/ener.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ class EnergyLoss(Loss):
9090
If true, use L2 norm of force vectors for loss calculation when loss_func='mae' or use_huber is True.
9191
Instead of computing loss on force components, computes loss on ||F_pred - F_label||_2.
9292
This treats the force vector as a whole rather than three independent components.
93+
intensive : bool
94+
If true, energy and virial losses are computed as intensive quantities,
95+
normalized by the square of the number of atoms (1/N^2). This ensures the loss
96+
value is independent of system size and consistent with per-atom RMSE reporting.
97+
If false (default), uses the legacy normalization (1/N), which may cause the loss to scale
98+
with system size. The default is false for backward compatibility with models trained
99+
using deepmd-kit <= 3.0.1. This switch only changes the L2 energy and virial terms used when use_huber is false.
93100
**kwargs
94101
Other keyword arguments.
95102
"""
@@ -116,6 +123,7 @@ def __init__(
116123
huber_delta: float | list[float] = 0.01,
117124
loss_func: str = "mse",
118125
f_use_norm: bool = False,
126+
intensive: bool = False,
119127
**kwargs: Any,
120128
) -> None:
121129
# Validate loss_func
@@ -155,6 +163,7 @@ def __init__(
155163
self.use_huber = use_huber
156164
self.huber_delta = huber_delta
157165
self.f_use_norm = f_use_norm
166+
self.intensive = intensive
158167
if self.f_use_norm and not (self.use_huber or self.loss_func == "mae"):
159168
raise RuntimeError(
160169
"f_use_norm can only be True when use_huber or loss_func='mae'."
@@ -256,11 +265,15 @@ def call(
256265

257266
loss = 0
258267
more_loss = {}
268+
# Normalization exponent controls loss scaling with system size:
269+
# - norm_exp=2 (intensive=True): loss uses 1/N² scaling, making it independent of system size
270+
# - norm_exp=1 (intensive=False, legacy): loss uses 1/N scaling, which varies with system size
271+
norm_exp = 2 if self.intensive else 1
259272
if self.has_e:
260273
if self.loss_func == "mse":
261274
l2_ener_loss = xp.mean(xp.square(energy - energy_hat))
262275
if not self.use_huber:
263-
loss += atom_norm_ener**2 * (pref_e * l2_ener_loss)
276+
loss += atom_norm_ener**norm_exp * (pref_e * l2_ener_loss)
264277
else:
265278
l_huber_loss = custom_huber_loss(
266279
atom_norm_ener * energy,
@@ -335,7 +348,7 @@ def call(
335348
xp.square(virial_hat_reshape - virial_reshape),
336349
)
337350
if not self.use_huber:
338-
loss += atom_norm**2 * (pref_v * l2_virial_loss)
351+
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
339352
else:
340353
l_huber_loss = custom_huber_loss(
341354
atom_norm * virial_reshape,
@@ -525,7 +538,7 @@ def serialize(self) -> dict:
525538
"""
526539
return {
527540
"@class": "EnergyLoss",
528-
"@version": 2,
541+
"@version": 3,
529542
"starter_learning_rate": self.starter_learning_rate,
530543
"start_pref_e": self.start_pref_e,
531544
"limit_pref_e": self.limit_pref_e,
@@ -546,6 +559,7 @@ def serialize(self) -> dict:
546559
"huber_delta": self.huber_delta,
547560
"loss_func": self.loss_func,
548561
"f_use_norm": self.f_use_norm,
562+
"intensive": self.intensive,
549563
}
550564

551565
@classmethod
@@ -563,6 +577,10 @@ def deserialize(cls, data: dict) -> "Loss":
563577
The deserialized loss module
564578
"""
565579
data = data.copy()
566-
check_version_compatibility(data.pop("@version"), 2, 1)
580+
version = data.pop("@version")
581+
check_version_compatibility(version, 3, 1)
567582
data.pop("@class")
583+
# Backward compatibility: version 1-2 used legacy normalization
584+
if version < 3:
585+
data.setdefault("intensive", False)
568586
return cls(**data)

deepmd/dpmodel/loss/ener_spin.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,13 @@ class EnergySpinLoss(Loss):
5050
if true, the energy will be computed as \sum_i c_i E_i
5151
loss_func : str
5252
Loss function type: 'mse' or 'mae'.
53+
intensive : bool
54+
If true, energy and virial losses are computed as intensive quantities,
55+
normalized by the square of the number of atoms (1/N^2). This ensures the loss
56+
value is independent of system size and consistent with per-atom RMSE reporting.
57+
If false (default), uses the legacy normalization (1/N), which may cause the loss to scale
58+
with system size. The default is false for backward compatibility with models trained
59+
using deepmd-kit <= 3.0.1. This switch only changes the energy and virial terms used when loss_func is 'mse'.
5360
**kwargs
5461
Other keyword arguments.
5562
"""
@@ -69,6 +76,7 @@ def __init__(
6976
limit_pref_ae: float = 0.0,
7077
enable_atom_ener_coeff: bool = False,
7178
loss_func: str = "mse",
79+
intensive: bool = False,
7280
**kwargs: Any,
7381
) -> None:
7482
valid_loss_funcs = ["mse", "mae"]
@@ -89,6 +97,7 @@ def __init__(
8997
self.start_pref_ae = start_pref_ae
9098
self.limit_pref_ae = limit_pref_ae
9199
self.enable_atom_ener_coeff = enable_atom_ener_coeff
100+
self.intensive = intensive
92101
self.has_e = self.start_pref_e != 0.0 or self.limit_pref_e != 0.0
93102
self.has_fr = self.start_pref_fr != 0.0 or self.limit_pref_fr != 0.0
94103
self.has_fm = self.start_pref_fm != 0.0 or self.limit_pref_fm != 0.0
@@ -117,6 +126,10 @@ def call(
117126
loss = 0
118127
more_loss = {}
119128
atom_norm = 1.0 / natoms
129+
# Normalization exponent controls loss scaling with system size:
130+
# - norm_exp=2 (intensive=True): loss uses 1/N² scaling, making it independent of system size
131+
# - norm_exp=1 (intensive=False, legacy): loss uses 1/N scaling, which varies with system size
132+
norm_exp = 2 if self.intensive else 1
120133

121134
if self.has_e:
122135
energy_pred = model_dict["energy"]
@@ -130,7 +143,7 @@ def call(
130143
energy_pred = xp.sum(atom_ener_coeff * atom_ener_pred, axis=1)
131144
if self.loss_func == "mse":
132145
l2_ener_loss = xp.mean(xp.square(energy_pred - energy_label))
133-
loss += atom_norm * (pref_e * l2_ener_loss)
146+
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
134147
more_loss["rmse_e"] = self.display_if_exist(
135148
xp.sqrt(l2_ener_loss) * atom_norm, find_energy
136149
)
@@ -238,7 +251,7 @@ def call(
238251
diff_v = virial_label - virial_pred
239252
if self.loss_func == "mse":
240253
l2_virial_loss = xp.mean(xp.square(diff_v))
241-
loss += atom_norm * (pref_v * l2_virial_loss)
254+
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
242255
more_loss["rmse_v"] = self.display_if_exist(
243256
xp.sqrt(l2_virial_loss) * atom_norm, find_virial
244257
)
@@ -326,7 +339,7 @@ def serialize(self) -> dict:
326339
"""Serialize the loss module."""
327340
return {
328341
"@class": "EnergySpinLoss",
329-
"@version": 1,
342+
"@version": 2,
330343
"starter_learning_rate": self.starter_learning_rate,
331344
"start_pref_e": self.start_pref_e,
332345
"limit_pref_e": self.limit_pref_e,
@@ -340,12 +353,17 @@ def serialize(self) -> dict:
340353
"limit_pref_ae": self.limit_pref_ae,
341354
"enable_atom_ener_coeff": self.enable_atom_ener_coeff,
342355
"loss_func": self.loss_func,
356+
"intensive": self.intensive,
343357
}
344358

345359
@classmethod
346360
def deserialize(cls, data: dict) -> "EnergySpinLoss":
347361
"""Deserialize the loss module."""
348362
data = data.copy()
349-
check_version_compatibility(data.pop("@version"), 1, 1)
363+
version = data.pop("@version")
364+
check_version_compatibility(version, 2, 1)
350365
data.pop("@class")
366+
# Backward compatibility: version 1 used legacy normalization
367+
if version < 2:
368+
data.setdefault("intensive", False)
351369
return cls(**data)

deepmd/pd/loss/ener.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def __init__(
6161
use_huber: bool = False,
6262
huber_delta: float | list[float] = 0.01,
6363
f_use_norm: bool = False,
64+
intensive: bool = False,
6465
**kwargs: Any,
6566
) -> None:
6667
r"""Construct a layer to compute loss on energy, force and virial.
@@ -119,6 +120,13 @@ def __init__(
119120
f_use_norm : bool
120121
If True, use L2 norm of force vectors for loss calculation.
121122
Not implemented in PD backend, only for serialization compatibility.
123+
intensive : bool
124+
If true, energy and virial losses are computed as intensive quantities,
125+
normalized by the square of the number of atoms (1/N^2). This ensures the loss
126+
value is independent of system size and consistent with per-atom RMSE reporting.
127+
If false (default), uses the legacy normalization (1/N), which may cause the loss to scale
128+
with system size. The default is false for backward compatibility with models trained
129+
using deepmd-kit <= 3.0.1. This switch only changes the L2 energy and virial terms used when use_huber is false.
122130
**kwargs
123131
Other keyword arguments.
124132
"""
@@ -161,6 +169,7 @@ def __init__(
161169
self.inference = inference
162170
self.use_huber = use_huber
163171
self.huber_delta = huber_delta
172+
self.intensive = intensive
164173
(
165174
self._huber_delta_energy,
166175
self._huber_delta_force,
@@ -218,6 +227,10 @@ def forward(
218227
# more_loss['log_keys'] = [] # showed when validation on the fly
219228
# more_loss['test_keys'] = [] # showed when doing dp test
220229
atom_norm = 1.0 / natoms
230+
# Normalization exponent controls loss scaling with system size:
231+
# - norm_exp=2 (intensive=True): loss uses 1/N² scaling, making it independent of system size
232+
# - norm_exp=1 (intensive=False, legacy): loss uses 1/N scaling, which varies with system size
233+
norm_exp = 2 if self.intensive else 1
221234
if self.has_e and "energy" in model_pred and "energy" in label:
222235
energy_pred = model_pred["energy"]
223236
energy_label = label["energy"]
@@ -243,7 +256,7 @@ def forward(
243256
l2_ener_loss.detach(), find_energy
244257
)
245258
if not self.use_huber:
246-
loss += atom_norm**2 * (pref_e * l2_ener_loss)
259+
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
247260
else:
248261
l_huber_loss = custom_huber_loss(
249262
atom_norm * energy_pred,
@@ -414,7 +427,7 @@ def forward(
414427
l2_virial_loss.detach(), find_virial
415428
)
416429
if not self.use_huber:
417-
loss += atom_norm**2 * (pref_v * l2_virial_loss)
430+
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
418431
else:
419432
l_huber_loss = custom_huber_loss(
420433
atom_norm * model_pred["virial"].reshape([-1]),
@@ -564,7 +577,7 @@ def serialize(self) -> dict:
564577
"""
565578
return {
566579
"@class": "EnergyLoss",
567-
"@version": 2,
580+
"@version": 3,
568581
"starter_learning_rate": self.starter_learning_rate,
569582
"start_pref_e": self.start_pref_e,
570583
"limit_pref_e": self.limit_pref_e,
@@ -585,6 +598,7 @@ def serialize(self) -> dict:
585598
"huber_delta": self.huber_delta,
586599
"loss_func": self.loss_func,
587600
"f_use_norm": self.f_use_norm,
601+
"intensive": self.intensive,
588602
}
589603

590604
@classmethod
@@ -602,8 +616,12 @@ def deserialize(cls, data: dict) -> "TaskLoss":
602616
The deserialized loss module
603617
"""
604618
data = data.copy()
605-
check_version_compatibility(data.pop("@version"), 2, 1)
619+
version = data.pop("@version")
620+
check_version_compatibility(version, 3, 1)
606621
data.pop("@class")
622+
# Handle backward compatibility for older versions without intensive
623+
if version < 3:
624+
data.setdefault("intensive", False)
607625
return cls(**data)
608626

609627

deepmd/pt/loss/ener.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def __init__(
6161
use_huber: bool = False,
6262
f_use_norm: bool = False,
6363
huber_delta: float | list[float] = 0.01,
64+
intensive: bool = False,
6465
**kwargs: Any,
6566
) -> None:
6667
r"""Construct a layer to compute loss on energy, force and virial.
@@ -120,6 +121,13 @@ def __init__(
120121
The threshold delta (D) used for Huber loss, controlling transition between
121122
L2 and L1 loss. It can be either one float shared by all terms or a list of
122123
three values ordered as [energy, force, virial].
124+
intensive : bool
125+
If true, energy and virial losses are computed as intensive quantities,
126+
normalized by the square of the number of atoms (1/N^2). This ensures the loss
127+
value is independent of system size and consistent with per-atom RMSE reporting.
128+
If false (default), uses the legacy normalization (1/N), which may cause the loss to scale
129+
with system size. The default is false for backward compatibility with models trained
130+
using deepmd-kit <= 3.0.1. This switch only changes the L2 energy and virial terms used when use_huber is false.
123131
**kwargs
124132
Other keyword arguments.
125133
"""
@@ -163,6 +171,7 @@ def __init__(
163171
self.inference = inference
164172
self.use_huber = use_huber
165173
self.f_use_norm = f_use_norm
174+
self.intensive = intensive
166175
if self.f_use_norm and not (self.use_huber or self.loss_func == "mae"):
167176
raise RuntimeError(
168177
"f_use_norm can only be True when use_huber or loss_func='mae'."
@@ -225,6 +234,10 @@ def forward(
225234
# more_loss['log_keys'] = [] # showed when validation on the fly
226235
# more_loss['test_keys'] = [] # showed when doing dp test
227236
atom_norm = 1.0 / natoms
237+
# Normalization exponent controls loss scaling with system size:
238+
# - norm_exp=2 (intensive=True): loss uses 1/N² scaling, making it independent of system size
239+
# - norm_exp=1 (intensive=False, legacy): loss uses 1/N scaling, which varies with system size
240+
norm_exp = 2 if self.intensive else 1
228241
if self.has_e and "energy" in model_pred and "energy" in label:
229242
energy_pred = model_pred["energy"]
230243
energy_label = label["energy"]
@@ -250,7 +263,7 @@ def forward(
250263
l2_ener_loss.detach(), find_energy
251264
)
252265
if not self.use_huber:
253-
loss += atom_norm**2 * (pref_e * l2_ener_loss)
266+
loss += atom_norm**norm_exp * (pref_e * l2_ener_loss)
254267
else:
255268
l_huber_loss = custom_huber_loss(
256269
atom_norm * energy_pred,
@@ -432,7 +445,7 @@ def forward(
432445
l2_virial_loss.detach(), find_virial
433446
)
434447
if not self.use_huber:
435-
loss += atom_norm**2 * (pref_v * l2_virial_loss)
448+
loss += atom_norm**norm_exp * (pref_v * l2_virial_loss)
436449
else:
437450
l_huber_loss = custom_huber_loss(
438451
atom_norm * model_pred["virial"].reshape(-1),
@@ -599,7 +612,7 @@ def serialize(self) -> dict:
599612
"""
600613
return {
601614
"@class": "EnergyLoss",
602-
"@version": 2,
615+
"@version": 3,
603616
"starter_learning_rate": self.starter_learning_rate,
604617
"start_pref_e": self.start_pref_e,
605618
"limit_pref_e": self.limit_pref_e,
@@ -620,6 +633,7 @@ def serialize(self) -> dict:
620633
"huber_delta": self.huber_delta,
621634
"loss_func": self.loss_func,
622635
"f_use_norm": self.f_use_norm,
636+
"intensive": self.intensive,
623637
}
624638

625639
@classmethod
@@ -637,8 +651,12 @@ def deserialize(cls, data: dict) -> "TaskLoss":
637651
The deserialized loss module
638652
"""
639653
data = data.copy()
640-
check_version_compatibility(data.pop("@version"), 2, 1)
654+
version = data.pop("@version")
655+
check_version_compatibility(version, 3, 1)
641656
data.pop("@class")
657+
# Handle backward compatibility for older versions without intensive
658+
if version < 3:
659+
data.setdefault("intensive", False)
642660
return cls(**data)
643661

644662

0 commit comments

Comments
 (0)