Skip to content

Commit dfb8c3f

Browse files
wanghan-iapcm Han Wang
and authored
chore(pt): mv the input stat update to model_change_out_bias (#5266)
Clean up the logic of change_out_bias: it only changes the output bias and does not update the fitting input stat. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Refactor** * Adjusted how bias-adjustment triggers recomputation of fitting/input statistics during model training, streamlining the behavior and adding a conditional recompute path for certain model types. * **Tests** * Added cross-checking tests to ensure the new and previous bias/statistic update paths produce consistent fitting statistics. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
1 parent 898c672 commit dfb8c3f

6 files changed

Lines changed: 249 additions & 4 deletions

File tree

deepmd/pd/model/model/make_model.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,6 @@ def change_out_bias(
231231
merged,
232232
bias_adjust_mode=bias_adjust_mode,
233233
)
234-
if bias_adjust_mode == "set-by-statistic":
235-
self.atomic_model.compute_fitting_input_stat(merged)
236234

237235
def forward_common_lower(
238236
self,

deepmd/pd/train/training.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,13 @@ def model_change_out_bias(
14091409
)
14101410
new_bias = deepcopy(_model.get_out_bias())
14111411

1412+
from deepmd.pd.model.model.dp_model import (
1413+
DPModelCommon,
1414+
)
1415+
1416+
if isinstance(_model, DPModelCommon) and _bias_adjust_mode == "set-by-statistic":
1417+
_model.get_fitting_net().compute_input_stats(_sample_func)
1418+
14121419
model_type_map = _model.get_type_map()
14131420
log.info(
14141421
f"Change output bias of {model_type_map!s} "

deepmd/pt/model/model/make_model.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,6 @@ def change_out_bias(
233233
merged,
234234
bias_adjust_mode=bias_adjust_mode,
235235
)
236-
if bias_adjust_mode == "set-by-statistic":
237-
self.atomic_model.compute_fitting_input_stat(merged)
238236

239237
def forward_common_lower(
240238
self,

deepmd/pt/train/training.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,6 +1814,13 @@ def model_change_out_bias(
18141814
)
18151815
new_bias = deepcopy(_model.get_out_bias())
18161816

1817+
from deepmd.pt.model.model.dp_model import (
1818+
DPModelCommon,
1819+
)
1820+
1821+
if isinstance(_model, DPModelCommon) and _bias_adjust_mode == "set-by-statistic":
1822+
_model.get_fitting_net().compute_input_stats(_sample_func)
1823+
18171824
model_type_map = _model.get_type_map()
18181825
log.info(
18191826
f"Change output bias of {model_type_map!s} from {to_numpy_array(old_bias).reshape(-1)!s} to {to_numpy_array(new_bias).reshape(-1)!s}."

source/tests/pd/test_training.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,5 +236,122 @@ def tearDown(self) -> None:
236236
DPTrainTest.tearDown(self)
237237

238238

239+
class TestModelChangeOutBiasFittingStat(unittest.TestCase):
240+
"""Verify model_change_out_bias produces the same fitting stat as the old code path.
241+
242+
The old code called compute_fitting_input_stat inside change_out_bias (make_model.py).
243+
The new code calls get_fitting_net().compute_input_stats() separately in
244+
model_change_out_bias (training.py). This test verifies they produce identical
245+
out_bias, fparam_avg, and fparam_inv_std.
246+
"""
247+
248+
def test_fitting_stat_consistency(self) -> None:
249+
from deepmd.pd.model.model import get_model as get_model_pd
250+
from deepmd.pd.model.model.ener_model import EnergyModel as EnergyModelPD
251+
from deepmd.pd.train.training import (
252+
model_change_out_bias,
253+
)
254+
from deepmd.pd.utils.utils import to_numpy_array as paddle_to_numpy
255+
from deepmd.pd.utils.utils import to_paddle_tensor as numpy_to_paddle
256+
from deepmd.utils.argcheck import model_args as model_args_fn
257+
258+
# Build a model with numb_fparam=2 so fitting stat is non-trivial
259+
model_params = model_args_fn().normalize_value(
260+
{
261+
"type_map": ["O", "H"],
262+
"descriptor": {
263+
"type": "se_e2_a",
264+
"sel": [20, 20],
265+
"rcut_smth": 0.50,
266+
"rcut": 6.00,
267+
"neuron": [3, 6],
268+
"resnet_dt": False,
269+
"axis_neuron": 2,
270+
"precision": "float64",
271+
"type_one_side": True,
272+
"seed": 1,
273+
},
274+
"fitting_net": {
275+
"neuron": [5, 5],
276+
"resnet_dt": True,
277+
"precision": "float64",
278+
"seed": 1,
279+
"numb_fparam": 2,
280+
},
281+
},
282+
trim_pattern="_*",
283+
)
284+
285+
# Create two identical models via serialize/deserialize
286+
model_orig = get_model_pd(model_params)
287+
serialized = model_orig.serialize()
288+
model_a = EnergyModelPD.deserialize(deepcopy(serialized))
289+
model_b = EnergyModelPD.deserialize(deepcopy(serialized))
290+
291+
# Build mock stat data with fparam
292+
nframes = 4
293+
natoms = 6
294+
coords = np.random.default_rng(42).random((nframes, natoms, 3)) * 13.0
295+
atype = np.array([[0, 0, 1, 1, 1, 1]] * nframes, dtype=np.int32)
296+
box = np.tile(
297+
np.eye(3, dtype=np.float64).reshape(1, 3, 3) * 13.0, (nframes, 1, 1)
298+
)
299+
natoms_data = np.array([[6, 6, 2, 4]] * nframes, dtype=np.int32)
300+
energy = np.array([10.0, 20.0, 15.0, 25.0]).reshape(nframes, 1)
301+
# fparam with varying values so mean != 0 and std != 0
302+
fparam = np.array(
303+
[[1.0, 3.0], [5.0, 7.0], [2.0, 8.0], [6.0, 4.0]], dtype=np.float64
304+
)
305+
306+
merged = [
307+
{
308+
"coord": numpy_to_paddle(coords),
309+
"atype": numpy_to_paddle(atype),
310+
"atype_ext": numpy_to_paddle(atype),
311+
"box": numpy_to_paddle(box),
312+
"natoms": numpy_to_paddle(natoms_data),
313+
"energy": numpy_to_paddle(energy),
314+
"find_energy": np.float32(1.0),
315+
"fparam": numpy_to_paddle(fparam),
316+
"find_fparam": np.float32(1.0),
317+
}
318+
]
319+
320+
# Model A: simulate the OLD code path
321+
# old change_out_bias called both bias adjustment + compute_fitting_input_stat
322+
model_a.change_out_bias(merged, bias_adjust_mode="set-by-statistic")
323+
model_a.atomic_model.compute_fitting_input_stat(merged)
324+
325+
# Model B: use the NEW code path via model_change_out_bias
326+
sample_func = lambda: merged # noqa: E731
327+
model_change_out_bias(model_b, sample_func, "set-by-statistic")
328+
329+
# Compare out_bias
330+
bias_a = paddle_to_numpy(model_a.get_out_bias())
331+
bias_b = paddle_to_numpy(model_b.get_out_bias())
332+
np.testing.assert_allclose(bias_a, bias_b, rtol=1e-10, atol=1e-10)
333+
334+
# Compare fparam_avg and fparam_inv_std
335+
fit_a = model_a.get_fitting_net()
336+
fit_b = model_b.get_fitting_net()
337+
fparam_avg_a = paddle_to_numpy(fit_a.fparam_avg)
338+
fparam_avg_b = paddle_to_numpy(fit_b.fparam_avg)
339+
fparam_inv_std_a = paddle_to_numpy(fit_a.fparam_inv_std)
340+
fparam_inv_std_b = paddle_to_numpy(fit_b.fparam_inv_std)
341+
342+
np.testing.assert_allclose(fparam_avg_a, fparam_avg_b, rtol=1e-10, atol=1e-10)
343+
np.testing.assert_allclose(
344+
fparam_inv_std_a, fparam_inv_std_b, rtol=1e-10, atol=1e-10
345+
)
346+
347+
# Verify non-trivial: avg should not be zeros, inv_std should not be ones
348+
assert not np.allclose(fparam_avg_a, 0.0), (
349+
"fparam_avg is still zero — stat was not computed"
350+
)
351+
assert not np.allclose(fparam_inv_std_a, 1.0), (
352+
"fparam_inv_std is still ones — stat was not computed"
353+
)
354+
355+
239356
if __name__ == "__main__":
240357
unittest.main()

source/tests/pt/test_training.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
Path,
1111
)
1212

13+
import numpy as np
1314
import torch
1415

1516
from deepmd.pt.entrypoints.main import (
@@ -608,5 +609,122 @@ def tearDown(self) -> None:
608609
DPTrainTest.tearDown(self)
609610

610611

612+
class TestModelChangeOutBiasFittingStat(unittest.TestCase):
613+
"""Verify model_change_out_bias produces the same fitting stat as the old code path.
614+
615+
The old code called compute_fitting_input_stat inside change_out_bias (make_model.py).
616+
The new code calls get_fitting_net().compute_input_stats() separately in
617+
model_change_out_bias (training.py). This test verifies they produce identical
618+
out_bias, fparam_avg, and fparam_inv_std.
619+
"""
620+
621+
def test_fitting_stat_consistency(self) -> None:
622+
from deepmd.pt.model.model import get_model as get_model_pt
623+
from deepmd.pt.model.model.ener_model import EnergyModel as EnergyModelPT
624+
from deepmd.pt.train.training import (
625+
model_change_out_bias,
626+
)
627+
from deepmd.pt.utils.utils import to_numpy_array as torch_to_numpy
628+
from deepmd.pt.utils.utils import to_torch_tensor as numpy_to_torch
629+
from deepmd.utils.argcheck import model_args as model_args_fn
630+
631+
# Build a model with numb_fparam=2 so fitting stat is non-trivial
632+
model_params = model_args_fn().normalize_value(
633+
{
634+
"type_map": ["O", "H"],
635+
"descriptor": {
636+
"type": "se_e2_a",
637+
"sel": [20, 20],
638+
"rcut_smth": 0.50,
639+
"rcut": 6.00,
640+
"neuron": [3, 6],
641+
"resnet_dt": False,
642+
"axis_neuron": 2,
643+
"precision": "float64",
644+
"type_one_side": True,
645+
"seed": 1,
646+
},
647+
"fitting_net": {
648+
"neuron": [5, 5],
649+
"resnet_dt": True,
650+
"precision": "float64",
651+
"seed": 1,
652+
"numb_fparam": 2,
653+
},
654+
},
655+
trim_pattern="_*",
656+
)
657+
658+
# Create two identical models via serialize/deserialize
659+
model_orig = get_model_pt(model_params)
660+
serialized = model_orig.serialize()
661+
model_a = EnergyModelPT.deserialize(deepcopy(serialized))
662+
model_b = EnergyModelPT.deserialize(deepcopy(serialized))
663+
664+
# Build mock stat data with fparam
665+
nframes = 4
666+
natoms = 6
667+
coords = np.random.default_rng(42).random((nframes, natoms, 3)) * 13.0
668+
atype = np.array([[0, 0, 1, 1, 1, 1]] * nframes, dtype=np.int32)
669+
box = np.tile(
670+
np.eye(3, dtype=np.float64).reshape(1, 3, 3) * 13.0, (nframes, 1, 1)
671+
)
672+
natoms_data = np.array([[6, 6, 2, 4]] * nframes, dtype=np.int32)
673+
energy = np.array([10.0, 20.0, 15.0, 25.0]).reshape(nframes, 1)
674+
# fparam with varying values so mean != 0 and std != 0
675+
fparam = np.array(
676+
[[1.0, 3.0], [5.0, 7.0], [2.0, 8.0], [6.0, 4.0]], dtype=np.float64
677+
)
678+
679+
merged = [
680+
{
681+
"coord": numpy_to_torch(coords),
682+
"atype": numpy_to_torch(atype),
683+
"atype_ext": numpy_to_torch(atype),
684+
"box": numpy_to_torch(box),
685+
"natoms": numpy_to_torch(natoms_data),
686+
"energy": numpy_to_torch(energy),
687+
"find_energy": np.float32(1.0),
688+
"fparam": numpy_to_torch(fparam),
689+
"find_fparam": np.float32(1.0),
690+
}
691+
]
692+
693+
# Model A: simulate the OLD code path
694+
# old change_out_bias called both bias adjustment + compute_fitting_input_stat
695+
model_a.change_out_bias(merged, bias_adjust_mode="set-by-statistic")
696+
model_a.atomic_model.compute_fitting_input_stat(merged)
697+
698+
# Model B: use the NEW code path via model_change_out_bias
699+
sample_func = lambda: merged # noqa: E731
700+
model_change_out_bias(model_b, sample_func, "set-by-statistic")
701+
702+
# Compare out_bias
703+
bias_a = torch_to_numpy(model_a.get_out_bias())
704+
bias_b = torch_to_numpy(model_b.get_out_bias())
705+
np.testing.assert_allclose(bias_a, bias_b, rtol=1e-10, atol=1e-10)
706+
707+
# Compare fparam_avg and fparam_inv_std
708+
fit_a = model_a.get_fitting_net()
709+
fit_b = model_b.get_fitting_net()
710+
fparam_avg_a = torch_to_numpy(fit_a.fparam_avg)
711+
fparam_avg_b = torch_to_numpy(fit_b.fparam_avg)
712+
fparam_inv_std_a = torch_to_numpy(fit_a.fparam_inv_std)
713+
fparam_inv_std_b = torch_to_numpy(fit_b.fparam_inv_std)
714+
715+
np.testing.assert_allclose(fparam_avg_a, fparam_avg_b, rtol=1e-10, atol=1e-10)
716+
np.testing.assert_allclose(
717+
fparam_inv_std_a, fparam_inv_std_b, rtol=1e-10, atol=1e-10
718+
)
719+
720+
# Verify non-trivial: avg should not be zeros, inv_std should not be ones
721+
assert not np.allclose(fparam_avg_a, 0.0), (
722+
"fparam_avg is still zero — stat was not computed"
723+
)
724+
assert not np.allclose(fparam_inv_std_a, 1.0), (
725+
"fparam_inv_std is still ones — stat was not computed"
726+
)
727+
728+
611729
if __name__ == "__main__":
612730
unittest.main()

0 commit comments

Comments
 (0)