diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 6a3c6d8081..d1eb616985 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -105,9 +105,18 @@ def __init__( # only use_timestep when skip connection is established. use_timestep = use_timestep and (num_out == num_in or num_out == num_in * 2) rng = np.random.default_rng(seed) - self.w = rng.normal(size=(num_in, num_out)).astype(prec) - self.b = rng.normal(size=(num_out,)).astype(prec) if bias else None - self.idt = rng.normal(size=(num_out,)).astype(prec) if use_timestep else None + scale_factor = 1.0 / np.sqrt(num_out + num_in) + self.w = rng.normal(size=(num_in, num_out), scale=scale_factor).astype(prec) + self.b = ( + rng.normal(size=(num_out,), scale=scale_factor).astype(prec) + if bias + else None + ) + self.idt = ( + rng.normal(size=(num_out,), scale=scale_factor).astype(prec) + if use_timestep + else None + ) self.activation_function = ( activation_function if activation_function is not None else "none" )