Commit e5f2ef5

fix

1 parent aca34df

4 files changed: 24 additions & 5 deletions

File tree:

    deepmd/pt/utils/utils.py
    deepmd/tf/train/trainer.py
    deepmd/tf/utils/learning_rate.py
    source/tests/universal/dpmodel/utils/test_learning_rate.py

deepmd/pt/utils/utils.py (4 additions & 0 deletions)

@@ -227,6 +227,10 @@ def to_numpy_array(xx: torch.Tensor) -> np.ndarray: ...
 def to_numpy_array(xx: None) -> None: ...
 
 
+@overload
+def to_numpy_array(xx: float) -> np.ndarray: ...
+
+
 def to_numpy_array(
     xx: torch.Tensor | np.ndarray | float | None,
 ) -> np.ndarray | None:
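The new @overload stub tells type checkers that a plain float input yields an np.ndarray rather than np.ndarray | None, so callers converting scalars no longer need an Optional narrow. A minimal self-contained sketch of the pattern; the implementation body below is an assumption, since the commit shows only the signatures:

from typing import overload

import numpy as np
import torch


@overload
def to_numpy_array(xx: torch.Tensor) -> np.ndarray: ...
@overload
def to_numpy_array(xx: None) -> None: ...
@overload
def to_numpy_array(xx: float) -> np.ndarray: ...


def to_numpy_array(
    xx: torch.Tensor | np.ndarray | float | None,
) -> np.ndarray | None:
    # Hypothetical body for illustration; the commit does not show it.
    if xx is None:
        return None
    if isinstance(xx, torch.Tensor):
        return xx.detach().cpu().numpy()
    return np.asarray(xx)

With the stub in place, reveal_type(to_numpy_array(0.5)) resolves to np.ndarray instead of np.ndarray | None.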

deepmd/tf/train/trainer.py (2 additions & 2 deletions)

@@ -244,7 +244,7 @@ def _build_lr(self) -> None:
         self.global_step = tf.train.get_or_create_global_step()
         if self.stop_batch == 0:
             # Use constant start_lr when stop_batch is zero (no training)
-            self.learning_rate = tf.cast(self.lr.start_lr(), tf.float64)
+            self.learning_rate = tf.cast(self.lr.start_lr(), GLOBAL_TF_FLOAT_PRECISION)
             log.info("built lr (constant start_lr for stop_batch=0)")
         else:
             self.learning_rate = self.lr.build(self.global_step, self.stop_batch)
@@ -809,7 +809,7 @@ def _get_place_holders(self, data_dict) -> None:
                 prec = GLOBAL_ENER_FLOAT_PRECISION
             self.place_holders[kk] = tf.placeholder(prec, [None], name="t_" + kk)
             self.place_holders["find_" + kk] = tf.placeholder(
-                tf.float32, name="t_find_" + kk
+                GLOBAL_TF_FLOAT_PRECISION, name="t_find_" + kk
             )
 
     def _init_from_frz_model(self) -> None:
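Both hunks swap a hard-coded dtype (tf.float64 for the constant learning rate, tf.float32 for the find_* placeholders) for the build-wide precision constant, so these tensors follow whatever precision the rest of the graph uses. As a rough sketch of how such a constant is commonly wired up, assuming an environment-variable switch in the style of deepmd-kit's interface-precision setting (the variable name and defaults below are assumptions, not part of this commit):

import os

import numpy as np
import tensorflow as tf

# Assumed env-var switch; deepmd-kit selects precision in deepmd/tf/env.py,
# but the exact variable name and defaults here are illustrative.
if os.environ.get("DP_INTERFACE_PREC", "high").lower() == "low":
    GLOBAL_TF_FLOAT_PRECISION = tf.float32
    GLOBAL_NP_FLOAT_PRECISION = np.float32
else:
    GLOBAL_TF_FLOAT_PRECISION = tf.float64
    GLOBAL_NP_FLOAT_PRECISION = np.float64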

deepmd/tf/utils/learning_rate.py (13 additions & 2 deletions)

@@ -21,6 +21,10 @@ class LearningRateSchedule:
     """
     TensorFlow wrapper for BaseLR.
 
+    The learning rate is computed via :func:`tf.numpy_function`, which prevents
+    TensorFlow from optimizing this operation in the graph. This overhead is
+    typically negligible compared to forward/backward passes.
+
     Parameters
     ----------
     params : dict[str, Any]
@@ -86,11 +90,18 @@ def build(self, global_step: tf.Tensor, num_steps: int) -> tf.Tensor:
         self._base_lr = BaseLR(**params)
 
         # === Step 2. Bind a numpy_function for runtime evaluation ===
+        from deepmd.tf.env import (
+            GLOBAL_TF_FLOAT_PRECISION,
+        )
+
         def _lr_value(step: np.ndarray) -> np.ndarray:
-            return np.asarray(self._base_lr.value(step), dtype=np.float64)
+            return np.asarray(
+                self._base_lr.value(step),
+                dtype=GLOBAL_TF_FLOAT_PRECISION.as_numpy_dtype,
+            )
 
         lr = tf.numpy_function(
-            _lr_value, [global_step], Tout=tf.float64, name="lr_schedule"
+            _lr_value, [global_step], Tout=GLOBAL_TF_FLOAT_PRECISION, name="lr_schedule"
         )
         lr.set_shape(global_step.get_shape())
         return lr
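The docstring note above is the key trade-off: tf.numpy_function drops back into Python, so the op cannot be constant-folded or placed on an accelerator, which is acceptable for a once-per-step scalar. A self-contained sketch of the same pattern, with a toy exponential schedule standing in for BaseLR (an assumption; BaseLR's real interface is not shown in this diff):

import numpy as np
import tensorflow as tf


def _lr_value(step: np.ndarray) -> np.ndarray:
    # Plain NumPy math, evaluated in Python each time the op runs.
    return np.asarray(1e-3 * 0.95 ** (step / 1000.0), dtype=np.float64)


global_step = tf.constant(2000, dtype=tf.int64)
lr = tf.numpy_function(_lr_value, [global_step], Tout=tf.float64, name="lr_schedule")
# numpy_function outputs have unknown static shape; restore it from the input.
lr.set_shape(global_step.get_shape())
print(float(lr))  # 1e-3 * 0.95**2 = 9.025e-4

The set_shape call mirrors the diff: since the function body is opaque to TensorFlow, the output tensor's static shape must be reasserted by hand.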

source/tests/universal/dpmodel/utils/test_learning_rate.py (5 additions & 1 deletion)

@@ -90,7 +90,11 @@ def test_warmup_steps_exp(self) -> None:
         np.testing.assert_allclose(lr.value(0), 0.0, rtol=1e-10)
         np.testing.assert_allclose(lr.value(500), 0.5e-3, rtol=1e-10)
         np.testing.assert_allclose(lr.value(1000), 1e-3, rtol=1e-10)
-        self.assertLess(to_numpy_array(lr.value(2000)), 1e-3)
+        # Step 2000: 1000 steps into decay phase (1 decay period with decay_steps=1000)
+        # lr = start_lr * decay_rate^1 = 1e-3 * exp(log(0.01)/9) ≈ 5.995e-4
+        np.testing.assert_allclose(
+            to_numpy_array(lr.value(2000)), 1e-3 * np.exp(np.log(0.01) / 9), rtol=1e-5
+        )
 
     def test_warmup_steps_cosine(self) -> None:
         """Test warmup with cosine annealing."""
