Skip to content

Commit 225ab4e

Browse files
committed
fix: Validate fp16.loss_scale is finite and non-negative
Signed-off-by: nathon-lee <leejianwoo@gmail.com>
1 parent 6c59d54 commit 225ab4e

1 file changed

Lines changed: 22 additions & 1 deletion

File tree

deepspeed/runtime/precision_config.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
# DeepSpeed Team
55

6+
import math
7+
from pydantic import field_validator
68
from deepspeed.runtime.config_utils import DeepSpeedConfigModel
79
from .fp16.loss_scaler import (
810
INITIAL_LOSS_SCALE,
@@ -107,12 +109,31 @@ class DeepSpeedFP16Config(DeepSpeedConfigModel):
107109
"""
108110
Automatically cast inputs to fp16
109111
"""
110-
112+
111113
loss_scale: float = 0
112114
"""
113115
Loss scaling value. Default value of 0 means dynamic loss scaling instead of static loss scale.
114116
"""
115117

118+
@field_validator("loss_scale")
@classmethod
def _validate_loss_scale(cls, value):
    """Ensure ``loss_scale`` is a finite, non-negative number.

    A value of 0 keeps dynamic loss scaling enabled.  Booleans, NaN,
    positive/negative infinity, and negative numbers are all rejected.

    Raises:
        ValueError: if ``value`` is a bool, non-finite, or negative.
    """
    # bool is a subclass of int in Python, so True/False would otherwise
    # silently validate as 1/0 — reject it explicitly first.
    if isinstance(value, bool):
        raise ValueError("fp16.loss_scale must be a number, not bool")

    value = float(value)

    # NaN and +/-inf survive float() conversion; reject them here.
    if not math.isfinite(value):
        raise ValueError("fp16.loss_scale must be a finite number (not inf/-inf/nan)")

    # Negative scales are meaningless; 0 retains its dynamic-scaling meaning.
    if value < 0:
        raise ValueError("fp16.loss_scale must be >= 0 (0 enables dynamic loss scaling)")

    return value
136+
116137
initial_scale_power: int = 16
117138
"""
118139
For dynamic loss scaling, set initial loss scale to 2^{initial_scale_power}.

0 commit comments

Comments
 (0)