Add the getter and setter of skip_fp8_weight_update_tensor (#3015)

xrennvidia · pre-commit-ci[bot] · greptile-apps[bot] · web-flow · commit fc92624bc89b · 2026-06-04T16:20:40.000-07:00
* add the getter and setter of skip_fp8_weight_update_tensor Signed-off-by: Xiaowei Ren <xren@nvidia.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update transformer_engine/pytorch/quantization.py return type fix Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Signed-off-by: Xiaowei Ren <103958965+xrennvidia@users.noreply.github.com> --------- Signed-off-by: Xiaowei Ren <xren@nvidia.com> Signed-off-by: Xiaowei Ren <103958965+xrennvidia@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
diff --git a/transformer_engine/pytorch/graph.py b/transformer_engine/pytorch/graph.py
@@ -324,12 +324,7 @@ def _make_graphed_callables(
 
     if cache_quantized_params:
         # Initialize flag that controls FP8 weight updates
-        qstate = FP8GlobalStateManager.quantization_state
-        if qstate.skip_fp8_weight_update_tensor is None:
-            qstate.skip_fp8_weight_update_tensor = torch.empty(
-                1, dtype=torch.float32, device="cuda"
-            )
-        qstate.skip_fp8_weight_update_tensor.fill_(False)
+        FP8GlobalStateManager.set_skip_fp8_weight_update_tensor(False)
 
     # Check callables
     for c in callables:
@@ -841,9 +836,7 @@ def forward(ctx, skip_fp8_weight_update, cuda_graph_stream, cuda_graph_event, *i
                 # Set flag for whether to update FP8 weight updates
                 ctx.is_first_module = FP8GlobalStateManager.is_first_fp8_module()
                 if ctx.is_first_module and skip_fp8_weight_update is not None:
-                    FP8GlobalStateManager.quantization_state.skip_fp8_weight_update_tensor.fill_(
-                        skip_fp8_weight_update
-                    )
+                    FP8GlobalStateManager.set_skip_fp8_weight_update_tensor(skip_fp8_weight_update)
                 ctx.cuda_graph_stream = cuda_graph_stream
                 ctx.cuda_graph_event = cuda_graph_event
                 # Copy values from new tensors into static tensors
diff --git a/transformer_engine/pytorch/quantization.py b/transformer_engine/pytorch/quantization.py
@@ -409,6 +409,20 @@ class FP8GlobalStateManager:
 
     quantization_state = FP8GlobalState()
 
+    @classmethod
+    def set_skip_fp8_weight_update_tensor(cls, skip: bool) -> None:
+        """Set the skip fp8 weight update tensor"""
+        if cls.quantization_state.skip_fp8_weight_update_tensor is None:
+            cls.quantization_state.skip_fp8_weight_update_tensor = torch.empty(
+                1, dtype=torch.float32, device="cuda"
+            )
+        cls.quantization_state.skip_fp8_weight_update_tensor.fill_(skip)
+
+    @classmethod
+    def get_skip_fp8_weight_update_tensor(cls) -> Optional[torch.Tensor]:
+        """Get the skip fp8 weight update tensor"""
+        return cls.quantization_state.skip_fp8_weight_update_tensor
+
     @classmethod
     def reset(cls) -> None:
         """Reset the global state"""