@@ -32,6 +32,7 @@
 from onnxconverter_common import convert_float_to_float16
 from torch.nn.parallel import DataParallel, DistributedDataParallel

+from modelopt.onnx.autocast.convert import convert_to_f16
 from modelopt.onnx.export import (
     FP8QuantExporter,
     INT4QuantExporter,
@@ -578,16 +579,22 @@ def get_onnx_bytes_and_metadata(
     if dq_only:
         onnx_opt_graph = qdq_to_dq(onnx_opt_graph)

-    if weights_dtype == "fp16":
-        onnx_opt_graph = convert_float_to_float16(
-            onnx_opt_graph,
-            keep_io_types=False,
-            disable_shape_infer=True,
-            check_fp16_ready=False,
-            op_block_list=["QuantizeLinear", "DequantizeLinear", "Div"],
-        )
-        # Change FP32 cast nodes feeding into Concat/Add to FP16
-        onnx_opt_graph = change_casts_to_fp16(onnx_opt_graph, ["Concat", "Add"])
+    if weights_dtype in ["fp16", "bf16"]:
+        if is_int4_quantized(model) or is_mxfp8_quantized(model) or is_fp8_quantized(model):
+            assert weights_dtype == "fp16", "BF16 + MXFP8/INT4 mixed precision is not supported yet"
+            onnx_opt_graph = convert_float_to_float16(
+                onnx_opt_graph,
+                keep_io_types=False,
+                disable_shape_infer=True,
+                check_fp16_ready=False,
+                op_block_list=["QuantizeLinear", "DequantizeLinear", "Div"],
+            )
+            # Change FP32 cast nodes feeding into Concat/Add to FP16
+            onnx_opt_graph = change_casts_to_fp16(onnx_opt_graph, ["Concat", "Add"])
+        else:
+            onnx_opt_graph = convert_to_f16(
+                onnx_opt_graph, low_precision_type=weights_dtype, keep_io_types=False
+            )

     onnx_opt_graph = remove_redundant_casts(onnx_opt_graph)

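For reference, a minimal standalone sketch of the new AutoCast path that this change routes non-quantized fp16/bf16 exports through. The call pattern mirrors the diff above; the file paths are placeholder assumptions, and it assumes `convert_to_f16` accepts and returns an `onnx.ModelProto` as it does in `get_onnx_bytes_and_metadata`.

```python
# Sketch only: exercises modelopt.onnx.autocast.convert.convert_to_f16 with the
# same arguments used in the diff. File paths are hypothetical placeholders.
import onnx

from modelopt.onnx.autocast.convert import convert_to_f16

# Load an fp32 ONNX graph (the non-quantized case, i.e. the new `else` branch).
model = onnx.load("model_fp32.onnx")

# "bf16" is the dtype this change newly supports here; "fp16" works the same way.
model_bf16 = convert_to_f16(model, low_precision_type="bf16", keep_io_types=False)

onnx.save(model_bf16, "model_bf16.onnx")
```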