microsoft · Rishi-Dave · Apr 25, 2026 · May 1, 2026 · May 3, 2026 · May 4, 2026
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc
@@ -5,6 +5,8 @@
 
 #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h"
 
+#include <cmath>
+
 #include "core/graph/graph.h"
 #include "core/graph/graph_utils.h"
 #include "core/optimizer/initializer.h"
@@ -410,6 +412,136 @@
   builder.num_output_defs = 1;  // set to 1 as the first output is variadic
 }
 
+// Checks that the bias DQ scale equals input_scale * weight_scale[i] for every output channel
+// (within atol=1e-6, rtol=1e-2), as ONNX QLinearConv requires of its int32 bias; a mismatch would make
+// the fused output silently incorrect. Scalar weight/bias scales are broadcast across channels.
+// Returns false (conservative) if a scale is not a constant float32 initializer, is empty, or is non-finite.
+static bool CheckConvBiasScale(const GraphViewer& graph_viewer,
+                               const Node& input_dq, const Node& weight_dq, const Node& bias_dq) {
+  const auto* x_scale_arg = input_dq.InputDefs()[QDQ::InputIndex::SCALE_ID];
+  const auto* w_scale_arg = weight_dq.InputDefs()[QDQ::InputIndex::SCALE_ID];
+  const auto* b_scale_arg = bias_dq.InputDefs()[QDQ::InputIndex::SCALE_ID];
+
+  const auto* x_scale_proto = graph_viewer.GetConstantInitializer(x_scale_arg->Name(), true);
+  const auto* w_scale_proto = graph_viewer.GetConstantInitializer(w_scale_arg->Name(), true);
+  const auto* b_scale_proto = graph_viewer.GetConstantInitializer(b_scale_arg->Name(), true);
+
+  if (!x_scale_proto || !w_scale_proto || !b_scale_proto) {
+    return false;  // conservative: cannot verify
+  }
+
+  // Input scale must be scalar (rank 0 or 1-element rank-1).
+  if (x_scale_proto->dims_size() != 0 &&
+      !(x_scale_proto->dims_size() == 1 && x_scale_proto->dims(0) == 1)) {
+    return false;
+  }
+
+  const Initializer x_scale_init{graph_viewer.GetGraph(), *x_scale_proto, graph_viewer.ModelPath()};
+  const Initializer w_scale_init{graph_viewer.GetGraph(), *w_scale_proto, graph_viewer.ModelPath()};
+  const Initializer b_scale_init{graph_viewer.GetGraph(), *b_scale_proto, graph_viewer.ModelPath()};
+
+  // All scales must be float32 for standard QLinearConv.
+  if (x_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
+      w_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
+      b_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
+    return false;
+  }
+
+  const auto x_scales = x_scale_init.DataAsSpan<float>();
+  const auto w_scales = w_scale_init.DataAsSpan<float>();
+  const auto b_scales = b_scale_init.DataAsSpan<float>();
+
+  // Guard against empty initializers — an empty span would cause an out-of-bounds
+  // access on x_scales[0] or w_scales[i] / b_scales[i] below.
+  if (x_scales.empty() || w_scales.empty() || b_scales.empty()) {
+    return false;
+  }
+
+  const float x_scale = x_scales[0];
+  const size_t w_num = w_scales.size();  // 1 for per-tensor weight scale, C_out for per-channel
+  const size_t b_num = b_scales.size();  // 1 for scalar bias scale, C_out for per-channel
+
+  // Each scale tensor must be either scalar or per-channel (C_out). When one is
+  // per-channel and the other is scalar, broadcast the scalar across channels.
+  if (w_num != 1 && b_num != 1 && w_num != b_num) {
+    return false;
+  }
+  const size_t num_channels = std::max(w_num, b_num);
+
+  // Looser tolerances than optimizer_utils::IsInitializerWithExpectedValue (atol=1e-8, rtol=1e-5)
+  // are used intentionally: bias_scale is computed as input_scale * weight_scale and may
+  // accumulate fp rounding error from upstream quantization tools, so a tighter check
+  // would reject otherwise valid fusions in practice.
+  constexpr float atol = 1e-6f;
+  constexpr float rtol = 1e-2f;
+
+  // x_scale is a scalar — check it once here rather than redundantly on every loop iteration.
+  if (!std::isfinite(x_scale)) {
+    return false;
+  }
+
+  for (size_t i = 0; i < num_channels; ++i) {
+    const float w_scale = (w_num == 1) ? w_scales[0] : w_scales[i];
+    const float b_scale = (b_num == 1) ? b_scales[0] : b_scales[i];
+    const float expected = x_scale * w_scale;
+    // Reject non-finite values, including a product that overflows to Inf. Every ordered
+    // comparison involving NaN is false, and Inf > Inf is false too, so the tolerance test
+    // below would accept such values instead of rejecting them. Refuse fusion instead.
+    if (!std::isfinite(w_scale) || !std::isfinite(b_scale) || !std::isfinite(expected)) {
+      return false;
+    }
+    if (std::abs(b_scale - expected) > (atol + rtol * std::abs(expected))) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Checks the bias DQ's zero point before fusing into QLinearConv, which has no bias_zero_point
+// input and so treats the int32 bias as having zero point 0.
+// Returns true when the zero-point input is absent, or when it is a constant int32 initializer
+// that is non-empty and holds only zeros.
+// Returns false (conservative) otherwise: non-constant zero point, unexpected dtype, empty
+// initializer, or any nonzero element.
+static bool CheckConvBiasZeroPoint(const GraphViewer& graph_viewer, const Node& bias_dq) {
+  const auto& inputs = bias_dq.InputDefs();
+
+  // The zero point is the optional DQ input at index 2; a missing one is implicitly zero,
+  // so there is nothing to verify and fusion is allowed.
+  const auto* zero_point_arg = inputs.size() < 3 ? nullptr : inputs[QDQ::InputIndex::ZERO_POINT_ID];
+  if (zero_point_arg == nullptr || !zero_point_arg->Exists()) {
+    return true;
+  }
+
+  // A zero point that is not a constant initializer cannot be inspected, so refuse fusion.
+  const auto* zero_point_proto = graph_viewer.GetConstantInitializer(zero_point_arg->Name(), true);
+  if (zero_point_proto == nullptr) {
+    return false;
+  }
+
+  // QLinearConv has no bias zero-point input, so only a symmetrically quantized bias
+  // (int32 zero point, every element 0) can be fused without changing the arithmetic.
+  const Initializer zero_point{graph_viewer.GetGraph(), *zero_point_proto, graph_viewer.ModelPath()};
+  if (zero_point.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT32) {
+    return false;
+  }
+
+  const auto elements = zero_point.DataAsSpan<int32_t>();
+
+  // An empty initializer is malformed; reject it rather than treating it as "all zeros".
+  if (elements.empty()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < elements.size(); ++i) {
+    if (elements[i] != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
 bool ConvNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node& node, const Node* redundant_clip_node,
                                   const std::vector<const Node*>& dq_nodes,
                                   const std::vector<const Node*>& q_nodes) const {
@@ -440,6 +572,18 @@
     if (dt_bias != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32) {
       return false;
     }
+
+    // Verify bias scale == input_scale * weight_scale[i] per ONNX QLinearConv spec.
+    // If scales don't match within tolerance, skip fusion to avoid silent numerical errors.
+    if (!CheckConvBiasScale(graph_viewer, *dq_nodes[0], *dq_nodes[1], *dq_nodes[2])) {
+      return false;
+    }
+
+    // Verify bias zero-point is absent or all-zero. A nonzero bias zero-point would silently
+    // shift all output activations after fusion since QLinearConv has no bias_zero_point input.
+    if (!CheckConvBiasZeroPoint(graph_viewer, *dq_nodes[2])) {
+      return false;
+    }
   }
 
   // 16-bit int types must be explicitly allowed.

diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc
@@ -4439,7 +4439,8 @@ TEST(QDQTransformerTests, QDQ_Selector_Test_ConvClip) {
     auto* dq_bias = builder.MakeIntermediate();
     builder.AddDequantizeLinearNode(input_arg, 0.02348f, uint8_t(0), dq_input, false);
     builder.AddDequantizeLinearNode(weight_arg, 0.307f, uint8_t(0), dq_weight, false);
-    builder.AddDequantizeLinearNode(bias_arg, 0.007f, int32_t(0), dq_bias, false);
+    // bias_scale must equal x_scale * w_scale for QLinearConv fusion
+    builder.AddDequantizeLinearNode(bias_arg, 0.00720836f, int32_t(0), dq_bias, false);
 
     // Conv
     auto* conv_output = builder.MakeIntermediate();
@@ -4506,7 +4507,8 @@ TEST(QDQTransformerTests, QDQ_Selector_Test_ConvClipNonScalar) {
     auto* dq_bias = builder.MakeIntermediate();
     builder.AddDequantizeLinearNode(input_arg, 0.02348f, uint8_t(0), dq_input, false);
     builder.AddDequantizeLinearNode(weight_arg, 0.307f, uint8_t(0), dq_weight, false);
-    builder.AddDequantizeLinearNode(bias_arg, 0.007f, int32_t(0), dq_bias, false);
+    // bias_scale must equal x_scale * w_scale for QLinearConv fusion
+    builder.AddDequantizeLinearNode(bias_arg, 0.00720836f, int32_t(0), dq_bias, false);
 
     // Conv
     auto* conv_output = builder.MakeIntermediate();
@@ -4576,7 +4578,8 @@ TEST(QDQTransformerTests, QDQ_Selector_Test_Conv_Relu) {
       auto* dq_bias = builder.MakeIntermediate();
       builder.AddDequantizeLinearNode(input_arg, 0.02348f, uint8_t(0), dq_input, false);
       builder.AddDequantizeLinearNode(weight_arg, 0.307f, uint8_t(0), dq_weight, false);
-      builder.AddDequantizeLinearNode(bias_arg, 0.007f, int32_t(0), dq_bias, false);
+      // bias_scale must equal x_scale * w_scale for QLinearConv fusion
+      builder.AddDequantizeLinearNode(bias_arg, 0.00720836f, int32_t(0), dq_bias, false);
 
       // Conv
       auto* conv_output = builder.MakeIntermediate();
@@ -4682,7 +4685,8 @@ TEST(QDQTransformerTests, QDQ_Selector_Test_Conv_Relu) {
       auto* dq_bias = builder.MakeIntermediate();
       builder.AddDequantizeLinearNode(input_arg, 0.02348f, uint8_t(0), dq_input, false);
       builder.AddDequantizeLinearNode(weight_arg, 0.307f, uint8_t(0), dq_weight, false);
-      builder.AddDequantizeLinearNode(bias_arg, 0.007f, int32_t(0), dq_bias, false);
+      // bias_scale must equal x_scale * w_scale for QLinearConv fusion
+      builder.AddDequantizeLinearNode(bias_arg, 0.00720836f, int32_t(0), dq_bias, false);
 
       // Conv
       auto* conv_output = builder.MakeIntermediate();