Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h"

#include <algorithm>
#include <cmath>

#include "core/graph/graph.h"
#include "core/graph/graph_utils.h"
#include "core/optimizer/initializer.h"
Expand Down Expand Up @@ -410,6 +412,136 @@
builder.num_output_defs = 1; // set to 1 as the first output is variadic
}

// Validates that the bias DQ's scale matches input_scale * weight_scale[i] for each output channel.
// ONNX QLinearConv requires bias to be in int32 with scale = x_scale * w_scale[i].
// If this condition is violated, the fused output would be silently incorrect.
//
// Parameters:
//   graph_viewer - used to look up the scale initializers by name.
//   input_dq     - DequantizeLinear node feeding the Conv activation input.
//   weight_dq    - DequantizeLinear node feeding the Conv weight input.
//   bias_dq      - DequantizeLinear node feeding the Conv bias input.
//
// Returns true only when every scale is a constant float32 initializer, the input scale is a
// scalar, and the bias scale equals x_scale * w_scale (per channel, with scalar broadcast) within
// tolerance. Returns false (conservative) if any scale initializer is not a constant, types are
// non-conformant, or any value involved is non-finite.
static bool CheckConvBiasScale(const GraphViewer& graph_viewer,
                               const Node& input_dq, const Node& weight_dq, const Node& bias_dq) {
  const auto* x_scale_arg = input_dq.InputDefs()[QDQ::InputIndex::SCALE_ID];
  const auto* w_scale_arg = weight_dq.InputDefs()[QDQ::InputIndex::SCALE_ID];
  const auto* b_scale_arg = bias_dq.InputDefs()[QDQ::InputIndex::SCALE_ID];

  const auto* x_scale_proto = graph_viewer.GetConstantInitializer(x_scale_arg->Name(), true);
  const auto* w_scale_proto = graph_viewer.GetConstantInitializer(w_scale_arg->Name(), true);
  const auto* b_scale_proto = graph_viewer.GetConstantInitializer(b_scale_arg->Name(), true);

  if (!x_scale_proto || !w_scale_proto || !b_scale_proto) {
    return false;  // conservative: cannot verify
  }

  // Input scale must be scalar (rank 0 or 1-element rank-1).
  if (x_scale_proto->dims_size() != 0 &&
      !(x_scale_proto->dims_size() == 1 && x_scale_proto->dims(0) == 1)) {
    return false;
  }

  const Initializer x_scale_init{graph_viewer.GetGraph(), *x_scale_proto, graph_viewer.ModelPath()};
  const Initializer w_scale_init{graph_viewer.GetGraph(), *w_scale_proto, graph_viewer.ModelPath()};
  const Initializer b_scale_init{graph_viewer.GetGraph(), *b_scale_proto, graph_viewer.ModelPath()};

  // All scales must be float32 for standard QLinearConv.
  if (x_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
      w_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT ||
      b_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
    return false;
  }

  const auto x_scales = x_scale_init.DataAsSpan<float>();
  const auto w_scales = w_scale_init.DataAsSpan<float>();
  const auto b_scales = b_scale_init.DataAsSpan<float>();

  // Guard against empty initializers — an empty span would cause an out-of-bounds
  // access on x_scales[0] or w_scales[i] / b_scales[i] below.
  if (x_scales.empty() || w_scales.empty() || b_scales.empty()) {
    return false;
  }

  const float x_scale = x_scales[0];
  const size_t w_num = w_scales.size();  // 1 for per-tensor weight scale, C_out for per-channel
  const size_t b_num = b_scales.size();  // 1 for scalar bias scale, C_out for per-channel

  // Each scale tensor must be either scalar or per-channel (C_out). When one is
  // per-channel and the other is scalar, broadcast the scalar across channels.
  if (w_num != 1 && b_num != 1 && w_num != b_num) {
    return false;
  }
  const size_t num_channels = std::max(w_num, b_num);

  // Looser tolerances than optimizer_utils::IsInitializerWithExpectedValue (atol=1e-8, rtol=1e-5)
  // are used intentionally: bias_scale is computed as input_scale * weight_scale and may
  // accumulate fp rounding error from upstream quantization tools, so a tighter check
  // would reject otherwise valid fusions in practice.
  constexpr float atol = 1e-6f;
  constexpr float rtol = 1e-2f;

  // x_scale is a scalar — check it once here rather than redundantly on every loop iteration.
  if (!std::isfinite(x_scale)) {
    return false;
  }

  for (size_t i = 0; i < num_channels; ++i) {
    const float w_scale = (w_num == 1) ? w_scales[0] : w_scales[i];
    const float b_scale = (b_num == 1) ? b_scales[0] : b_scales[i];
    // Reject non-finite per-channel values. Any ordered comparison involving NaN is
    // false, and an infinite tolerance can never be exceeded, so the `>` test below
    // would never reject such inputs. Be conservative and refuse fusion.
    if (!std::isfinite(w_scale) || !std::isfinite(b_scale)) {
      return false;
    }
    const float expected = x_scale * w_scale;
    // The product of two finite floats can still overflow to +/-Inf, which would make
    // the tolerance infinite and let any bias scale through; treat that as a mismatch.
    if (!std::isfinite(expected)) {
      return false;
    }
    if (std::abs(b_scale - expected) > (atol + rtol * std::abs(expected))) {
      return false;
    }
  }

  return true;
}

// Validates that the bias DQ's zero point is absent or a constant int32 initializer whose every
// element is zero. ONNX QLinearConv (int32 bias path) assumes bias_zero_point == 0; a nonzero value
// would shift all output activations silently after fusion since QLinearConv has no bias_zero_point
// input.
//
// Parameters:
//   graph_viewer - used to look up the zero-point initializer by name.
//   bias_dq      - DequantizeLinear node feeding the Conv bias input.
//
// Returns true when the zero-point input is absent, or is a non-empty constant int32 initializer
// containing only zeros. Returns false (conservative) if a zero-point name is set but not a
// constant, if the initializer is empty, if any element is nonzero, or if the dtype is unexpected.
static bool CheckConvBiasZeroPoint(const GraphViewer& graph_viewer, const Node& bias_dq) {
  const auto& bias_dq_inputs = bias_dq.InputDefs();
  // Zero-point is optional input at index 2.
  if (bias_dq_inputs.size() < 3 || !bias_dq_inputs[QDQ::InputIndex::ZERO_POINT_ID] ||
      !bias_dq_inputs[QDQ::InputIndex::ZERO_POINT_ID]->Exists()) {
    return true;  // absent zero-point is implicitly zero — allow fusion
  }

  const auto* zp_arg = bias_dq_inputs[QDQ::InputIndex::ZERO_POINT_ID];
  const auto* zp_proto = graph_viewer.GetConstantInitializer(zp_arg->Name(), true);
  if (!zp_proto) {
    return false;  // zero-point present but not constant — cannot verify
  }

  // Fusion to QLinearConv assumes bias is symmetrically quantized (zero_point == 0).
  // If a nonzero bias zero-point is present we skip fusion to avoid producing an
  // arithmetically incorrect QLinearConv (which has no bias zero-point input).
  const Initializer zp_init{graph_viewer.GetGraph(), *zp_proto, graph_viewer.ModelPath()};
  if (zp_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT32) {
    return false;  // unexpected dtype for bias zero-point
  }

  const auto zp_values = zp_init.DataAsSpan<int32_t>();
  // An empty zero-point initializer is malformed; reject fusion rather than
  // silently treating it as "all zeros".
  if (zp_values.empty()) {
    return false;
  }

  // Every element must be exactly zero; a single nonzero entry blocks fusion.
  for (const int32_t v : zp_values) {
    if (v != 0) {
      return false;
    }
  }

  return true;
}

bool ConvNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node& node, const Node* redundant_clip_node,
const std::vector<const Node*>& dq_nodes,
const std::vector<const Node*>& q_nodes) const {
Expand Down Expand Up @@ -440,6 +572,18 @@
if (dt_bias != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32) {
return false;
}

// Verify bias scale == input_scale * weight_scale[i] per ONNX QLinearConv spec.
// If scales don't match within tolerance, skip fusion to avoid silent numerical errors.
if (!CheckConvBiasScale(graph_viewer, *dq_nodes[0], *dq_nodes[1], *dq_nodes[2])) {
Comment thread
tianleiwu marked this conversation as resolved.
return false;
}

// Verify bias zero-point is absent or all-zero. A nonzero bias zero-point would silently
// shift all output activations after fusion since QLinearConv has no bias_zero_point input.
if (!CheckConvBiasZeroPoint(graph_viewer, *dq_nodes[2])) {
return false;
Comment thread
tianleiwu marked this conversation as resolved.
}
Comment thread
tianleiwu marked this conversation as resolved.
}

// 16-bit int types must be explicitly allowed.
Expand Down
12 changes: 8 additions & 4 deletions onnxruntime/test/optimizer/qdq_transformer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4439,7 +4439,8 @@ TEST(QDQTransformerTests, QDQ_Selector_Test_ConvClip) {
auto* dq_bias = builder.MakeIntermediate();
builder.AddDequantizeLinearNode(input_arg, 0.02348f, uint8_t(0), dq_input, false);
builder.AddDequantizeLinearNode(weight_arg, 0.307f, uint8_t(0), dq_weight, false);
builder.AddDequantizeLinearNode(bias_arg, 0.007f, int32_t(0), dq_bias, false);
// bias_scale must equal x_scale * w_scale for QLinearConv fusion
builder.AddDequantizeLinearNode(bias_arg, 0.00720836f, int32_t(0), dq_bias, false);

// Conv
auto* conv_output = builder.MakeIntermediate();
Expand Down Expand Up @@ -4506,7 +4507,8 @@ TEST(QDQTransformerTests, QDQ_Selector_Test_ConvClipNonScalar) {
auto* dq_bias = builder.MakeIntermediate();
builder.AddDequantizeLinearNode(input_arg, 0.02348f, uint8_t(0), dq_input, false);
builder.AddDequantizeLinearNode(weight_arg, 0.307f, uint8_t(0), dq_weight, false);
builder.AddDequantizeLinearNode(bias_arg, 0.007f, int32_t(0), dq_bias, false);
// bias_scale must equal x_scale * w_scale for QLinearConv fusion
builder.AddDequantizeLinearNode(bias_arg, 0.00720836f, int32_t(0), dq_bias, false);

// Conv
auto* conv_output = builder.MakeIntermediate();
Expand Down Expand Up @@ -4576,7 +4578,8 @@ TEST(QDQTransformerTests, QDQ_Selector_Test_Conv_Relu) {
auto* dq_bias = builder.MakeIntermediate();
builder.AddDequantizeLinearNode(input_arg, 0.02348f, uint8_t(0), dq_input, false);
builder.AddDequantizeLinearNode(weight_arg, 0.307f, uint8_t(0), dq_weight, false);
builder.AddDequantizeLinearNode(bias_arg, 0.007f, int32_t(0), dq_bias, false);
// bias_scale must equal x_scale * w_scale for QLinearConv fusion
builder.AddDequantizeLinearNode(bias_arg, 0.00720836f, int32_t(0), dq_bias, false);

// Conv
auto* conv_output = builder.MakeIntermediate();
Expand Down Expand Up @@ -4682,7 +4685,8 @@ TEST(QDQTransformerTests, QDQ_Selector_Test_Conv_Relu) {
auto* dq_bias = builder.MakeIntermediate();
builder.AddDequantizeLinearNode(input_arg, 0.02348f, uint8_t(0), dq_input, false);
builder.AddDequantizeLinearNode(weight_arg, 0.307f, uint8_t(0), dq_weight, false);
builder.AddDequantizeLinearNode(bias_arg, 0.007f, int32_t(0), dq_bias, false);
// bias_scale must equal x_scale * w_scale for QLinearConv fusion
builder.AddDequantizeLinearNode(bias_arg, 0.00720836f, int32_t(0), dq_bias, false);

// Conv
auto* conv_output = builder.MakeIntermediate();
Expand Down
Loading
Loading