-
Notifications
You must be signed in to change notification settings - Fork 3.9k
fix(quantization): validate bias scale in QDQ Conv → QLinearConv fusion #28229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
13a881f
4dee849
0f7900d
2c2e3ed
37f30de
6d1fa1a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,8 @@ | |
|
|
||
| #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h" | ||
|
|
||
| #include <cmath> | ||
|
|
||
| #include "core/graph/graph.h" | ||
| #include "core/graph/graph_utils.h" | ||
| #include "core/optimizer/initializer.h" | ||
|
|
@@ -410,6 +412,136 @@ | |
| builder.num_output_defs = 1; // set to 1 as the first output is variadic | ||
| } | ||
|
|
||
| // Validates that the bias DQ's scale matches input_scale * weight_scale[i] for each output channel. | ||
| // ONNX QLinearConv requires bias to be in int32 with scale = x_scale * w_scale[i]. | ||
| // If this condition is violated, the fused output would be silently incorrect. | ||
| // Returns false (conservative) if any scale initializer is not a constant or types are non-conformant. | ||
| static bool CheckConvBiasScale(const GraphViewer& graph_viewer, | ||
| const Node& input_dq, const Node& weight_dq, const Node& bias_dq) { | ||
| const auto* x_scale_arg = input_dq.InputDefs()[QDQ::InputIndex::SCALE_ID]; | ||
| const auto* w_scale_arg = weight_dq.InputDefs()[QDQ::InputIndex::SCALE_ID]; | ||
| const auto* b_scale_arg = bias_dq.InputDefs()[QDQ::InputIndex::SCALE_ID]; | ||
|
|
||
| const auto* x_scale_proto = graph_viewer.GetConstantInitializer(x_scale_arg->Name(), true); | ||
| const auto* w_scale_proto = graph_viewer.GetConstantInitializer(w_scale_arg->Name(), true); | ||
| const auto* b_scale_proto = graph_viewer.GetConstantInitializer(b_scale_arg->Name(), true); | ||
|
|
||
| if (!x_scale_proto || !w_scale_proto || !b_scale_proto) { | ||
| return false; // conservative: cannot verify | ||
| } | ||
|
|
||
| // Input scale must be scalar (rank 0 or 1-element rank-1). | ||
| if (x_scale_proto->dims_size() != 0 && | ||
| !(x_scale_proto->dims_size() == 1 && x_scale_proto->dims(0) == 1)) { | ||
| return false; | ||
| } | ||
|
|
||
| const Initializer x_scale_init{graph_viewer.GetGraph(), *x_scale_proto, graph_viewer.ModelPath()}; | ||
| const Initializer w_scale_init{graph_viewer.GetGraph(), *w_scale_proto, graph_viewer.ModelPath()}; | ||
| const Initializer b_scale_init{graph_viewer.GetGraph(), *b_scale_proto, graph_viewer.ModelPath()}; | ||
|
|
||
| // All scales must be float32 for standard QLinearConv. | ||
| if (x_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT || | ||
| w_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT || | ||
| b_scale_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { | ||
| return false; | ||
| } | ||
|
|
||
| const auto x_scales = x_scale_init.DataAsSpan<float>(); | ||
| const auto w_scales = w_scale_init.DataAsSpan<float>(); | ||
| const auto b_scales = b_scale_init.DataAsSpan<float>(); | ||
|
|
||
| // Guard against empty initializers — an empty span would cause an out-of-bounds | ||
| // access on x_scales[0] or w_scales[i] / b_scales[i] below. | ||
| if (x_scales.empty() || w_scales.empty() || b_scales.empty()) { | ||
| return false; | ||
| } | ||
|
|
||
| const float x_scale = x_scales[0]; | ||
| const size_t w_num = w_scales.size(); // 1 for per-tensor weight scale, C_out for per-channel | ||
| const size_t b_num = b_scales.size(); // 1 for scalar bias scale, C_out for per-channel | ||
|
|
||
| // Each scale tensor must be either scalar or per-channel (C_out). When one is | ||
| // per-channel and the other is scalar, broadcast the scalar across channels. | ||
| if (w_num != 1 && b_num != 1 && w_num != b_num) { | ||
| return false; | ||
| } | ||
|
Rishi-Dave marked this conversation as resolved.
|
||
| const size_t num_channels = std::max(w_num, b_num); | ||
|
Check warning on line 469 in onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc
|
||
|
|
||
| // Looser tolerances than optimizer_utils::IsInitializerWithExpectedValue (atol=1e-8, rtol=1e-5) | ||
| // are used intentionally: bias_scale is computed as input_scale * weight_scale and may | ||
| // accumulate fp rounding error from upstream quantization tools, so a tighter check | ||
| // would reject otherwise valid fusions in practice. | ||
| constexpr float atol = 1e-6f; | ||
| constexpr float rtol = 1e-2f; | ||
|
|
||
| // x_scale is a scalar — check it once here rather than redundantly on every loop iteration. | ||
| if (!std::isfinite(x_scale)) { | ||
| return false; | ||
| } | ||
|
|
||
| for (size_t i = 0; i < num_channels; ++i) { | ||
| const float w_scale = (w_num == 1) ? w_scales[0] : w_scales[i]; | ||
| const float b_scale = (b_num == 1) ? b_scales[0] : b_scales[i]; | ||
| // Reject non-finite per-channel values: NaN compares unequal to itself so | ||
| // the tolerance check below could pass or fail unpredictably; Inf * Inf = | ||
| // Inf which also produces unreliable results. Be conservative and refuse fusion. | ||
| if (!std::isfinite(w_scale) || !std::isfinite(b_scale)) { | ||
| return false; | ||
| } | ||
| const float expected = x_scale * w_scale; | ||
| if (std::abs(b_scale - expected) > (atol + rtol * std::abs(expected))) { | ||
| return false; | ||
| } | ||
|
tianleiwu marked this conversation as resolved.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: if (!std::isfinite(x_scale)) {
return false;
}
for (size_t i = 0; i < num_channels; ++i) {
const float w_scale = (w_num == 1) ? w_scales[0] : w_scales[i];
const float b_scale = (b_num == 1) ? b_scales[0] : b_scales[i];
if (!std::isfinite(w_scale) || !std::isfinite(b_scale)) {
return false;
}
...
} |
||
| } | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| // Validates that the bias DQ's zero point is absent or a constant int32 initializer whose every | ||
| // element is zero. ONNX QLinearConv (int32 bias path) assumes bias_zero_point == 0; a nonzero value | ||
| // would shift all output activations silently after fusion since QLinearConv has no bias_zero_point | ||
| // input. | ||
| // Returns false (conservative) if a zero-point name is set but not a constant, or if any element is | ||
| // nonzero or the dtype is unexpected. | ||
| static bool CheckConvBiasZeroPoint(const GraphViewer& graph_viewer, const Node& bias_dq) { | ||
| const auto& bias_dq_inputs = bias_dq.InputDefs(); | ||
| // Zero-point is optional input at index 2. | ||
| if (bias_dq_inputs.size() < 3 || !bias_dq_inputs[QDQ::InputIndex::ZERO_POINT_ID] || | ||
| !bias_dq_inputs[QDQ::InputIndex::ZERO_POINT_ID]->Exists()) { | ||
| return true; // absent zero-point is implicitly zero — allow fusion | ||
| } | ||
|
|
||
| const auto* zp_arg = bias_dq_inputs[QDQ::InputIndex::ZERO_POINT_ID]; | ||
| const auto* zp_proto = graph_viewer.GetConstantInitializer(zp_arg->Name(), true); | ||
| if (!zp_proto) { | ||
| return false; // zero-point present but not constant — cannot verify | ||
| } | ||
|
|
||
| // Fusion to QLinearConv assumes bias is symmetrically quantized (zero_point == 0). | ||
| // If a nonzero bias zero-point is present we skip fusion to avoid producing an | ||
| // arithmetically incorrect QLinearConv (which has no bias zero-point input). | ||
| const Initializer zp_init{graph_viewer.GetGraph(), *zp_proto, graph_viewer.ModelPath()}; | ||
| if (zp_init.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT32) { | ||
| return false; // unexpected dtype for bias zero-point | ||
| } | ||
|
|
||
| const auto zp_values = zp_init.DataAsSpan<int32_t>(); | ||
| // An empty zero-point initializer is malformed; reject fusion rather than | ||
| // silently treating it as "all zeros". | ||
| if (zp_values.empty()) { | ||
| return false; | ||
| } | ||
|
|
||
| for (const int32_t v : zp_values) { | ||
| if (v != 0) { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| return true; | ||
|
tianleiwu marked this conversation as resolved.
|
||
| } | ||
|
|
||
| bool ConvNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node& node, const Node* redundant_clip_node, | ||
| const std::vector<const Node*>& dq_nodes, | ||
| const std::vector<const Node*>& q_nodes) const { | ||
|
|
@@ -440,6 +572,18 @@ | |
| if (dt_bias != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32) { | ||
| return false; | ||
| } | ||
|
|
||
| // Verify bias scale == input_scale * weight_scale[i] per ONNX QLinearConv spec. | ||
| // If scales don't match within tolerance, skip fusion to avoid silent numerical errors. | ||
| if (!CheckConvBiasScale(graph_viewer, *dq_nodes[0], *dq_nodes[1], *dq_nodes[2])) { | ||
|
tianleiwu marked this conversation as resolved.
|
||
| return false; | ||
| } | ||
|
|
||
| // Verify bias zero-point is absent or all-zero. A nonzero bias zero-point would silently | ||
| // shift all output activations after fusion since QLinearConv has no bias_zero_point input. | ||
| if (!CheckConvBiasZeroPoint(graph_viewer, *dq_nodes[2])) { | ||
| return false; | ||
|
tianleiwu marked this conversation as resolved.
|
||
| } | ||
|
tianleiwu marked this conversation as resolved.
|
||
| } | ||
|
|
||
| // 16-bit int types must be explicitly allowed. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.