
Commit c343143

[QNN EP] Adjust tolerance for Clip and Transpose tests due to FP16 default in QNN HTP (microsoft#26499)
### Description

This PR updates the tolerance thresholds for the Clip and Transpose tests in QnnHTPBackendTests. The adjustment accounts for minor accuracy differences introduced by the change in default floating-point precision in QNN HTP starting from version 2.35.

### Motivation and Context

Since QNN 2.35, the default floating-point precision in QNN HTP has changed from FP32 to FP16, and the configuration option `QNN_HTP_GRAPH_CONFIG_OPTION_PRECISION` has been deprecated. This change can introduce expected accuracy loss, especially where graph inputs and outputs are defined as float32 but internal computations are performed in FP16 (i.e., FP32 → FP16 → FP32 conversions). To accommodate this, the tolerance thresholds for the affected tests have been increased to prevent false negatives caused by precision differences.
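As a rough illustration of the precision loss described above, the following standalone sketch (not part of this PR) round-trips the values cited in the updated test comments through half precision. It assumes a compiler and target supporting the `_Float16` extension (e.g., recent Clang or GCC):

```cpp
// Illustrative only: show the magnitude of FP32 -> FP16 -> FP32 round-trip
// error for the reference values mentioned in the test comments.
#include <cstdio>

int main() {
  const float values[] = {-4.54545403f, 7.64300251f};
  for (float v : values) {
    // FP32 -> FP16 -> FP32, mirroring the internal FP16 computation path
    // that QNN HTP uses by default since version 2.35.
    float round_tripped = static_cast<float>(static_cast<_Float16>(v));
    std::printf("%.8f -> %.8f (abs err %.8f)\n", v, round_tripped, round_tripped - v);
  }
  return 0;
}
```

Both values land within roughly 1.6e-3 of the original: above the old `1e-5f` tolerance, but comfortably inside the new `5e-3f`.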
1 parent aeac757 · commit c343143

2 files changed

Lines changed: 19 additions & 29 deletions


onnxruntime/test/providers/qnn/clip_op_test.cc

Lines changed: 11 additions & 17 deletions
@@ -22,22 +22,15 @@ static void RunClipTest(const TestInputDef<DataType>& input_def,
                         ExpectedEPNodeAssignment expected_ep_assignment,
                         const std::string& backend_name = "cpu",
                         int opset = 13,
-                        bool enable_fp16_precision = true) {
+                        float fp32_abs_err = 1e-5f) {
   ProviderOptions provider_options;
   provider_options["backend_type"] = backend_name;
 
-  if (backend_name == "htp") {
-    if (enable_fp16_precision) {
-      provider_options["enable_htp_fp16_precision"] = "1";
-    } else {
-      provider_options["enable_htp_fp16_precision"] = "0";
-    }
-  }
-
   RunQnnModelTest(BuildOpTestCase<DataType, DataType>("Clip", {input_def}, min_max_defs, {}),
                   provider_options,
                   opset,
-                  expected_ep_assignment);
+                  expected_ep_assignment,
+                  fp32_abs_err);
 }
 
 //
@@ -77,17 +70,19 @@ TEST_F(QnnCPUBackendTests, Clip_5D_f32) {
 // HTP tests:
 //
 
-// Test Clip with float32 on HTP
-// Fails with QNN SDK 2.35.0:
-// value pair (-4.54545403, -4.54687548) at index #3 don't match, which is -0.00142145 from -4.54545
-TEST_F(QnnHTPBackendTests, DISABLED_Clip_f32) {
+// Test Clip with float32 on HTP.
+// Since QAIRT 2.35, default float precision on QNN HTP became FP16.
+// Converting FP32 -> FP16 -> FP32 may introduce minor accuracy loss.
+// For example, a value of -4.54545403 could become -4.54687548 after the conversion.
+// The expected difference is approximately 0.00142145, so the tolerance is adjusted to 5e-3f.
+TEST_F(QnnHTPBackendTests, Clip_f32) {
   RunClipTest<float>(TestInputDef<float>({1, 1, 3, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 12)),
                      {TestInputDef<float>({}, true, {-5.0f}),
                       TestInputDef<float>({}, true, {5.0f})},
                      ExpectedEPNodeAssignment::All,
                      "htp",
                      13,
-                     false);
+                     5e-3f);
 }
 
 // Test Clip with int32 on HTP
@@ -386,8 +381,7 @@ TEST_F(QnnGPUBackendTests, Clip_fp32) {
                       TestInputDef<float>({}, true, {5.0f})},
                      ExpectedEPNodeAssignment::All,
                      "gpu",
-                     13,
-                     false);
+                     13);
 }
 
 // Test Clip with int32 on GPU

onnxruntime/test/providers/qnn/transpose_htp_test.cc

Lines changed: 8 additions & 12 deletions
@@ -88,21 +88,15 @@ template <typename DataType>
 static void RunTransposeNonQDQOnHTP(const TestInputDef<DataType>& input_def,
                                     const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
                                     ExpectedEPNodeAssignment expected_ep_assignment,
-                                    bool enable_fp16_precision = true) {
+                                    float fp32_abs_err = 1e-5f) {
   ProviderOptions provider_options;
   provider_options["backend_type"] = "htp";
 
-  if (enable_fp16_precision) {
-    provider_options["enable_htp_fp16_precision"] = "1";
-  } else {
-    provider_options["enable_htp_fp16_precision"] = "0";
-  }
-
   RunQnnModelTest(BuildTransposeTestCase<DataType>(input_def, attrs),
                   provider_options,
                   13,
                   expected_ep_assignment,
-                  1e-5f);
+                  fp32_abs_err);
 }
 
 // Check that QNN compiles DQ -> Transpose -> Q as a single unit.
@@ -120,12 +114,14 @@ TEST_F(QnnHTPBackendTests, TransposeInt32OnHTP) {
 }
 
 // Check that QNN supports Transpose with float32 data input on HTP
-// Fails with QNN SDK 2.35.0:
-// value pair (0.183528364, 0.183471695) at index #0 don't match, which is -5.66691e-05 from 0.183528
-TEST_F(QnnHTPBackendTests, DISABLED_TransposeFloatOnHTP) {
+// Since QAIRT 2.35, default float precision on QNN HTP became FP16.
+// Converting FP32 -> FP16 -> FP32 may introduce minor accuracy loss.
+// For example, a value of 7.64300251 could become 7.64453173 after the conversion.
+// The expected difference is approximately 0.00152922, so the tolerance is adjusted to 5e-3f.
+TEST_F(QnnHTPBackendTests, TransposeFloat32OnHTP) {
   RunTransposeNonQDQOnHTP<float>(TestInputDef<float>({1, 3, 224, 128}, false, 0, 10.0f),
                                  {utils::MakeAttribute("perm", std::vector<int64_t>{0, 2, 3, 1})},
-                                 ExpectedEPNodeAssignment::All, false);
+                                 ExpectedEPNodeAssignment::All, 5e-3f);
 }
 
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
