
Commit c343143

[QNN EP] Adjust tolerance for Clip and Transpose tests due to FP16 default in QNN HTP (microsoft#26499)
### Description

This PR updates the tolerance thresholds for the Clip and Transpose tests in QnnHTPBackendTests. The adjustment accounts for minor accuracy differences introduced by the change in default floating-point precision in QNN HTP starting from version 2.35.

### Motivation and Context

Since QNN 2.35, the default floating-point precision in QNN HTP has changed from FP32 to FP16, and the configuration option `QNN_HTP_GRAPH_CONFIG_OPTION_PRECISION` has been deprecated. This change can introduce expected accuracy loss, especially where graph inputs and outputs are defined as float32 but internal computations are performed in FP16 (i.e., FP32 → FP16 → FP32 conversions). To accommodate this, the tolerance thresholds for the affected tests have been increased to prevent false negatives caused by precision differences.
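As a rough illustration of the precision loss described above, the following standalone sketch (not part of this PR) round-trips the values cited in the updated test comments through half precision. It assumes a compiler and target supporting the `_Float16` extension (e.g., recent Clang or GCC):

```cpp
// Illustrative only: show the magnitude of FP32 -> FP16 -> FP32 round-trip
// error for the reference values mentioned in the test comments.
#include <cstdio>

int main() {
  const float values[] = {-4.54545403f, 7.64300251f};
  for (float v : values) {
    // FP32 -> FP16 -> FP32, mirroring the internal FP16 computation path
    // that QNN HTP uses by default since version 2.35.
    float round_tripped = static_cast<float>(static_cast<_Float16>(v));
    std::printf("%.8f -> %.8f (abs err %.8f)\n", v, round_tripped, round_tripped - v);
  }
  return 0;
}
```

Both values land within roughly 1.6e-3 of the original: above the old `1e-5f` tolerance, but comfortably inside the new `5e-3f`.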
1 parent aeac757 · commit c343143

2 files changed

Lines changed: 19 additions & 29 deletions


onnxruntime/test/providers/qnn/clip_op_test.cc

Lines changed: 11 additions & 17 deletions
@@ -22,22 +22,15 @@ static void RunClipTest(const TestInputDef<DataType>& input_def,
                         ExpectedEPNodeAssignment expected_ep_assignment,
                         const std::string& backend_name = "cpu",
                         int opset = 13,
-                        bool enable_fp16_precision = true) {
+                        float fp32_abs_err = 1e-5f) {
   ProviderOptions provider_options;
   provider_options["backend_type"] = backend_name;
 
-  if (backend_name == "htp") {
-    if (enable_fp16_precision) {
-      provider_options["enable_htp_fp16_precision"] = "1";
-    } else {
-      provider_options["enable_htp_fp16_precision"] = "0";
-    }
-  }
-
   RunQnnModelTest(BuildOpTestCase<DataType, DataType>("Clip", {input_def}, min_max_defs, {}),
                   provider_options,
                   opset,
-                  expected_ep_assignment);
+                  expected_ep_assignment,
+                  fp32_abs_err);
 }
 
 //
@@ -77,17 +70,19 @@ TEST_F(QnnCPUBackendTests, Clip_5D_f32) {
 // HTP tests:
 //
 
-// Test Clip with float32 on HTP
-// Fails with QNN SDK 2.35.0:
-// value pair (-4.54545403, -4.54687548) at index #3 don't match, which is -0.00142145 from -4.54545
-TEST_F(QnnHTPBackendTests, DISABLED_Clip_f32) {
+// Test Clip with float32 on HTP.
+// Since QAIRT 2.35, default float precision on QNN HTP became FP16.
+// Converting FP32 -> FP16 -> FP32 may introduce minor accuracy loss.
+// For example, a value of -4.54545403 could become -4.54687548 after the conversion.
+// The expected difference is approximately 0.00142145, so the tolerance is adjusted to 5e-3f.
+TEST_F(QnnHTPBackendTests, Clip_f32) {
   RunClipTest<float>(TestInputDef<float>({1, 1, 3, 4}, false, GetFloatDataInRange(-10.0f, 10.0f, 12)),
                      {TestInputDef<float>({}, true, {-5.0f}),
                       TestInputDef<float>({}, true, {5.0f})},
                      ExpectedEPNodeAssignment::All,
                      "htp",
                      13,
-                     false);
+                     5e-3f);
 }
 
 // Test Clip with int32 on HTP
@@ -386,8 +381,7 @@ TEST_F(QnnGPUBackendTests, Clip_fp32) {
                       TestInputDef<float>({}, true, {5.0f})},
                      ExpectedEPNodeAssignment::All,
                      "gpu",
-                     13,
-                     false);
+                     13);
 }
 
 // Test Clip with int32 on GPU

onnxruntime/test/providers/qnn/transpose_htp_test.cc

Lines changed: 8 additions & 12 deletions
@@ -88,21 +88,15 @@ template <typename DataType>
 static void RunTransposeNonQDQOnHTP(const TestInputDef<DataType>& input_def,
                                     const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
                                     ExpectedEPNodeAssignment expected_ep_assignment,
-                                    bool enable_fp16_precision = true) {
+                                    float fp32_abs_err = 1e-5f) {
   ProviderOptions provider_options;
   provider_options["backend_type"] = "htp";
 
-  if (enable_fp16_precision) {
-    provider_options["enable_htp_fp16_precision"] = "1";
-  } else {
-    provider_options["enable_htp_fp16_precision"] = "0";
-  }
-
   RunQnnModelTest(BuildTransposeTestCase<DataType>(input_def, attrs),
                   provider_options,
                   13,
                   expected_ep_assignment,
-                  1e-5f);
+                  fp32_abs_err);
 }
 
 // Check that QNN compiles DQ -> Transpose -> Q as a single unit.
@@ -120,12 +114,14 @@ TEST_F(QnnHTPBackendTests, TransposeInt32OnHTP) {
 }
 
 // Check that QNN supports Transpose with float32 data input on HTP
-// Fails with QNN SDK 2.35.0:
-// value pair (0.183528364, 0.183471695) at index #0 don't match, which is -5.66691e-05 from 0.183528
-TEST_F(QnnHTPBackendTests, DISABLED_TransposeFloatOnHTP) {
+// Since QAIRT 2.35, default float precision on QNN HTP became FP16.
+// Converting FP32 -> FP16 -> FP32 may introduce minor accuracy loss.
+// For example, a value of 7.64300251 could become 7.64453173 after the conversion.
+// The expected difference is approximately 0.00152922, so the tolerance is adjusted to 5e-3f.
+TEST_F(QnnHTPBackendTests, TransposeFloat32OnHTP) {
   RunTransposeNonQDQOnHTP<float>(TestInputDef<float>({1, 3, 224, 128}, false, 0, 10.0f),
                                  {utils::MakeAttribute("perm", std::vector<int64_t>{0, 2, 3, 1})},
-                                 ExpectedEPNodeAssignment::All, false);
+                                 ExpectedEPNodeAssignment::All, 5e-3f);
 }
 
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
