CodeLinaro
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
Lines changed: 68 additions & 36 deletions
diff --git a/java/src/test/android/app/src/androidTest/java/ai/onnxruntime/example/javavalidator/SimpleTest.kt b/java/src/test/android/app/src/androidTest/java/ai/onnxruntime/example/javavalidator/SimpleTest.kt
Lines changed: 1 addition & 3 deletions
diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java
Lines changed: 2 additions & 7 deletions
diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts
Lines changed: 8 additions & 2 deletions
diff --git a/js/node/src/session_options_helper.cc b/js/node/src/session_options_helper.cc
Lines changed: 8 additions & 5 deletions
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
Lines changed: 88 additions & 8 deletions
@@ -3646,64 +3646,96 @@ struct OrtApi {
    *       that should be used to add it.
    *
    * QNN supported keys:
-   *   "backend_path": file path to QNN backend library.
-   *   "profiling_level": QNN profiling level, options: "off", "basic", "detailed". Default to off.
+   *   "backend_type": Type of QNN backend. Specifies a backend path that is the associated QNN backend library file
+   *      name. E.g., given backend type "htp", on Windows, the backend path would be "QnnHtp.dll", and on other
+   *      platforms, it would be "libQnnHtp.so". Mutually exclusive with "backend_path".
+   *      Available options:
+   *      - "cpu"
+   *      - "gpu"
+   *      - "htp": Default.
+   *      - "saver"
+   *   "backend_path": File path to QNN backend library. Mutually exclusive with "backend_type".
+   *   "profiling_level": QNN profiling level.
+   *      Available options:
+   *      - "off": Default.
+   *      - "basic"
+   *      - "detailed"
    *   "profiling_file_path": QNN profiling file path if ETW not enabled.
    *   "rpc_control_latency": QNN RPC control latency.
    *   "vtcm_mb": QNN VTCM size in MB. Defaults to 0 (not set).
-   *   "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
-   *   "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
+   *   "htp_performance_mode": QNN performance mode.
+   *      Available options:
+   *      - "burst"
+   *      - "balanced"
+   *      - "default": Default.
+   *      - "high_performance"
+   *      - "high_power_saver"
+   *      - "low_balanced"
+   *      - "extreme_power_saver"
+   *      - "low_power_saver"
+   *      - "power_saver"
+   *      - "sustained_high_performance"
    *   "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
-   *   dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
-   *   may alter model/EP partitioning. Use only for debugging.
-   *   "qnn_context_priority": QNN context priority, options: "low", "normal", "normal_high", "high". Default to "normal".
-   *   "htp_graph_finalization_optimization_mode": Set the optimization mode for graph finalization on the HTP backend. Available options:
-   *     - "0": Default.
-   *     - "1": Faster preparation time, less optimal graph.
-   *     - "2": Longer preparation time, more optimal graph.
-   *     - "3": Longest preparation time, most likely even more optimal graph. See QNN SDK documentation for specific details.
-   *   "soc_model": The SoC model number. Refer to the QNN SDK documentation for valid values. Defaults to "0" (unknown).
-   *   "htp_arch": The minimum HTP architecture the driver will use to select compatible QNN operators. Available options:
-   *     - "0": Default (none).
-   *     - "68"
-   *     - "69"
-   *     - "73"
-   *     - "75"
+   *      dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
+   *      may alter model/EP partitioning. Use only for debugging.
+   *   "qnn_context_priority": QNN context priority.
+   *      Available options:
+   *      - "low"
+   *      - "normal": Default.
+   *      - "normal_high"
+   *      - "high"
+   *   "htp_graph_finalization_optimization_mode": Set the optimization mode for graph finalization on the HTP backend.
+   *      Available options:
+   *      - "0": Default.
+   *      - "1": Faster preparation time, less optimal graph.
+   *      - "2": Longer preparation time, more optimal graph.
+   *      - "3": Longest preparation time, most likely even more optimal graph. See QNN SDK documentation for specific
+   *        details.
+   *   "soc_model": The SoC model number. Refer to the QNN SDK documentation for valid values.
+   *      Defaults to "0" (unknown).
+   *   "htp_arch": The minimum HTP architecture the driver will use to select compatible QNN operators.
+   *      Available options:
+   *      - "0": Default (none).
+   *      - "68"
+   *      - "69"
+   *      - "73"
+   *      - "75"
    *   "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device).
    *   "enable_htp_fp16_precision": Used for float32 model for HTP backend.
-   *   Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
+   *      Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
    *      - "0": With fp32 precision.
    *      - "1": Default. With fp16 precision.
    *   "offload_graph_io_quantization": Offload graph input quantization and graph output dequantization to another
-   *   execution provider (typically CPU EP).
-   *     - "0": Disabled. QNN EP will handle quantization and dequantization of graph I/O.
-   *     - "1": Enabled. This is the default value.
-   *   "enable_htp_spill_fill_buffer": Enable HTP spill fill buffer setting. The flag is used while generating context binary.
-   *     - "0": Default. Disabled.
-   *     - "1": Enabled.
+   *      execution provider (typically CPU EP).
+   *      - "0": Disabled. QNN EP will handle quantization and dequantization of graph I/O.
+   *      - "1": Enabled. This is the default value.
+   *   "enable_htp_spill_fill_buffer": Enable HTP spill fill buffer setting. The flag is used while generating context
+   *      binary.
+   *      - "0": Default. Disabled.
+   *      - "1": Enabled.
    *   "enable_htp_shared_memory_allocator": Enable the QNN HTP shared memory allocator. Requires libcdsprpc.so/dll to
-   *   be available.
-   *     - "0": Default. Disabled.
-   *     - "1": Enabled.
+   *      be available.
+   *      - "0": Default. Disabled.
+   *      - "1": Enabled.
    *   "dump_json_qnn_graph": Set to "1" to dump QNN graphs generated by QNN EP as JSON files. Each graph partition
    *      assigned to QNN EP is dumped to a separate file.
    *   "json_qnn_graph_dir": Directory in which to dump QNN JSON graphs. If not specified, QNN graphs are dumped in the
    *      program's current working directory. Ignored if "dump_json_qnn_graph" is not set.
    *
    * SNPE supported keys:
    *   "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
-   *   "DSP", "DSP_FIXED8_TF", "AIP_FIXED_TF", "AIP_FIXED8_TF".
-   *   Mapping to SNPE Runtime_t definition: CPU, CPU_FLOAT32 => zdl::DlSystem::Runtime_t::CPU;
-   *   GPU, GPU_FLOAT32_16_HYBRID => zdl::DlSystem::Runtime_t::GPU;
-   *   GPU_FLOAT16 => zdl::DlSystem::Runtime_t::GPU_FLOAT16;
-   *   DSP, DSP_FIXED8_TF => zdl::DlSystem::Runtime_t::DSP.
-   *   AIP_FIXED_TF, AIP_FIXED8_TF => zdl::DlSystem::Runtime_t::AIP_FIXED_TF.
+   *      "DSP", "DSP_FIXED8_TF", "AIP_FIXED_TF", "AIP_FIXED8_TF".
+   *      Mapping to SNPE Runtime_t definition:
+   *        CPU, CPU_FLOAT32 => zdl::DlSystem::Runtime_t::CPU;
+   *        GPU, GPU_FLOAT32_16_HYBRID => zdl::DlSystem::Runtime_t::GPU;
+   *        GPU_FLOAT16 => zdl::DlSystem::Runtime_t::GPU_FLOAT16;
+   *        DSP, DSP_FIXED8_TF => zdl::DlSystem::Runtime_t::DSP.
+   *        AIP_FIXED_TF, AIP_FIXED8_TF => zdl::DlSystem::Runtime_t::AIP_FIXED_TF.
    *   "priority": execution priority, options: "low", "normal".
    *   "buffer_type": ITensor or user buffers, options: "ITENSOR", user buffer with different types - "TF8", "TF16", "UINT8", "FLOAT".
    *   "ITENSOR" -- default, ITensor which is float only.
    *   "TF8" -- quantized model required, "FLOAT" -- for both quantized or non-quantized model
    *   "enable_init_cache": enable SNPE init caching feature, set to 1 to enable it. Disabled by default.
-   *   If SNPE is not available (due to a non Snpe enabled build or its dependencies not being installed), this function will fail.
    *
    * XNNPACK supported keys:
    *   "intra_op_num_threads": size of the thread pool to use for the XNNPACK execution provider.
 
@@ -82,9 +82,7 @@ class SimpleTest {
 
                 OrtProvider.QNN -> {
                     if (OrtEnvironment.getAvailableProviders().contains(OrtProvider.QNN)) {
-                        // Since this is running in an Android environment, we use the .so library
-                        val qnnLibrary = "libQnnHtp.so"
-                        val providerOptions = Collections.singletonMap("backend_path", qnnLibrary)
+                        val providerOptions = Collections.singletonMap("backend_type", "htp")
                         opts.addQnn(providerOptions)
                     } else {
                         Log.println(Log.INFO, TAG, "NO QNN EP available, skip the test")
 
@@ -2125,13 +2125,8 @@ private static SqueezeNetTuple openSessionSqueezeNet(EnumSet<OrtProvider> provid
           options.addXnnpack(Collections.emptyMap());
           break;
         case QNN:
-          {
-            String backendPath = OS.WINDOWS.isCurrentOs() ? "/QnnCpu.dll" : "/libQnnCpu.so";
-            options.addQnn(
-                Collections.singletonMap(
-                    "backend_path", TestHelpers.getResourcePath(backendPath).toString()));
-            break;
-          }
+          options.addQnn(Collections.singletonMap("backend_type", "cpu"));
+          break;
         case VITIS_AI:
         case RK_NPU:
         case MI_GRAPH_X:
 
@@ -310,9 +310,15 @@ export declare namespace InferenceSession {
   export interface QnnExecutionProviderOption extends ExecutionProviderOption {
     readonly name: 'qnn';
     /**
-     * Specify a path to the QnnHtp.dll file.
+     * Specify the QNN backend type. E.g., 'cpu' or 'htp'.
+     * Mutually exclusive with `backendPath`.
      *
-     * @default 'QnnHtp.dll'
+     * @default 'htp'
+     */
+    backendType?: string;
+    /**
+     * Specify a path to the QNN backend library.
+     * Mutually exclusive with `backendType`.
      */
     backendPath?: string;
     /**
 
@@ -83,6 +83,14 @@ void ParseExecutionProviders(const Napi::Array epList, Ort::SessionOptions& sess
 #endif
 #ifdef USE_QNN
       if (name == "qnn") {
+        Napi::Value backend_type = obj.Get("backendType");
+        if (!backend_type.IsUndefined()) {
+          if (backend_type.IsString()) {
+            qnn_options["backend_type"] = backend_type.As<Napi::String>().Utf8Value();
+          } else {
+            ORT_NAPI_THROW_TYPEERROR(epList.Env(), "Invalid argument: backendType must be a string.");
+          }
+        }
         Napi::Value backend_path = obj.Get("backendPath");
         if (!backend_path.IsUndefined()) {
           if (backend_path.IsString()) {
@@ -136,11 +144,6 @@ void ParseExecutionProviders(const Napi::Array epList, Ort::SessionOptions& sess
 #endif
 #ifdef USE_QNN
     } else if (name == "qnn") {
-      // Ensure that the backend_path option are set to default values if not provided.
-      if (qnn_options.find("backend_path") == qnn_options.end()) {
-        qnn_options["backend_path"] = "QnnHtp.dll";
-      }
-
       sessionOptions.AppendExecutionProvider("QNN", qnn_options);
 #endif
     } else {
 
@@ -4,6 +4,8 @@
 #include "qnn_execution_provider.h"
 
 #include <filesystem>
+#include <optional>
+#include <string_view>
 #include <unordered_set>
 
 #include "core/providers/qnn/ort_api.h"
@@ -22,6 +24,60 @@ namespace onnxruntime {
 
 constexpr const char* QNN = "QNN";
 
+// Builds the platform-specific shared library file name for the bare library name `name`.
+// With _WIN32 defined the result is "<name>.dll"; on every other platform it is "lib<name>.so".
+static std::string MakeSharedLibraryPath(std::string_view name) {
+#if defined(_WIN32)
+  constexpr std::string_view kLibraryPrefix{""};
+  constexpr std::string_view kLibrarySuffix{".dll"};
+#else
+  constexpr std::string_view kLibraryPrefix{"lib"};
+  constexpr std::string_view kLibrarySuffix{".so"};
+#endif
+  return MakeString(kLibraryPrefix, name, kLibrarySuffix);
+}
+
+// Default QNN backend library file names, one per supported "backend_type" value.
+// Each is produced by MakeSharedLibraryPath, so the exact file name depends on the target platform.
+const std::string kDefaultCpuBackendPath = MakeSharedLibraryPath("QnnCpu");
+const std::string kDefaultGpuBackendPath = MakeSharedLibraryPath("QnnGpu");
+const std::string kDefaultHtpBackendPath = MakeSharedLibraryPath("QnnHtp");
+const std::string kDefaultSaverBackendPath = MakeSharedLibraryPath("QnnSaver");
+
+// Maps a "backend_type" provider option value onto the default file name of the matching QNN backend library.
+//
+// backend_type_name: option value to look up; compared exactly (case-sensitive) against
+//                    "cpu", "gpu", "htp" and "saver".
+// backend_path:      output; assigned only when the name is recognized, otherwise left untouched.
+// Returns true on a match. Otherwise logs a warning that lists the allowed names and returns false.
+static bool ParseBackendTypeName(std::string_view backend_type_name, std::string& backend_path) {
+  struct BackendTypeEntry {
+    std::string_view type_name;
+    const std::string* library_path;
+  };
+
+  // One table drives both the lookup and the name list in the warning, so the two cannot drift apart.
+  const std::array<BackendTypeEntry, 4> known_backend_types{{
+      {"cpu", &kDefaultCpuBackendPath},
+      {"gpu", &kDefaultGpuBackendPath},
+      {"htp", &kDefaultHtpBackendPath},
+      {"saver", &kDefaultSaverBackendPath},
+  }};
+
+  for (const BackendTypeEntry& entry : known_backend_types) {
+    if (entry.type_name == backend_type_name) {
+      backend_path = *entry.library_path;
+      return true;
+    }
+  }
+
+  // Unknown name: report it together with every accepted value, comma-separated.
+  std::ostringstream message{};
+  message << "Invalid backend type name: " << backend_type_name << ". Allowed backend type names: ";
+  const char* separator = "";
+  for (const BackendTypeEntry& entry : known_backend_types) {
+    message << separator << entry.type_name;
+    separator = ", ";
+  }
+  LOGS_DEFAULT(WARNING) << message.str();
+  return false;
+}
+
 static void ParseProfilingLevel(std::string profiling_level_string,
                                 qnn::ProfilingLevel& profiling_level) {
   std::transform(profiling_level_string.begin(),
@@ -201,15 +257,39 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
     LOGS_DEFAULT(VERBOSE) << "User specified option - stop share EP contexts across sessions: " << stop_share_ep_contexts_;
   }
 
-  static const std::string BACKEND_PATH = "backend_path";
-  auto backend_path_pos = provider_options_map.find(BACKEND_PATH);
+  std::string backend_path{};
+  {
+    std::optional<std::string> backend_path_from_options{};
+
+    static const std::string BACKEND_TYPE = "backend_type";
+    static const std::string BACKEND_PATH = "backend_path";
 
-  std::string backend_path;
-  if (backend_path_pos != provider_options_map.end()) {
-    backend_path = backend_path_pos->second;
-    LOGS_DEFAULT(VERBOSE) << "Backend path: " << backend_path;
-  } else {
-    LOGS_DEFAULT(ERROR) << "No backend path provided.";
+    auto backend_type_it = provider_options_map.find(BACKEND_TYPE);
+    auto backend_path_it = provider_options_map.find(BACKEND_PATH);
+
+    if (backend_type_it != provider_options_map.end() && backend_path_it != provider_options_map.end()) {
+      ORT_THROW("Only one of '", BACKEND_TYPE, "' and '", BACKEND_PATH, "' should be set.");
+    }
+
+    if (backend_type_it != provider_options_map.end()) {
+      if (std::string parsed_backend_path; ParseBackendTypeName(backend_type_it->second, parsed_backend_path)) {
+        backend_path_from_options = parsed_backend_path;
+      } else {
+        LOGS_DEFAULT(ERROR) << "Failed to parse '" << BACKEND_TYPE << "' value.";
+      }
+    } else if (backend_path_it != provider_options_map.end()) {
+      backend_path_from_options = backend_path_it->second;
+    }
+
+    if (backend_path_from_options.has_value()) {
+      backend_path = std::move(*backend_path_from_options);
+    } else {
+      const auto& default_backend_path = kDefaultHtpBackendPath;
+      backend_path = default_backend_path;
+      LOGS_DEFAULT(WARNING) << "Unable to determine backend path from provider options. Using default.";
+    }
+
+    LOGS_DEFAULT(VERBOSE) << "Using backend path: " << backend_path;
   }
 
   std::string profiling_file_path;