@@ -3646,64 +3646,96 @@ struct OrtApi {
36463646 * that should be used to add it.
36473647 *
36483648 * QNN supported keys:
3649- * "backend_path": file path to QNN backend library.
3650- * "profiling_level": QNN profiling level, options: "off", "basic", "detailed". Default to off.
3649+ * "backend_type": Type of QNN backend. Specifies a backend path that is the associated QNN backend library file
3650+ * name. E.g., given backend type "htp", on Windows, the backend path would be "QnnHtp.dll", and on other
3651+ * platforms, it would be "libQnnHtp.so". Mutually exclusive with "backend_path".
3652+ * Available options:
3653+ * - "cpu"
3654+ * - "gpu"
3655+ * - "htp": Default.
3656+ * - "saver"
3657+ * "backend_path": File path to QNN backend library. Mutually exclusive with "backend_type".
3658+ * "profiling_level": QNN profiling level.
3659+ * Available options:
3660+ * - "off": Default.
3661+ * - "basic"
3662+ * - "detailed"
36513663 * "profiling_file_path": QNN profiling file path if ETW not enabled.
36523664 * "rpc_control_latency": QNN RPC control latency.
36533665 * "vtcm_mb": QNN VTCM size in MB. Defaults to 0 (not set).
3654- * "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
3655- * "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
3666+ * "htp_performance_mode": QNN performance mode.
3667+ * Available options:
3668+ * - "burst"
3669+ * - "balanced"
3670+ * - "default": Default.
3671+ * - "high_performance"
3672+ * - "high_power_saver"
3673+ * - "low_balanced"
3674+ * - "extreme_power_saver"
3675+ * - "low_power_saver"
3676+ * - "power_saver"
3677+ * - "sustained_high_performance"
36563678 * "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
3657- * dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
3658- * may alter model/EP partitioning. Use only for debugging.
3659- * "qnn_context_priority": QNN context priority, options: "low", "normal", "normal_high", "high". Default to "normal".
3660- * "htp_graph_finalization_optimization_mode": Set the optimization mode for graph finalization on the HTP backend. Available options:
3661- * - "0": Default.
3662- * - "1": Faster preparation time, less optimal graph.
3663- * - "2": Longer preparation time, more optimal graph.
3664- * - "3": Longest preparation time, most likely even more optimal graph. See QNN SDK documentation for specific details.
3665- * "soc_model": The SoC model number. Refer to the QNN SDK documentation for valid values. Defaults to "0" (unknown).
3666- * "htp_arch": The minimum HTP architecture the driver will use to select compatible QNN operators. Available options:
3667- * - "0": Default (none).
3668- * - "68"
3669- * - "69"
3670- * - "73"
3671- * - "75"
3679+ * dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
3680+ * may alter model/EP partitioning. Use only for debugging.
3681+ * "qnn_context_priority": QNN context priority.
3682+ * Available options:
3683+ * - "low"
3684+ * - "normal": Default.
3685+ * - "normal_high"
3686+ * - "high"
3687+ * "htp_graph_finalization_optimization_mode": Set the optimization mode for graph finalization on the HTP backend.
3688+ * Available options:
3689+ * - "0": Default.
3690+ * - "1": Faster preparation time, less optimal graph.
3691+ * - "2": Longer preparation time, more optimal graph.
3692+ * - "3": Longest preparation time, most likely even more optimal graph. See QNN SDK documentation for specific
3693+ * details.
3694+ * "soc_model": The SoC model number. Refer to the QNN SDK documentation for valid values.
3695+ * Defaults to "0" (unknown).
3696+ * "htp_arch": The minimum HTP architecture the driver will use to select compatible QNN operators.
3697+ * Available options:
3698+ * - "0": Default (none).
3699+ * - "68"
3700+ * - "69"
3701+ * - "73"
3702+ * - "75"
36723703 * "device_id": The ID of the device to use when setting 'htp_arch'. Defaults to "0" (for single device).
36733704 * "enable_htp_fp16_precision": Used for float32 model for HTP backend.
3674- * Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
3705+ * Enable the float32 model to be inferenced with fp16 precision. Otherwise, it will be fp32 precision.
36753706 * - "0": With fp32 precision.
36763707 * - "1": Default. With fp16 precision.
36773708 * "offload_graph_io_quantization": Offload graph input quantization and graph output dequantization to another
3678- * execution provider (typically CPU EP).
3679- * - "0": Disabled. QNN EP will handle quantization and dequantization of graph I/O.
3680- * - "1": Enabled. This is the default value.
3681- * "enable_htp_spill_fill_buffer": Enable HTP spill fill buffer setting. The flag is used while generating context binary.
3682- * - "0": Default. Disabled.
3683- * - "1": Enabled.
3709+ * execution provider (typically CPU EP).
3710+ * - "0": Disabled. QNN EP will handle quantization and dequantization of graph I/O.
3711+ * - "1": Enabled. This is the default value.
3712+ * "enable_htp_spill_fill_buffer": Enable HTP spill fill buffer setting. The flag is used while generating context
3713+ * binary.
3714+ * - "0": Default. Disabled.
3715+ * - "1": Enabled.
36843716 * "enable_htp_shared_memory_allocator": Enable the QNN HTP shared memory allocator. Requires libcdsprpc.so/dll to
3685- * be available.
3686- * - "0": Default. Disabled.
3687- * - "1": Enabled.
3717+ * be available.
3718+ * - "0": Default. Disabled.
3719+ * - "1": Enabled.
36883720 * "dump_json_qnn_graph": Set to "1" to dump QNN graphs generated by QNN EP as JSON files. Each graph partition
36893721 * assigned to QNN EP is dumped to a separate file.
36903722 * "json_qnn_graph_dir": Directory in which to dump QNN JSON graphs. If not specified, QNN graphs are dumped in the
36913723 * program's current working directory. Ignored if "dump_json_qnn_graph" is not set.
36923724 *
36933725 * SNPE supported keys:
36943726 * "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
3695- * "DSP", "DSP_FIXED8_TF", "AIP_FIXED_TF", "AIP_FIXED8_TF".
3696- * Mapping to SNPE Runtime_t definition: CPU, CPU_FLOAT32 => zdl::DlSystem::Runtime_t::CPU;
3697- * GPU, GPU_FLOAT32_16_HYBRID => zdl::DlSystem::Runtime_t::GPU;
3698- * GPU_FLOAT16 => zdl::DlSystem::Runtime_t::GPU_FLOAT16;
3699- * DSP, DSP_FIXED8_TF => zdl::DlSystem::Runtime_t::DSP.
3700- * AIP_FIXED_TF, AIP_FIXED8_TF => zdl::DlSystem::Runtime_t::AIP_FIXED_TF.
3727+ * "DSP", "DSP_FIXED8_TF", "AIP_FIXED_TF", "AIP_FIXED8_TF".
3728+ * Mapping to SNPE Runtime_t definition:
3729+ * CPU, CPU_FLOAT32 => zdl::DlSystem::Runtime_t::CPU;
3730+ * GPU, GPU_FLOAT32_16_HYBRID => zdl::DlSystem::Runtime_t::GPU;
3731+ * GPU_FLOAT16 => zdl::DlSystem::Runtime_t::GPU_FLOAT16;
3732+ * DSP, DSP_FIXED8_TF => zdl::DlSystem::Runtime_t::DSP;
3733+ * AIP_FIXED_TF, AIP_FIXED8_TF => zdl::DlSystem::Runtime_t::AIP_FIXED_TF.
37013734 * "priority": execution priority, options: "low", "normal".
37023735 * "buffer_type": ITensor or user buffers, options: "ITENSOR", user buffer with different types - "TF8", "TF16", "UINT8", "FLOAT".
37033736 * "ITENSOR" -- default, ITensor which is float only.
37043737 * "TF8" -- quantized model required, "FLOAT" -- for both quantized and non-quantized models
37053738 * "enable_init_cache": enable SNPE init caching feature, set to 1 to enable it. Disabled by default.
3706- * If SNPE is not available (due to a non Snpe enabled build or its dependencies not being installed), this function will fail.
37073739 *
37083740 * XNNPACK supported keys:
37093741 * "intra_op_num_threads": number of threads in the thread pool used by the XNNPACK execution provider.
0 commit comments