diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 9fd3f7f32a0..8e6174506f0 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -245,6 +245,7 @@ extern "C" {
     // Load all known backends from dynamic libraries
     GGML_API void ggml_backend_load_all(void);
     GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
+    GGML_API void ggml_backend_load_all_from_path_with_disable(const char * dir_path, const char * disable_backends);
 
     //
     // Backend scheduler
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 0587109212e..9e9bec1171b 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -10,6 +10,9 @@
 #include <memory>
+#include <sstream>
 #include <string>
 #include <type_traits>
+#include <unordered_set>
+#include <utility>
 #include <vector>
 
 #ifdef _WIN32
@@ -540,35 +543,66 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
 }
 
 void ggml_backend_load_all() {
     ggml_backend_load_all_from_path(nullptr);
 }
 
-void ggml_backend_load_all_from_path(const char * dir_path) {
+void ggml_backend_load_all_from_path(const char * dir_path) {
+    ggml_backend_load_all_from_path_with_disable(dir_path, nullptr);
+}
+
+// Load every known backend found in dir_path (or the default search path) except those
+// named in disable_backends, a comma-separated list of library names (e.g. "cuda,vulkan").
+void ggml_backend_load_all_from_path_with_disable(const char * dir_path, const char * disable_backends) {
 #ifdef NDEBUG
     bool silent = true;
 #else
     bool silent = false;
 #endif
-
-    ggml_backend_load_best("blas", silent, dir_path);
-    ggml_backend_load_best("zendnn", silent, dir_path);
-    ggml_backend_load_best("cann", silent, dir_path);
-    ggml_backend_load_best("cuda", silent, dir_path);
-    ggml_backend_load_best("hip", silent, dir_path);
-    ggml_backend_load_best("metal", silent, dir_path);
-    ggml_backend_load_best("rpc", silent, dir_path);
-    ggml_backend_load_best("sycl", silent, dir_path);
-    ggml_backend_load_best("vulkan", silent, dir_path);
-    ggml_backend_load_best("virtgpu", silent, dir_path);
-    ggml_backend_load_best("opencl", silent, dir_path);
-    ggml_backend_load_best("hexagon", silent, dir_path);
-    ggml_backend_load_best("musa", silent, dir_path);
-    ggml_backend_load_best("openvino", silent, dir_path);
-    ggml_backend_load_best("cpu", silent, dir_path);
+
+    // all dynamically loadable backends: { library name, display name }
+    static const std::pair<const char *, const char *> backends[] = {
+        { "blas",     "BLAS"     },
+        { "zendnn",   "ZenDNN"   },
+        { "cann",     "CANN"     },
+        { "cuda",     "CUDA"     },
+        { "hip",      "HIP"      },
+        { "metal",    "Metal"    },
+        { "rpc",      "RPC"      },
+        { "sycl",     "SYCL"     },
+        { "vulkan",   "Vulkan"   },
+        { "virtgpu",  "VirtGPU"  },
+        { "opencl",   "OpenCL"   },
+        { "hexagon",  "Hexagon"  },
+        { "musa",     "MUSA"     },
+        { "openvino", "OpenVINO" },
+        { "cpu",      "CPU"      },
+    };
+
+    // parse the skip list; tokens are whitespace-trimmed so "cuda, hip" also works
+    std::unordered_set<std::string> skiplist;
+    if (disable_backends && *disable_backends) {
+        std::stringstream ss(disable_backends);
+        std::string token;
+        while (std::getline(ss, token, ',')) {
+            const size_t first = token.find_first_not_of(" \t");
+            const size_t last  = token.find_last_not_of(" \t");
+            if (first != std::string::npos) {
+                skiplist.insert(token.substr(first, last - first + 1));
+            }
+        }
+    }
+
+    for (const auto & [key, name] : backends) {
+        if (skiplist.count(key) > 0) {
+            GGML_LOG_INFO("skipping backend '%s' (disabled by caller)\n", name);
+            continue;
+        }
+        ggml_backend_load_best(key, silent, dir_path);
+    }
 
     // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
     const char * backend_path = std::getenv("GGML_BACKEND_PATH");
     if (backend_path) {
         ggml_backend_load(backend_path);
     }
 }
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index beebc4760d2..5a9b25f910e 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -311,7 +311,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
         endif()
     else ()
         if (GGML_NATIVE)
-            list(APPEND ARCH_FLAGS -march=native)
+            # respect a user-provided -march in C/CXX flags; only add -march=native if absent
+            string(REGEX MATCH "-march=[^ ]+" MARCH_MATCH "${CMAKE_C_FLAGS} ${CMAKE_CXX_FLAGS}")
+            if (NOT MARCH_MATCH)
+                list(APPEND ARCH_FLAGS -march=native)
+            endif()
         else ()
             if (GGML_SSE42)
                 list(APPEND ARCH_FLAGS -msse4.2)
diff --git a/include/whisper.h b/include/whisper.h
index f4cc6bf7abd..44973ef30d2 100644
--- a/include/whisper.h
+++ b/include/whisper.h
@@ -117,6 +117,7 @@ extern "C" {
         bool  use_gpu;
         bool  flash_attn;
         int   gpu_device;  // CUDA device
+        const char * disable_backends; // comma-separated ggml backends to skip (e.g. "cuda,vulkan"); NULL loads all
 
         // [EXPERIMENTAL] Token-level timestamps with DTW
         bool dtw_token_timestamps;
diff --git a/src/whisper.cpp b/src/whisper.cpp
index 86bfafeaad8..1943fd8b949 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -32,6 +32,7 @@
 #include <cstring>
 #include <fstream>
 #include <map>
+#include <mutex>
 #include <set>
 
 #ifdef _MSC_VER
@@ -212,6 +213,19 @@ static bool ggml_graph_compute_helper(
     return t;
 }
 
+// Load the dynamically discoverable ggml backends exactly once per process.
+// disable_backends is forwarded to ggml on the first call only; later calls are no-ops.
+static void whisper_load_backends(const char * disable_backends) {
+#ifdef GGML_BACKEND_DL
+    static std::once_flag flag;
+    std::call_once(flag, [disable_backends] {
+        ggml_backend_load_all_from_path_with_disable(nullptr, disable_backends);
+    });
+#else
+    (void) disable_backends;
+#endif
+}
+
 // TODO: move these functions to ggml-base with support for ggml-backend?
 
 static ggml_tensor * whisper_set_f32(struct ggml_tensor * t, float v) {
@@ -1290,6 +1304,8 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) {
 static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
     ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);
 
+    whisper_load_backends(params.disable_backends);
+
     ggml_backend_dev_t dev = nullptr;
 
     int cnt = 0;
@@ -3608,6 +3624,7 @@ struct whisper_context_params whisper_context_default_params() {
         /*.use_gpu              =*/ true,
         /*.flash_attn           =*/ true,
         /*.gpu_device           =*/ 0,
+        /*.disable_backends     =*/ nullptr,
 
         /*.dtw_token_timestamps =*/ false,
         /*.dtw_aheads_preset   =*/ WHISPER_AHEADS_NONE,
@@ -4315,6 +4332,8 @@ static int whisper_has_openvino(void) {
 const char * whisper_print_system_info(void) {
     static std::string s;
 
+    whisper_load_backends(nullptr);
+
     s  = "";
     s += "WHISPER : ";
     s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
@@ -8214,6 +8233,8 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
 }
 
 WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
+    whisper_load_backends(nullptr);
+
     static std::string s;
     s = "";
     char strbuf[256];