diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index 9fd3f7f32a0..8e6174506f0 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -245,6 +245,7 @@ extern "C" {
     // Load all known backends from dynamic libraries
     GGML_API void ggml_backend_load_all(void);
     GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
+    GGML_API void ggml_backend_load_all_from_path_with_disable(const char * dir_path, const char * disable_backends);
 
     //
     // Backend scheduler
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 0587109212e..9e9bec1171b 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -10,6 +10,9 @@
 #include <memory>
+#include <sstream>
 #include <string>
 #include <type_traits>
+#include <unordered_set>
+#include <utility>
 #include <vector>
 
 #ifdef _WIN32
@@ -540,35 +543,66 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
 }
 
 void ggml_backend_load_all() {
     ggml_backend_load_all_from_path(nullptr);
 }
 
-void ggml_backend_load_all_from_path(const char * dir_path) {
+void ggml_backend_load_all_from_path(const char * dir_path) {
+    ggml_backend_load_all_from_path_with_disable(dir_path, nullptr);
+}
+
+// Load every known backend found in dir_path (or the default search path) except those
+// named in disable_backends, a comma-separated list of library names (e.g. "cuda,vulkan").
+void ggml_backend_load_all_from_path_with_disable(const char * dir_path, const char * disable_backends) {
 #ifdef NDEBUG
     bool silent = true;
 #else
     bool silent = false;
 #endif
-
-    ggml_backend_load_best("blas", silent, dir_path);
-    ggml_backend_load_best("zendnn", silent, dir_path);
-    ggml_backend_load_best("cann", silent, dir_path);
-    ggml_backend_load_best("cuda", silent, dir_path);
-    ggml_backend_load_best("hip", silent, dir_path);
-    ggml_backend_load_best("metal", silent, dir_path);
-    ggml_backend_load_best("rpc", silent, dir_path);
-    ggml_backend_load_best("sycl", silent, dir_path);
-    ggml_backend_load_best("vulkan", silent, dir_path);
-    ggml_backend_load_best("virtgpu", silent, dir_path);
-    ggml_backend_load_best("opencl", silent, dir_path);
-    ggml_backend_load_best("hexagon", silent, dir_path);
-    ggml_backend_load_best("musa", silent, dir_path);
-    ggml_backend_load_best("openvino", silent, dir_path);
-    ggml_backend_load_best("cpu", silent, dir_path);
+
+    // all dynamically loadable backends: { library name, display name }
+    static const std::pair<const char *, const char *> backends[] = {
+        { "blas",     "BLAS"     },
+        { "zendnn",   "ZenDNN"   },
+        { "cann",     "CANN"     },
+        { "cuda",     "CUDA"     },
+        { "hip",      "HIP"      },
+        { "metal",    "Metal"    },
+        { "rpc",      "RPC"      },
+        { "sycl",     "SYCL"     },
+        { "vulkan",   "Vulkan"   },
+        { "virtgpu",  "VirtGPU"  },
+        { "opencl",   "OpenCL"   },
+        { "hexagon",  "Hexagon"  },
+        { "musa",     "MUSA"     },
+        { "openvino", "OpenVINO" },
+        { "cpu",      "CPU"      },
+    };
+
+    // parse the skip list; tokens are whitespace-trimmed so "cuda, hip" also works
+    std::unordered_set<std::string> skiplist;
+    if (disable_backends && *disable_backends) {
+        std::stringstream ss(disable_backends);
+        std::string token;
+        while (std::getline(ss, token, ',')) {
+            const size_t first = token.find_first_not_of(" \t");
+            const size_t last  = token.find_last_not_of(" \t");
+            if (first != std::string::npos) {
+                skiplist.insert(token.substr(first, last - first + 1));
+            }
+        }
+    }
+
+    for (const auto & [key, name] : backends) {
+        if (skiplist.count(key) > 0) {
+            GGML_LOG_INFO("skipping backend '%s' (disabled by caller)\n", name);
+            continue;
+        }
+        ggml_backend_load_best(key, silent, dir_path);
+    }
 
     // check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
     const char * backend_path = std::getenv("GGML_BACKEND_PATH");
     if (backend_path) {
         ggml_backend_load(backend_path);
     }
 }
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index beebc4760d2..5a9b25f910e 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -311,7 +311,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
         endif()
     else ()
         if (GGML_NATIVE)
-            list(APPEND ARCH_FLAGS -march=native)
+            # respect a user-provided -march in C/CXX flags; only add -march=native if absent
+            string(REGEX MATCH "-march=[^ ]+" MARCH_MATCH "${CMAKE_C_FLAGS} ${CMAKE_CXX_FLAGS}")
+            if (NOT MARCH_MATCH)
+                list(APPEND ARCH_FLAGS -march=native)
+            endif()
         else ()
             if (GGML_SSE42)
                 list(APPEND ARCH_FLAGS -msse4.2)
diff --git a/include/whisper.h b/include/whisper.h
index f4cc6bf7abd..44973ef30d2 100644
--- a/include/whisper.h
+++ b/include/whisper.h
@@ -117,6 +117,7 @@ extern "C" {
         bool  use_gpu;
         bool  flash_attn;
         int   gpu_device;  // CUDA device
+        const char * disable_backends; // comma-separated ggml backends to skip (e.g. "cuda,vulkan"); NULL loads all
 
         // [EXPERIMENTAL] Token-level timestamps with DTW
         bool dtw_token_timestamps;
diff --git a/src/whisper.cpp b/src/whisper.cpp
index 86bfafeaad8..1943fd8b949 100644
--- a/src/whisper.cpp
+++ b/src/whisper.cpp
@@ -32,6 +32,7 @@
 #include <cstring>
 #include <fstream>
 #include <map>
+#include <mutex>
 #include <set>
 
 #ifdef _MSC_VER
@@ -212,6 +213,19 @@ static bool ggml_graph_compute_helper(
     return t;
 }
 
+// Load the dynamically discoverable ggml backends exactly once per process.
+// disable_backends is forwarded to ggml on the first call only; later calls are no-ops.
+static void whisper_load_backends(const char * disable_backends) {
+#ifdef GGML_BACKEND_DL
+    static std::once_flag flag;
+    std::call_once(flag, [disable_backends] {
+        ggml_backend_load_all_from_path_with_disable(nullptr, disable_backends);
+    });
+#else
+    (void) disable_backends;
+#endif
+}
+
 // TODO: move these functions to ggml-base with support for ggml-backend?
 
 static ggml_tensor * whisper_set_f32(struct ggml_tensor * t, float v) {
@@ -1290,6 +1304,8 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) {
 static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
     ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);
 
+    whisper_load_backends(params.disable_backends);
+
     ggml_backend_dev_t dev = nullptr;
 
     int cnt = 0;
@@ -3608,6 +3624,7 @@ struct whisper_context_params whisper_context_default_params() {
         /*.use_gpu              =*/ true,
         /*.flash_attn           =*/ true,
         /*.gpu_device           =*/ 0,
+        /*.disable_backends     =*/ nullptr,
 
         /*.dtw_token_timestamps =*/ false,
         /*.dtw_aheads_preset   =*/ WHISPER_AHEADS_NONE,
@@ -4315,6 +4332,8 @@ static int whisper_has_openvino(void) {
 const char * whisper_print_system_info(void) {
     static std::string s;
 
+    whisper_load_backends(nullptr);
+
     s  = "";
     s += "WHISPER : ";
     s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
@@ -8214,6 +8233,8 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
 }
 
 WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
+    whisper_load_backends(nullptr);
+
     static std::string s;
     s = "";
     char strbuf[256];