Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ggml/include/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ extern "C" {
// Load all known backends from dynamic libraries
GGML_API void ggml_backend_load_all(void);
GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
// Same as ggml_backend_load_all_from_path(), but skips the backends listed in
// disable_backends: a comma-separated list of lowercase backend identifiers
// (e.g. "cuda,vulkan"); NULL or empty skips none.
// NOTE: parameter renamed from "skiplist" to match the definition's name.
GGML_API void ggml_backend_load_all_from_path_with_disable(const char * dir_path, const char * disable_backends);

//
// Backend scheduler
Expand Down
51 changes: 31 additions & 20 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <type_traits>
#include <vector>
#include <cctype>
#include <unordered_set>

#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
Expand Down Expand Up @@ -540,35 +541,45 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
return get_reg().load_backend(best_path, silent);
}

// Load every known backend, searching the default locations.
void ggml_backend_load_all() {
    ggml_backend_load_all_from_path(nullptr);
}

void ggml_backend_load_all_from_path(const char *dir_path) { ggml_backend_load_all_from_path_with_disable(dir_path, nullptr); }

void ggml_backend_load_all_from_path(const char * dir_path) {
void ggml_backend_load_all_from_path_with_disable(const char *dir_path, const char *disable_backends) {
#ifdef NDEBUG
bool silent = true;
#else
bool silent = false;
#endif

ggml_backend_load_best("blas", silent, dir_path);
ggml_backend_load_best("zendnn", silent, dir_path);
ggml_backend_load_best("cann", silent, dir_path);
ggml_backend_load_best("cuda", silent, dir_path);
ggml_backend_load_best("hip", silent, dir_path);
ggml_backend_load_best("metal", silent, dir_path);
ggml_backend_load_best("rpc", silent, dir_path);
ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("virtgpu", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path);
ggml_backend_load_best("openvino", silent, dir_path);
ggml_backend_load_best("cpu", silent, dir_path);
const std::pair<const char*, const char*> backends[] = {
{"blas", "BLAS"},
{"zendnn", "ZenDNN"},
{"cann", "CANN"},
{"cuda", "CUDA"},
{"hip", "HIP"},
{"metal", "Metal"},
{"rpc", "RPC"},
{"sycl", "SYCL"},
{"vulkan", "Vulkan"},
{"virtgpu", "VirtGPU"},
{"opencl", "OpenCL"},
{"hexagon", "Hexagon"},
{"musa", "MUSA"},
{"openvino","OpenVINO"},
{"cpu", "CPU"} };
std::unordered_set<std::string> skiplist;
if (disable_backends && *disable_backends) {
std::stringstream ss(disable_backends);
std::string token;
while (std::getline(ss, token, ',')) { skiplist.insert(token); } }
for (const auto& [key, name] : backends) {
if(skiplist.find(key) != skiplist.end()) { fprintf(stderr, "INFO: Skipping backend '%s'.\n", name); continue; }
fprintf(stderr, "INFO: Loading backend '%s'.\n", name);
ggml_backend_load_best(key, silent, dir_path); }
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
const char * backend_path = std::getenv("GGML_BACKEND_PATH");
if (backend_path) {
ggml_backend_load(backend_path);
}
}

5 changes: 4 additions & 1 deletion ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
else ()
if (GGML_NATIVE)
list(APPEND ARCH_FLAGS -march=native)
# Respect a user-supplied -march=<arch> already present in CMAKE_C_FLAGS or
# CMAKE_CXX_FLAGS; only fall back to -march=native when none was pinned.
string(REGEX MATCH "-march=[^ ]+" MARCH_MATCH "${CMAKE_C_FLAGS} ${CMAKE_CXX_FLAGS}")
if(NOT MARCH_MATCH)
list(APPEND ARCH_FLAGS -march=native)
endif()
else ()
if (GGML_SSE42)
list(APPEND ARCH_FLAGS -msse4.2)
Expand Down
2 changes: 2 additions & 0 deletions include/whisper.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ extern "C" {
bool use_gpu;
bool flash_attn;
int gpu_device; // CUDA device
// comma-separated list of backend identifiers to skip when loading ggml
// backends (e.g. "cuda,vulkan"), or NULL to load all; forwarded to
// ggml_backend_load_all_from_path_with_disable().
// NOTE(review): the string appears to be borrowed, not copied — caller keeps
// ownership; confirm against whisper_init* lifetime requirements.
char *disable_backends;

// [EXPERIMENTAL] Token-level timestamps with DTW
bool dtw_token_timestamps;
Expand Down Expand Up @@ -734,6 +735,7 @@ extern "C" {
// Get the no_speech probability for the specified segment
WHISPER_API float whisper_full_get_segment_no_speech_prob (struct whisper_context * ctx, int i_segment);
WHISPER_API float whisper_full_get_segment_no_speech_prob_from_state(struct whisper_state * state, int i_segment);

#ifdef __cplusplus
}
#endif
Expand Down
20 changes: 20 additions & 0 deletions src/whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <set>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>

#ifdef _MSC_VER
Expand Down Expand Up @@ -212,6 +213,17 @@ static bool ggml_graph_compute_helper(
return t;
}

// Load the ggml backends exactly once per process (no-op when ggml is built
// without dynamic backend loading).
//
// disable_backends - comma-separated list of backend identifiers to skip, or
//                    NULL to load everything; forwarded to
//                    ggml_backend_load_all_from_path_with_disable()
//
// NOTE(review): only the FIRST call's skiplist takes effect — the once_flag
// makes later calls no-ops, so callers that pass nullptr (e.g.
// whisper_print_system_info) will load ALL backends if they run before the
// context is initialized with a skiplist. Confirm this ordering is intended.
static void whisper_load_backends(const char *disable_backends) {
#ifdef GGML_BACKEND_DL
static std::once_flag flag;
std::call_once(flag, [disable_backends]() {
ggml_backend_load_all_from_path_with_disable(nullptr, disable_backends);
});
#else
(void)disable_backends;
#endif
}

// TODO: move these functions to ggml-base with support for ggml-backend?

static ggml_tensor * whisper_set_f32(struct ggml_tensor * t, float v) {
Expand Down Expand Up @@ -1290,6 +1302,8 @@ static size_t aheads_masks_nbytes(struct whisper_aheads_masks & aheads_masks) {
static ggml_backend_t whisper_backend_init_gpu(const whisper_context_params & params) {
ggml_log_set(g_state.log_callback, g_state.log_callback_user_data);

whisper_load_backends(params.disable_backends);

ggml_backend_dev_t dev = nullptr;

int cnt = 0;
Expand Down Expand Up @@ -3608,6 +3622,7 @@ struct whisper_context_params whisper_context_default_params() {
/*.use_gpu =*/ true,
/*.flash_attn =*/ true,
/*.gpu_device =*/ 0,
/*.disable_backends =*/ NULL,

/*.dtw_token_timestamps =*/ false,
/*.dtw_aheads_preset =*/ WHISPER_AHEADS_NONE,
Expand Down Expand Up @@ -4315,6 +4330,8 @@ static int whisper_has_openvino(void) {
const char * whisper_print_system_info(void) {
static std::string s;

whisper_load_backends(nullptr);

s = "";
s += "WHISPER : ";
s += "COREML = " + std::to_string(whisper_has_coreml()) + " | ";
Expand Down Expand Up @@ -8214,6 +8231,8 @@ WHISPER_API int whisper_bench_ggml_mul_mat(int n_threads) {
}

WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
whisper_load_backends(nullptr);

static std::string s;
s = "";
char strbuf[256];
Expand Down Expand Up @@ -8992,3 +9011,4 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text
fputs(text, stderr);
fflush(stderr);
}