Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ if (WHISPER_COMMON_FFMPEG)
endif()


# add json lib (used by the HF cache/download subsystem)
add_library(json_cpp INTERFACE)
target_include_directories(json_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})

add_library(${TARGET} STATIC
common.h
common.cpp
Expand All @@ -56,12 +60,30 @@ add_library(${TARGET} STATIC
common-whisper.cpp
grammar-parser.h
grammar-parser.cpp
http.h
hf-cache.h
hf-cache.cpp
${COMMON_SOURCES_FFMPEG}
)

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE whisper ${COMMON_EXTRA_LIBS} ${CMAKE_DL_LIBS})
# the ported HF cache subsystem (hf-cache.cpp) uses std::filesystem / std::string_view
target_compile_features(${TARGET} PRIVATE cxx_std_17)

# vendored cpp-httplib header lives under examples/server/ (used by http.h)
target_include_directories(${TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/server)

# HTTPS support for the HF download path (cpp-httplib + OpenSSL). Off by default;
# when OFF an https:// attempt prints a rebuild hint (see http.h).
option(WHISPER_OPENSSL "whisper: enable OpenSSL for HTTPS HuggingFace downloads" OFF)
if (WHISPER_OPENSSL)
find_package(OpenSSL REQUIRED)
target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
endif()

target_link_libraries(${TARGET} PRIVATE whisper json_cpp ${COMMON_EXTRA_LIBS} ${CMAKE_DL_LIBS})

set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
Expand All @@ -85,10 +107,6 @@ if (WHISPER_SDL2)
set_target_properties(${TARGET} PROPERTIES FOLDER "libs")
endif()

# add json lib
add_library(json_cpp INTERFACE)
target_include_directories(json_cpp INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})

# examples

include_directories(${CMAKE_CURRENT_SOURCE_DIR})
Expand Down
15 changes: 15 additions & 0 deletions examples/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ struct whisper_params {
std::string prompt;
std::string font_path = "/System/Library/Fonts/Supplemental/Courier New Bold.ttf";
std::string model = "models/ggml-base.en.bin";
std::string hf_repo;
std::string hf_file;
std::string grammar;
std::string grammar_rule;

Expand Down Expand Up @@ -199,6 +201,8 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
else if ( arg == "--prompt") { params.prompt = ARGV_NEXT; }
else if ( arg == "--carry-initial-prompt") { params.carry_initial_prompt = true; }
else if (arg == "-m" || arg == "--model") { params.model = ARGV_NEXT; }
else if (arg == "-hf" || arg == "--hf-repo") { params.hf_repo = ARGV_NEXT; }
else if (arg == "-hff" || arg == "--hf-file") { params.hf_file = ARGV_NEXT; }
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(ARGV_NEXT); }
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = ARGV_NEXT; }
else if (arg == "-dtw" || arg == "--dtw") { params.dtw = ARGV_NEXT; }
Expand Down Expand Up @@ -282,6 +286,8 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
fprintf(stderr, " --prompt PROMPT [%-7s] initial prompt (max n_text_ctx/2 tokens)\n", params.prompt.c_str());
fprintf(stderr, " --carry-initial-prompt [%-7s] always prepend initial prompt\n", params.carry_initial_prompt ? "true" : "false");
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -hf REPO, --hf-repo REPO [%-7s] HuggingFace repo (org/repo) to resolve from cache\n", params.hf_repo.c_str());
fprintf(stderr, " -hff FILE, --hf-file FILE [%-7s] file within the HuggingFace repo (e.g. ggml-base.en.bin)\n", params.hf_file.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input audio file path\n", "");
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
fprintf(stderr, " -dtw MODEL --dtw MODEL [%-7s] compute token-level timestamps\n", params.dtw.c_str());
Expand Down Expand Up @@ -1070,6 +1076,15 @@ int main(int argc, char ** argv) {
}
}

// resolve HF repo-id -> cached model path if -hf given and -m was left at its default
if (!params.hf_repo.empty() && params.model == "models/ggml-base.en.bin") {
params.model = whisper_hf_resolve_model(params.hf_repo, params.hf_file);
if (params.model.empty()) {
// whisper_hf_resolve_model prints a specific diagnostic for every failure mode
return 3;
}
}

struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);

if (ctx == nullptr) {
Expand Down
112 changes: 112 additions & 0 deletions examples/common-whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "common-whisper.h"

#include "common.h"
#include "hf-cache.h"

#include "whisper.h"

Expand Down Expand Up @@ -31,7 +32,10 @@
#include <io.h>
#endif

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>

#ifdef WHISPER_COMMON_FFMPEG
Expand Down Expand Up @@ -243,5 +247,113 @@ bool speak_with_file(const std::string & command, const std::string & text, cons
return true;
}

// returns true when `name` has the shape ggml-*.bin, i.e. it starts with "ggml-" and ends
// with ".bin" (byte-wise, case-sensitive comparison)
static bool whisper_hf_is_ggml_bin(const std::string & name) {
    const std::string prefix = "ggml-";
    const std::string suffix = ".bin";

    // too short to even hold the suffix
    if (name.size() < suffix.size()) {
        return false;
    }

    const bool has_prefix = name.compare(0, prefix.size(), prefix) == 0;
    const bool has_suffix = name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0;

    return has_prefix && has_suffix;
}

// select the primary entry from a listing:
//   - hf_file given : the first entry whose path equals hf_file exactly
//   - hf_file empty : the first entry whose filename looks like ggml-*.bin
// returns a pointer to the matching element of `files`, or nullptr when no entry qualifies
static const hf_cache::hf_file * whisper_hf_pick_primary(const hf_cache::hf_files & files, const std::string & hf_file) {
    const bool want_exact = !hf_file.empty();

    for (const auto & entry : files) {
        const bool hit = want_exact
            ? entry.path == hf_file
            : whisper_hf_is_ggml_bin(std::filesystem::path(entry.path).filename().string());
        if (hit) {
            return &entry;
        }
    }

    return nullptr;
}

// filter a listing down to the entries whose filename matches ggml-*.bin (input order is kept)
static hf_cache::hf_files whisper_hf_ggml_candidates(const hf_cache::hf_files & files) {
    hf_cache::hf_files matches;

    for (auto it = files.begin(); it != files.end(); ++it) {
        // only the last path component is tested, so files in sub-directories qualify too
        const std::string base = std::filesystem::path(it->path).filename().string();
        if (!whisper_hf_is_ggml_bin(base)) {
            continue;
        }
        matches.push_back(*it);
    }

    return matches;
}

// write `msg` to stderr, then one line per candidate with its filename (directory part removed),
// in ascending lexicographic order
static void whisper_hf_print_candidates(const std::string & msg, const hf_cache::hf_files & candidates) {
    fprintf(stderr, "%s\n", msg.c_str());

    std::vector<std::string> sorted_names;
    for (const auto & entry : candidates) {
        sorted_names.push_back(std::filesystem::path(entry.path).filename().string());
    }
    std::sort(sorted_names.begin(), sorted_names.end());

    for (size_t i = 0; i < sorted_names.size(); ++i) {
        fprintf(stderr, " - %s\n", sorted_names[i].c_str());
    }
}

std::string whisper_hf_resolve_model(const std::string & hf_repo, const std::string & hf_file) {
const char * token_env = std::getenv("HF_TOKEN");
const std::string token = token_env ? token_env : "";

// honor an HF offline mode (huggingface_hub convention): skip the network path entirely
const char * offline_env = std::getenv("HF_HUB_OFFLINE");
const bool offline = offline_env && *offline_env && std::string(offline_env) != "0";

// -hf alone (no --hf-file): cache-first, and refuse ambiguity rather than guess.
if (hf_file.empty()) {
const hf_cache::hf_files cached = whisper_hf_ggml_candidates(hf_cache::get_cached_files(hf_repo));
if (cached.size() == 1) {
return hf_cache::finalize_file(cached.front());
}
if (cached.size() > 1) {
whisper_hf_print_candidates(
"error: multiple models cached for " + hf_repo + "; specify one with -hff/--hf-file:", cached);
return "";
}

// none cached
if (offline) {
fprintf(stderr, "error: %s not found in HF cache\n", hf_repo.c_str());
return "";
}

const hf_cache::hf_files remote = whisper_hf_ggml_candidates(hf_cache::get_repo_files(hf_repo, token));
if (remote.empty()) {
fprintf(stderr, "error: no models found in %s\n", hf_repo.c_str());
return "";
}
// don't auto-pick/download a multi-model repo; list what's available instead
whisper_hf_print_candidates(
"error: multiple models available in " + hf_repo + "; specify one with -hff/--hf-file:", remote);
return "";
}

// explicit --hf-file: download-first with cache fall-back (Phase 2, unchanged).

// 1. try download first: list the repo over the network and fetch the primary file.
// get_repo_files swallows network errors into an empty result (graceful degradation).
if (!offline) {
const hf_cache::hf_files remote = hf_cache::get_repo_files(hf_repo, token);
if (const hf_cache::hf_file * primary = whisper_hf_pick_primary(remote, hf_file)) {
if (hf_cache::download_file(*primary, token)) {
return hf_cache::finalize_file(*primary);
}
}
}

// 2. fall back to the on-disk HF hub cache scan (Phase 1 behavior).
const hf_cache::hf_files cached = hf_cache::get_cached_files(hf_repo);
if (const hf_cache::hf_file * primary = whisper_hf_pick_primary(cached, hf_file)) {
return hf_cache::finalize_file(*primary);
}

fprintf(stderr, "error: file '%s' not found in %s (cache or network)\n", hf_file.c_str(), hf_repo.c_str());
return "";
}

#undef STB_VORBIS_HEADER_ONLY
#include "stb_vorbis.c"
4 changes: 4 additions & 0 deletions examples/common-whisper.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,7 @@ int utf8_trailing_bytes_needed(const std::string & s);

// write text to file, and call system("command voice_id file")
bool speak_with_file(const std::string & command, const std::string & text, const std::string & path, int voice_id);

// resolve a HuggingFace repo id (and optional file name) to a concrete local model path.
// returns "" when nothing could be resolved; the implementation reports the reason on stderr.
//  - hf_file empty: the local HF cache is checked first for ggml-*.bin files; an ambiguous choice
//    is never guessed - the candidates are listed and "" is returned
//  - hf_file given: the file is downloaded first (skipped when HF_HUB_OFFLINE enables offline mode),
//    falling back to a copy already present in the cache
// HF_TOKEN, when set, is forwarded to the network requests.
std::string whisper_hf_resolve_model(const std::string & hf_repo, const std::string & hf_file);
Loading