Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ui-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
ui-checks:
name: UI Checks
needs: ui-build
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-slim
continue-on-error: true
steps:
- name: Checkout code
Expand Down Expand Up @@ -93,7 +93,7 @@ jobs:
e2e-tests:
name: E2E Tests
needs: ui-build
runs-on: ubuntu-24.04-arm
runs-on: ubuntu-slim
steps:
- name: Checkout code
uses: actions/checkout@v6
Expand Down
1 change: 1 addition & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
/common/fit.* @JohannesGaessler
/common/jinja/ @CISC
/common/ngram-map.* @srogmann
/conversion/ @CISC
/convert_*.py @CISC
/docs/backend/snapdragon/ @ggml-org/ggml-hexagon
/examples/batched.swift/ @ggerganov
Expand Down
7 changes: 0 additions & 7 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include "chat.h"
#include "common.h"
#include "download.h"
#include "hf-cache.h"
#include "json-schema-to-grammar.h"
#include "log.h"
#include "sampling.h"
Expand Down Expand Up @@ -586,12 +585,6 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
// parse the first time to get -hf option (used for remote preset)
parse_cli_args();

// TODO: Remove later
try {
hf_cache::migrate_old_cache_to_hf_cache(params.hf_token, params.offline);
} catch (const std::exception & e) {
LOG_WRN("HF cache migration failed: %s\n", e.what());
}
// export_graph_ops loads only metadata
const bool skip_model_download = ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;

Expand Down
274 changes: 0 additions & 274 deletions common/hf-cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include <filesystem>
#include <fstream>
#include <atomic>
#include <regex> // migration only
#include <string>
#include <string_view>
#include <stdexcept>
Expand Down Expand Up @@ -336,15 +335,9 @@ hf_files get_repo_files(const std::string & repo_id,
if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) {
file.oid = item["lfs"]["oid"].get<std::string>();
}
if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) {
file.size = item["lfs"]["size"].get<size_t>();
}
} else if (item.contains("oid") && item["oid"].is_string()) {
file.oid = item["oid"].get<std::string>();
}
if (file.size == 0 && item.contains("size") && item["size"].is_number()) {
file.size = item["size"].get<size_t>();
}

if (!file.oid.empty() && !is_valid_oid(file.oid)) {
LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str());
Expand Down Expand Up @@ -502,271 +495,4 @@ std::string finalize_file(const hf_file & file) {
return file.final_path;
}

// delete everything after this line, one day

// copied from download.cpp without the tag part
struct gguf_split_info {
std::string prefix; // tag included
int index;
int count;
};

static gguf_split_info get_gguf_split_info(const std::string & path) {
static const std::regex re_split("^(.+)-([0-9]{5})-of-([0-9]{5})$", std::regex::icase);
std::smatch m;

std::string prefix = path;
if (!string_remove_suffix(prefix, ".gguf")) {
return {};
}

int index = 1;
int count = 1;

if (std::regex_match(prefix, m, re_split)) {
index = std::stoi(m[2].str());
count = std::stoi(m[3].str());
prefix = m[1].str();
}

return {std::move(prefix), index, count};
}

static std::pair<std::string, std::string> parse_manifest_name(std::string & filename) {
static const std::regex re(R"(^manifest=([^=]+)=([^=]+)=.*\.json$)");
std::smatch match;
if (std::regex_match(filename, match, re)) {
return {match[1].str(), match[2].str()};
}
return {};
}

static std::string make_old_cache_filename(const std::string & owner,
const std::string & repo,
const std::string & filename) {
auto result = owner + "_" + repo + "_" + filename;
string_replace_all(result, "/", "_");
return result;
}

struct migrate_file {
std::string path;
std::string sha256;
size_t size;
fs::path old_path;
fs::path etag_path;
const hf_file * file;
};

using migrate_files = std::vector<migrate_file>;

static bool collect_file(const fs::path & old_cache,
const std::string & owner,
const std::string & repo,
const std::string & path,
const std::string & sha256,
const hf_files & files,
migrate_files & to_migrate) {

const hf_file * file = nullptr;

for (const auto & f : files) {
if (f.path == path) {
file = &f;
break;
}
}

std::string old_filename = make_old_cache_filename(owner, repo, path);
fs::path old_path = old_cache / old_filename;
fs::path etag_path = old_path.string() + ".etag";

if (!fs::exists(old_path)) {
if (file && fs::exists(file->final_path)) {
return true;
}
LOG_WRN("%s: %s not found in old cache or HF cache\n", __func__, old_filename.c_str());
return false;
}

if (!file) {
LOG_WRN("%s: %s not found in current repo\n", __func__, old_filename.c_str());
return false;
}

if (!sha256.empty() && !file->oid.empty() && sha256 != file->oid) {
LOG_WRN("%s: %s is not up to date (sha256 mismatch)\n", __func__, old_filename.c_str());
return false;
}

if (file->size > 0) {
size_t size = fs::file_size(old_path);
if (size != file->size) {
LOG_WRN("%s: %s has wrong size %zu (expected %zu)\n", __func__, old_filename.c_str(), size, file->size);
return false;
}
}

to_migrate.push_back({path, sha256, file->size, old_path, etag_path, file});
return true;
}

static bool collect_files(const fs::path & old_cache,
const std::string & owner,
const std::string & repo,
const nl::json & node,
const hf_files & files,
migrate_files & to_migrate) {

if (!node.contains("rfilename") ||
!node.contains("lfs") ||
!node["lfs"].contains("sha256")) {
return true;
}

std::string path = node["rfilename"];
std::string sha256 = node["lfs"]["sha256"];

auto split = get_gguf_split_info(path);

if (split.count <= 1) {
return collect_file(old_cache, owner, repo, path, sha256, files, to_migrate);
}

std::vector<std::pair<std::string, std::string>> splits;

for (const auto & f : files) {
auto split_f = get_gguf_split_info(f.path);
if (split_f.count == split.count && split_f.prefix == split.prefix) {
// sadly the manifest only provides the sha256 of the first file (index == 1)
// the rest will be verified using the size...
std::string f_sha256 = (split_f.index == 1) ? sha256 : "";
splits.emplace_back(f.path, f_sha256);
}
}

if ((int)splits.size() != split.count) {
LOG_WRN("%s: expected %d split files but found %d in repo\n", __func__, split.count, (int)splits.size());
return false;
}

for (const auto & [f_path, f_sha256] : splits) {
if (!collect_file(old_cache, owner, repo, f_path, f_sha256, files, to_migrate)) {
return false;
}
}

return true;
}

static bool migrate_file(const migrate_file & file) {
std::error_code ec;

fs::path new_path(file.file->local_path);
fs::create_directories(new_path.parent_path(), ec);

if (!fs::exists(new_path, ec)) {
fs::rename(file.old_path, new_path, ec);
if (ec) {
fs::copy_file(file.old_path, new_path, ec);
if (ec) {
LOG_ERR("%s: failed to move/copy %s: %s\n", __func__, file.old_path.string().c_str(), ec.message().c_str());
return false;
}
}
fs::remove(file.old_path, ec);
}
fs::remove(file.etag_path, ec);

std::string filename = finalize_file(*file.file);
LOG_INF("%s: migrated %s -> %s\n", __func__, file.old_path.filename().string().c_str(), filename.c_str());
return true;
}

void migrate_old_cache_to_hf_cache(const std::string & token, bool offline) {
fs::path old_cache = fs_get_cache_directory();
if (!fs::exists(old_cache)) {
return;
}

if (offline) {
LOG_WRN("%s: skipping migration in offline mode (will run when online)\n", __func__);
return; // -hf is not going to work
}

bool warned = false;

for (const auto & entry : fs::directory_iterator(old_cache)) {
if (!entry.is_regular_file()) {
continue;
}
auto filename = entry.path().filename().string();
auto [owner, repo] = parse_manifest_name(filename);

if (owner.empty() || repo.empty()) {
continue;
}

if (!warned) {
warned = true;
LOG_WRN("================================================================================\n"
"WARNING: Migrating cache to HuggingFace cache directory\n"
" Old cache: %s\n"
" New cache: %s\n"
"This one-time migration moves models previously downloaded with -hf\n"
"from the legacy llama.cpp cache to the standard HuggingFace cache.\n"
"Models downloaded with --model-url are not affected.\n"
"================================================================================\n",
old_cache.string().c_str(), get_cache_directory().string().c_str());
}

auto repo_id = owner + "/" + repo;
auto files = get_repo_files(repo_id, token);

if (files.empty()) {
LOG_WRN("%s: could not get repo files for %s, skipping\n", __func__, repo_id.c_str());
continue;
}

migrate_files to_migrate;
bool ok = true;

try {
std::ifstream manifest(entry.path());
auto json = nl::json::parse(manifest);
for (const char * key : {"ggufFile", "mmprojFile"}) {
if (json.contains(key)) {
if (!collect_files(old_cache, owner, repo, json[key], files, to_migrate)) {
ok = false;
break;
}
}
}
} catch (const std::exception & e) {
LOG_WRN("%s: failed to parse manifest %s: %s\n", __func__, filename.c_str(), e.what());
continue;
}

if (!ok) {
LOG_WRN("%s: migration skipped: one or more files failed validation\n", __func__);
continue;
}

for (const auto & file : to_migrate) {
if (!migrate_file(file)) {
ok = false;
break;
}
}

if (!ok) {
LOG_WRN("%s: migration failed: could not migrate all files\n", __func__);
continue;
}

LOG_INF("%s: migration complete, deleting manifest: %s\n", __func__, entry.path().string().c_str());
fs::remove(entry.path());
}
}

} // namespace hf_cache
4 changes: 0 additions & 4 deletions common/hf-cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ struct hf_file {
std::string final_path;
std::string oid;
std::string repo_id;
size_t size = 0; // only for the migration
};

using hf_files = std::vector<hf_file>;
Expand All @@ -30,7 +29,4 @@ hf_files get_cached_files(const std::string & repo_id = {});
// Create snapshot path (link or move/copy) and return it
std::string finalize_file(const hf_file & file);

// TODO: Remove later
void migrate_old_cache_to_hf_cache(const std::string & token, bool offline = false);

} // namespace hf_cache
5 changes: 3 additions & 2 deletions src/llama-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,9 @@ llama_context::llama_context(
cparams.yarn_attn_factor = params.yarn_attn_factor >= 0.0f ? params.yarn_attn_factor : hparams.yarn_attn_factor;
cparams.yarn_beta_fast = params.yarn_beta_fast >= 0.0f ? params.yarn_beta_fast : hparams.yarn_beta_fast;
cparams.yarn_beta_slow = params.yarn_beta_slow >= 0.0f ? params.yarn_beta_slow : hparams.yarn_beta_slow;
cparams.embeddings = params.embeddings;
cparams.embeddings_pre_norm = false;
cparams.embeddings = params.embeddings;
cparams.embeddings_pre_norm = false;
cparams.embeddings_pre_norm_masked = false;
cparams.offload_kqv = params.offload_kqv;
cparams.no_perf = params.no_perf;
cparams.pooling_type = params.pooling_type;
Expand Down
1 change: 1 addition & 0 deletions tools/server/server-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3885,6 +3885,7 @@ void server_routes::init_routes() {
{ "eos_token", meta->eos_token_str },
{ "build_info", meta->build_info },
{ "is_sleeping", queue_tasks.is_sleeping() },
{ "cors_proxy_enabled", params.ui_mcp_proxy || params.webui_mcp_proxy },
};
if (params.use_jinja) {
if (!tmpl_tools.empty()) {
Expand Down
1 change: 1 addition & 0 deletions tools/server/server-models.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,7 @@ void server_models_routes::init_routes() {
// Deprecated: use ui_settings instead (kept for backward compat)
{"webui_settings", webui_settings},
{"build_info", std::string(llama_build_info())},
{"cors_proxy_enabled", params.ui_mcp_proxy || params.webui_mcp_proxy},
});
return res;
}
Expand Down
2 changes: 2 additions & 0 deletions tools/ui/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
VITE_PUBLIC_APP_NAME='llama-ui'
# VITE_DEBUG='true'
Loading
Loading