Skip to content

Commit 45b455e

Browse files
authored
common : remove hf cache migration (ggml-org#23266)
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
1 parent 3a9c1b8 commit 45b455e

3 files changed

Lines changed: 0 additions & 285 deletions

File tree

common/arg.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#include "chat.h"
55
#include "common.h"
66
#include "download.h"
7-
#include "hf-cache.h"
87
#include "json-schema-to-grammar.h"
98
#include "log.h"
109
#include "sampling.h"
@@ -586,12 +585,6 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
586585
// parse the first time to get -hf option (used for remote preset)
587586
parse_cli_args();
588587

589-
// TODO: Remove later
590-
try {
591-
hf_cache::migrate_old_cache_to_hf_cache(params.hf_token, params.offline);
592-
} catch (const std::exception & e) {
593-
LOG_WRN("HF cache migration failed: %s\n", e.what());
594-
}
595588
// export_graph_ops loads only metadata
596589
const bool skip_model_download = ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
597590

common/hf-cache.cpp

Lines changed: 0 additions & 274 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#include <filesystem>
1212
#include <fstream>
1313
#include <atomic>
14-
#include <regex> // migration only
1514
#include <string>
1615
#include <string_view>
1716
#include <stdexcept>
@@ -336,15 +335,9 @@ hf_files get_repo_files(const std::string & repo_id,
336335
if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) {
337336
file.oid = item["lfs"]["oid"].get<std::string>();
338337
}
339-
if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) {
340-
file.size = item["lfs"]["size"].get<size_t>();
341-
}
342338
} else if (item.contains("oid") && item["oid"].is_string()) {
343339
file.oid = item["oid"].get<std::string>();
344340
}
345-
if (file.size == 0 && item.contains("size") && item["size"].is_number()) {
346-
file.size = item["size"].get<size_t>();
347-
}
348341

349342
if (!file.oid.empty() && !is_valid_oid(file.oid)) {
350343
LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str());
@@ -502,271 +495,4 @@ std::string finalize_file(const hf_file & file) {
502495
return file.final_path;
503496
}
504497

505-
// delete everything after this line, one day
506-
507-
// copied from download.cpp without the tag part
508-
struct gguf_split_info {
509-
std::string prefix; // tag included
510-
int index;
511-
int count;
512-
};
513-
514-
static gguf_split_info get_gguf_split_info(const std::string & path) {
515-
static const std::regex re_split("^(.+)-([0-9]{5})-of-([0-9]{5})$", std::regex::icase);
516-
std::smatch m;
517-
518-
std::string prefix = path;
519-
if (!string_remove_suffix(prefix, ".gguf")) {
520-
return {};
521-
}
522-
523-
int index = 1;
524-
int count = 1;
525-
526-
if (std::regex_match(prefix, m, re_split)) {
527-
index = std::stoi(m[2].str());
528-
count = std::stoi(m[3].str());
529-
prefix = m[1].str();
530-
}
531-
532-
return {std::move(prefix), index, count};
533-
}
534-
535-
static std::pair<std::string, std::string> parse_manifest_name(std::string & filename) {
536-
static const std::regex re(R"(^manifest=([^=]+)=([^=]+)=.*\.json$)");
537-
std::smatch match;
538-
if (std::regex_match(filename, match, re)) {
539-
return {match[1].str(), match[2].str()};
540-
}
541-
return {};
542-
}
543-
544-
static std::string make_old_cache_filename(const std::string & owner,
545-
const std::string & repo,
546-
const std::string & filename) {
547-
auto result = owner + "_" + repo + "_" + filename;
548-
string_replace_all(result, "/", "_");
549-
return result;
550-
}
551-
552-
struct migrate_file {
553-
std::string path;
554-
std::string sha256;
555-
size_t size;
556-
fs::path old_path;
557-
fs::path etag_path;
558-
const hf_file * file;
559-
};
560-
561-
using migrate_files = std::vector<migrate_file>;
562-
563-
static bool collect_file(const fs::path & old_cache,
564-
const std::string & owner,
565-
const std::string & repo,
566-
const std::string & path,
567-
const std::string & sha256,
568-
const hf_files & files,
569-
migrate_files & to_migrate) {
570-
571-
const hf_file * file = nullptr;
572-
573-
for (const auto & f : files) {
574-
if (f.path == path) {
575-
file = &f;
576-
break;
577-
}
578-
}
579-
580-
std::string old_filename = make_old_cache_filename(owner, repo, path);
581-
fs::path old_path = old_cache / old_filename;
582-
fs::path etag_path = old_path.string() + ".etag";
583-
584-
if (!fs::exists(old_path)) {
585-
if (file && fs::exists(file->final_path)) {
586-
return true;
587-
}
588-
LOG_WRN("%s: %s not found in old cache or HF cache\n", __func__, old_filename.c_str());
589-
return false;
590-
}
591-
592-
if (!file) {
593-
LOG_WRN("%s: %s not found in current repo\n", __func__, old_filename.c_str());
594-
return false;
595-
}
596-
597-
if (!sha256.empty() && !file->oid.empty() && sha256 != file->oid) {
598-
LOG_WRN("%s: %s is not up to date (sha256 mismatch)\n", __func__, old_filename.c_str());
599-
return false;
600-
}
601-
602-
if (file->size > 0) {
603-
size_t size = fs::file_size(old_path);
604-
if (size != file->size) {
605-
LOG_WRN("%s: %s has wrong size %zu (expected %zu)\n", __func__, old_filename.c_str(), size, file->size);
606-
return false;
607-
}
608-
}
609-
610-
to_migrate.push_back({path, sha256, file->size, old_path, etag_path, file});
611-
return true;
612-
}
613-
614-
static bool collect_files(const fs::path & old_cache,
615-
const std::string & owner,
616-
const std::string & repo,
617-
const nl::json & node,
618-
const hf_files & files,
619-
migrate_files & to_migrate) {
620-
621-
if (!node.contains("rfilename") ||
622-
!node.contains("lfs") ||
623-
!node["lfs"].contains("sha256")) {
624-
return true;
625-
}
626-
627-
std::string path = node["rfilename"];
628-
std::string sha256 = node["lfs"]["sha256"];
629-
630-
auto split = get_gguf_split_info(path);
631-
632-
if (split.count <= 1) {
633-
return collect_file(old_cache, owner, repo, path, sha256, files, to_migrate);
634-
}
635-
636-
std::vector<std::pair<std::string, std::string>> splits;
637-
638-
for (const auto & f : files) {
639-
auto split_f = get_gguf_split_info(f.path);
640-
if (split_f.count == split.count && split_f.prefix == split.prefix) {
641-
// sadly the manifest only provides the sha256 of the first file (index == 1)
642-
// the rest will be verified using the size...
643-
std::string f_sha256 = (split_f.index == 1) ? sha256 : "";
644-
splits.emplace_back(f.path, f_sha256);
645-
}
646-
}
647-
648-
if ((int)splits.size() != split.count) {
649-
LOG_WRN("%s: expected %d split files but found %d in repo\n", __func__, split.count, (int)splits.size());
650-
return false;
651-
}
652-
653-
for (const auto & [f_path, f_sha256] : splits) {
654-
if (!collect_file(old_cache, owner, repo, f_path, f_sha256, files, to_migrate)) {
655-
return false;
656-
}
657-
}
658-
659-
return true;
660-
}
661-
662-
static bool migrate_file(const migrate_file & file) {
663-
std::error_code ec;
664-
665-
fs::path new_path(file.file->local_path);
666-
fs::create_directories(new_path.parent_path(), ec);
667-
668-
if (!fs::exists(new_path, ec)) {
669-
fs::rename(file.old_path, new_path, ec);
670-
if (ec) {
671-
fs::copy_file(file.old_path, new_path, ec);
672-
if (ec) {
673-
LOG_ERR("%s: failed to move/copy %s: %s\n", __func__, file.old_path.string().c_str(), ec.message().c_str());
674-
return false;
675-
}
676-
}
677-
fs::remove(file.old_path, ec);
678-
}
679-
fs::remove(file.etag_path, ec);
680-
681-
std::string filename = finalize_file(*file.file);
682-
LOG_INF("%s: migrated %s -> %s\n", __func__, file.old_path.filename().string().c_str(), filename.c_str());
683-
return true;
684-
}
685-
686-
void migrate_old_cache_to_hf_cache(const std::string & token, bool offline) {
687-
fs::path old_cache = fs_get_cache_directory();
688-
if (!fs::exists(old_cache)) {
689-
return;
690-
}
691-
692-
if (offline) {
693-
LOG_WRN("%s: skipping migration in offline mode (will run when online)\n", __func__);
694-
return; // -hf is not going to work
695-
}
696-
697-
bool warned = false;
698-
699-
for (const auto & entry : fs::directory_iterator(old_cache)) {
700-
if (!entry.is_regular_file()) {
701-
continue;
702-
}
703-
auto filename = entry.path().filename().string();
704-
auto [owner, repo] = parse_manifest_name(filename);
705-
706-
if (owner.empty() || repo.empty()) {
707-
continue;
708-
}
709-
710-
if (!warned) {
711-
warned = true;
712-
LOG_WRN("================================================================================\n"
713-
"WARNING: Migrating cache to HuggingFace cache directory\n"
714-
" Old cache: %s\n"
715-
" New cache: %s\n"
716-
"This one-time migration moves models previously downloaded with -hf\n"
717-
"from the legacy llama.cpp cache to the standard HuggingFace cache.\n"
718-
"Models downloaded with --model-url are not affected.\n"
719-
"================================================================================\n",
720-
old_cache.string().c_str(), get_cache_directory().string().c_str());
721-
}
722-
723-
auto repo_id = owner + "/" + repo;
724-
auto files = get_repo_files(repo_id, token);
725-
726-
if (files.empty()) {
727-
LOG_WRN("%s: could not get repo files for %s, skipping\n", __func__, repo_id.c_str());
728-
continue;
729-
}
730-
731-
migrate_files to_migrate;
732-
bool ok = true;
733-
734-
try {
735-
std::ifstream manifest(entry.path());
736-
auto json = nl::json::parse(manifest);
737-
for (const char * key : {"ggufFile", "mmprojFile"}) {
738-
if (json.contains(key)) {
739-
if (!collect_files(old_cache, owner, repo, json[key], files, to_migrate)) {
740-
ok = false;
741-
break;
742-
}
743-
}
744-
}
745-
} catch (const std::exception & e) {
746-
LOG_WRN("%s: failed to parse manifest %s: %s\n", __func__, filename.c_str(), e.what());
747-
continue;
748-
}
749-
750-
if (!ok) {
751-
LOG_WRN("%s: migration skipped: one or more files failed validation\n", __func__);
752-
continue;
753-
}
754-
755-
for (const auto & file : to_migrate) {
756-
if (!migrate_file(file)) {
757-
ok = false;
758-
break;
759-
}
760-
}
761-
762-
if (!ok) {
763-
LOG_WRN("%s: migration failed: could not migrate all files\n", __func__);
764-
continue;
765-
}
766-
767-
LOG_INF("%s: migration complete, deleting manifest: %s\n", __func__, entry.path().string().c_str());
768-
fs::remove(entry.path());
769-
}
770-
}
771-
772498
} // namespace hf_cache

common/hf-cache.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ struct hf_file {
1414
std::string final_path;
1515
std::string oid;
1616
std::string repo_id;
17-
size_t size = 0; // only for the migration
1817
};
1918

2019
using hf_files = std::vector<hf_file>;
@@ -30,7 +29,4 @@ hf_files get_cached_files(const std::string & repo_id = {});
3029
// Create snapshot path (link or move/copy) and return it
3130
std::string finalize_file(const hf_file & file);
3231

33-
// TODO: Remove later
34-
void migrate_old_cache_to_hf_cache(const std::string & token, bool offline = false);
35-
3632
} // namespace hf_cache

0 commit comments

Comments
 (0)