|
4 | 4 | #include "gguf-model-data.h" |
5 | 5 |
|
6 | 6 | #include "common.h" |
| 7 | +#include "ggml-cpp.h" |
7 | 8 | #include "gguf.h" |
8 | 9 |
|
9 | 10 | #include <algorithm> |
@@ -531,14 +532,18 @@ static std::optional<gguf_remote_model> fetch_and_parse( |
531 | 532 | return std::nullopt; |
532 | 533 | } |
533 | 534 |
|
| 535 | +static std::string get_cache_file_path(const std::string& cdir, const std::string& repo_part, const std::string& filename) { |
| 536 | + std::string fname_part = sanitize_for_path(filename); |
| 537 | + return cdir + "/" + repo_part + "--" + fname_part + ".partial"; |
| 538 | +} |
| 539 | + |
534 | 540 | // Try cache first, then fetch and parse a single GGUF shard. |
535 | 541 | static std::optional<gguf_remote_model> fetch_or_cached( |
536 | 542 | const std::string & repo, |
537 | 543 | const std::string & filename, |
538 | 544 | const std::string & cdir, |
539 | 545 | const std::string & repo_part) { |
540 | | - std::string fname_part = sanitize_for_path(filename); |
541 | | - std::string cache_path = cdir + "/" + repo_part + "--" + fname_part + ".partial"; |
| 546 | + std::string cache_path = get_cache_file_path(cdir, repo_part, filename); |
542 | 547 |
|
543 | 548 | { |
544 | 549 | std::vector<char> cached; |
@@ -611,3 +616,84 @@ std::optional<gguf_remote_model> gguf_fetch_model_meta( |
611 | 616 |
|
612 | 617 | return model_opt; |
613 | 618 | } |
| 619 | + |
| 620 | +gguf_context_ptr gguf_fetch_gguf_ctx( |
| 621 | + const std::string & repo, |
| 622 | + const std::string & quant, |
| 623 | + const std::string & cache_dir) { |
| 624 | + std::string cdir = cache_dir.empty() ? get_default_cache_dir() : cache_dir; |
| 625 | + std::string repo_part = sanitize_for_path(repo); |
| 626 | + |
| 627 | + std::string split_prefix; |
| 628 | + std::string filename = detect_gguf_filename(repo, quant, split_prefix); |
| 629 | + |
| 630 | + if (filename.empty()) { |
| 631 | + return nullptr; |
| 632 | + } |
| 633 | + |
| 634 | + auto model_opt = fetch_or_cached(repo, filename, cdir, repo_part); |
| 635 | + if (!model_opt.has_value()) { |
| 636 | + fprintf(stderr, "gguf_fetch: failed to fetch %s\n", filename.c_str()); |
| 637 | + return nullptr; |
| 638 | + } |
| 639 | + |
| 640 | + auto & model = model_opt.value(); |
| 641 | + |
| 642 | + const std::string cache_path = get_cache_file_path(cdir, repo_part, filename); |
| 643 | + |
| 644 | + ggml_context_ptr ggml_ctx_ptr; |
| 645 | + ggml_context * ggml_ctx{}; |
| 646 | + gguf_init_params params{true, &ggml_ctx}; |
| 647 | + gguf_context_ptr ctx{gguf_init_from_file(cache_path.c_str(), params)}; |
| 648 | + ggml_ctx_ptr.reset(ggml_ctx); |
| 649 | + |
| 650 | + if (ctx == nullptr) { |
| 651 | + fprintf(stderr, "gguf_fetch: gguf_init_from_file failed\n"); |
| 652 | + return nullptr; |
| 653 | + } |
| 654 | + |
| 655 | + // If the model is split across multiple files we need to fetch the remaining shards metadata |
| 656 | + if (model.n_split > 1) { |
| 657 | + if (split_prefix.empty()) { |
| 658 | + fprintf(stderr, "gguf_fetch: model reports %u splits but filename has no split pattern\n", model.n_split); |
| 659 | + return nullptr; |
| 660 | + } |
| 661 | + |
| 662 | + fprintf(stderr, "gguf_fetch: split model with %u shards, fetching remaining %u...\n", |
| 663 | + model.n_split, model.n_split - 1); |
| 664 | + |
| 665 | + for (int i = 2; i <= model.n_split; i++) { |
| 666 | + char num_buf[6], total_buf[6]; |
| 667 | + snprintf(num_buf, sizeof(num_buf), "%05d", i); |
| 668 | + snprintf(total_buf, sizeof(total_buf), "%05d", (int)model.n_split); |
| 669 | + std::string shard_name = split_prefix + "-" + num_buf + "-of-" + total_buf + ".gguf"; |
| 670 | + |
| 671 | + auto shard = fetch_or_cached(repo, shard_name, cdir, repo_part); |
| 672 | + if (!shard.has_value()) { |
| 673 | + fprintf(stderr, "gguf_fetch: failed to fetch shard %d: %s\n", i, shard_name.c_str()); |
| 674 | + return nullptr; |
| 675 | + } |
| 676 | + |
| 677 | + // Load tensors from shard and add to main gguf_context |
| 678 | + const std::string shard_path = get_cache_file_path(cdir, repo_part, shard_name); |
| 679 | + ggml_context_ptr shard_ggml_ctx_ptr; |
| 680 | + ggml_context * shard_ggml_ctx{}; |
| 681 | + gguf_init_params shard_params{true, &shard_ggml_ctx}; |
| 682 | + gguf_context_ptr shard_ctx{gguf_init_from_file(shard_path.c_str(), shard_params)}; |
| 683 | + shard_ggml_ctx_ptr.reset(shard_ggml_ctx); |
| 684 | + |
| 685 | + if (shard_ctx == nullptr) { |
| 686 | + fprintf(stderr, "gguf_fetch: shard gguf_init_from_file failed\n"); |
| 687 | + return nullptr; |
| 688 | + } |
| 689 | + |
| 690 | + for (ggml_tensor * t = ggml_get_first_tensor(shard_ggml_ctx); t; t = ggml_get_next_tensor(shard_ggml_ctx, t)) { |
| 691 | + gguf_add_tensor(ctx.get(), t); |
| 692 | + } |
| 693 | + } |
| 694 | + |
| 695 | + gguf_set_val_u16(ctx.get(), "split.count", 1); |
| 696 | + } |
| 697 | + |
| 698 | + return ctx; |
| 699 | +} |
0 commit comments