diff --git a/.github/workflows/ui-ci.yml b/.github/workflows/ui-ci.yml index 43d6e125687..7f6f467ddaa 100644 --- a/.github/workflows/ui-ci.yml +++ b/.github/workflows/ui-ci.yml @@ -41,7 +41,7 @@ jobs: ui-checks: name: UI Checks needs: ui-build - runs-on: ubuntu-24.04-arm + runs-on: ubuntu-slim continue-on-error: true steps: - name: Checkout code @@ -93,7 +93,7 @@ jobs: e2e-tests: name: E2E Tests needs: ui-build - runs-on: ubuntu-24.04-arm + runs-on: ubuntu-slim steps: - name: Checkout code uses: actions/checkout@v6 diff --git a/CODEOWNERS b/CODEOWNERS index f58f0f830fa..f92fe98ce8c 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -26,6 +26,7 @@ /common/fit.* @JohannesGaessler /common/jinja/ @CISC /common/ngram-map.* @srogmann +/conversion/ @CISC /convert_*.py @CISC /docs/backend/snapdragon/ @ggml-org/ggml-hexagon /examples/batched.swift/ @ggerganov diff --git a/common/arg.cpp b/common/arg.cpp index d7a935fc179..ab23b77e021 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -4,7 +4,6 @@ #include "chat.h" #include "common.h" #include "download.h" -#include "hf-cache.h" #include "json-schema-to-grammar.h" #include "log.h" #include "sampling.h" @@ -586,12 +585,6 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context // parse the first time to get -hf option (used for remote preset) parse_cli_args(); - // TODO: Remove later - try { - hf_cache::migrate_old_cache_to_hf_cache(params.hf_token, params.offline); - } catch (const std::exception & e) { - LOG_WRN("HF cache migration failed: %s\n", e.what()); - } // export_graph_ops loads only metadata const bool skip_model_download = ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS; diff --git a/common/hf-cache.cpp b/common/hf-cache.cpp index 20f33e4c7f4..ba7417a12bb 100644 --- a/common/hf-cache.cpp +++ b/common/hf-cache.cpp @@ -11,7 +11,6 @@ #include #include #include -#include // migration only #include #include #include @@ -336,15 +335,9 @@ hf_files get_repo_files(const std::string & repo_id, if (item["lfs"].contains("oid") && item["lfs"]["oid"].is_string()) { file.oid = item["lfs"]["oid"].get(); } - if (item["lfs"].contains("size") && item["lfs"]["size"].is_number()) { - file.size = item["lfs"]["size"].get(); - } } else if (item.contains("oid") && item["oid"].is_string()) { file.oid = item["oid"].get(); } - if (file.size == 0 && item.contains("size") && item["size"].is_number()) { - file.size = item["size"].get(); - } if (!file.oid.empty() && !is_valid_oid(file.oid)) { LOG_WRN("%s: skip invalid oid: %s\n", __func__, file.oid.c_str()); @@ -502,271 +495,4 @@ std::string finalize_file(const hf_file & file) { return file.final_path; } -// delete everything after this line, one day - -// copied from download.cpp without the tag part -struct gguf_split_info { - std::string prefix; // tag included - int index; - int count; -}; - -static gguf_split_info get_gguf_split_info(const std::string & path) { - static const std::regex re_split("^(.+)-([0-9]{5})-of-([0-9]{5})$", std::regex::icase); - std::smatch m; - - std::string prefix = path; - if (!string_remove_suffix(prefix, ".gguf")) { - return {}; - } - - int index = 1; - int count = 1; - - if (std::regex_match(prefix, m, re_split)) { - index = std::stoi(m[2].str()); - count = std::stoi(m[3].str()); - prefix = m[1].str(); - } - - return {std::move(prefix), index, count}; -} - -static std::pair parse_manifest_name(std::string & filename) { - static const std::regex re(R"(^manifest=([^=]+)=([^=]+)=.*\.json$)"); - std::smatch match; - if (std::regex_match(filename, match, re)) { - return {match[1].str(), match[2].str()}; - } - return {}; -} - -static std::string make_old_cache_filename(const std::string & owner, - const std::string & repo, - const std::string & filename) { - auto result = owner + "_" + repo + "_" + filename; - string_replace_all(result, "/", "_"); - return result; -} - -struct migrate_file { - std::string path; - std::string sha256; - size_t size; - fs::path old_path; - fs::path etag_path; - const hf_file * file; -}; - -using migrate_files = std::vector; - -static bool collect_file(const fs::path & old_cache, - const std::string & owner, - const std::string & repo, - const std::string & path, - const std::string & sha256, - const hf_files & files, - migrate_files & to_migrate) { - - const hf_file * file = nullptr; - - for (const auto & f : files) { - if (f.path == path) { - file = &f; - break; - } - } - - std::string old_filename = make_old_cache_filename(owner, repo, path); - fs::path old_path = old_cache / old_filename; - fs::path etag_path = old_path.string() + ".etag"; - - if (!fs::exists(old_path)) { - if (file && fs::exists(file->final_path)) { - return true; - } - LOG_WRN("%s: %s not found in old cache or HF cache\n", __func__, old_filename.c_str()); - return false; - } - - if (!file) { - LOG_WRN("%s: %s not found in current repo\n", __func__, old_filename.c_str()); - return false; - } - - if (!sha256.empty() && !file->oid.empty() && sha256 != file->oid) { - LOG_WRN("%s: %s is not up to date (sha256 mismatch)\n", __func__, old_filename.c_str()); - return false; - } - - if (file->size > 0) { - size_t size = fs::file_size(old_path); - if (size != file->size) { - LOG_WRN("%s: %s has wrong size %zu (expected %zu)\n", __func__, old_filename.c_str(), size, file->size); - return false; - } - } - - to_migrate.push_back({path, sha256, file->size, old_path, etag_path, file}); - return true; -} - -static bool collect_files(const fs::path & old_cache, - const std::string & owner, - const std::string & repo, - const nl::json & node, - const hf_files & files, - migrate_files & to_migrate) { - - if (!node.contains("rfilename") || - !node.contains("lfs") || - !node["lfs"].contains("sha256")) { - return true; - } - - std::string path = node["rfilename"]; - std::string sha256 = node["lfs"]["sha256"]; - - auto split = get_gguf_split_info(path); - - if (split.count <= 1) { - return collect_file(old_cache, owner, repo, path, sha256, files, to_migrate); - } - - std::vector> splits; - - for (const auto & f : files) { - auto split_f = get_gguf_split_info(f.path); - if (split_f.count == split.count && split_f.prefix == split.prefix) { - // sadly the manifest only provides the sha256 of the first file (index == 1) - // the rest will be verified using the size... - std::string f_sha256 = (split_f.index == 1) ? sha256 : ""; - splits.emplace_back(f.path, f_sha256); - } - } - - if ((int)splits.size() != split.count) { - LOG_WRN("%s: expected %d split files but found %d in repo\n", __func__, split.count, (int)splits.size()); - return false; - } - - for (const auto & [f_path, f_sha256] : splits) { - if (!collect_file(old_cache, owner, repo, f_path, f_sha256, files, to_migrate)) { - return false; - } - } - - return true; -} - -static bool migrate_file(const migrate_file & file) { - std::error_code ec; - - fs::path new_path(file.file->local_path); - fs::create_directories(new_path.parent_path(), ec); - - if (!fs::exists(new_path, ec)) { - fs::rename(file.old_path, new_path, ec); - if (ec) { - fs::copy_file(file.old_path, new_path, ec); - if (ec) { - LOG_ERR("%s: failed to move/copy %s: %s\n", __func__, file.old_path.string().c_str(), ec.message().c_str()); - return false; - } - } - fs::remove(file.old_path, ec); - } - fs::remove(file.etag_path, ec); - - std::string filename = finalize_file(*file.file); - LOG_INF("%s: migrated %s -> %s\n", __func__, file.old_path.filename().string().c_str(), filename.c_str()); - return true; -} - -void migrate_old_cache_to_hf_cache(const std::string & token, bool offline) { - fs::path old_cache = fs_get_cache_directory(); - if (!fs::exists(old_cache)) { - return; - } - - if (offline) { - LOG_WRN("%s: skipping migration in offline mode (will run when online)\n", __func__); - return; // -hf is not going to work - } - - bool warned = false; - - for (const auto & entry : fs::directory_iterator(old_cache)) { - if (!entry.is_regular_file()) { - continue; - } - auto filename = entry.path().filename().string(); - auto [owner, repo] = parse_manifest_name(filename); - - if (owner.empty() || repo.empty()) { - continue; - } - - if (!warned) { - warned = true; - LOG_WRN("================================================================================\n" - "WARNING: Migrating cache to HuggingFace cache directory\n" - " Old cache: %s\n" - " New cache: %s\n" - "This one-time migration moves models previously downloaded with -hf\n" - "from the legacy llama.cpp cache to the standard HuggingFace cache.\n" - "Models downloaded with --model-url are not affected.\n" - "================================================================================\n", - old_cache.string().c_str(), get_cache_directory().string().c_str()); - } - - auto repo_id = owner + "/" + repo; - auto files = get_repo_files(repo_id, token); - - if (files.empty()) { - LOG_WRN("%s: could not get repo files for %s, skipping\n", __func__, repo_id.c_str()); - continue; - } - - migrate_files to_migrate; - bool ok = true; - - try { - std::ifstream manifest(entry.path()); - auto json = nl::json::parse(manifest); - for (const char * key : {"ggufFile", "mmprojFile"}) { - if (json.contains(key)) { - if (!collect_files(old_cache, owner, repo, json[key], files, to_migrate)) { - ok = false; - break; - } - } - } - } catch (const std::exception & e) { - LOG_WRN("%s: failed to parse manifest %s: %s\n", __func__, filename.c_str(), e.what()); - continue; - } - - if (!ok) { - LOG_WRN("%s: migration skipped: one or more files failed validation\n", __func__); - continue; - } - - for (const auto & file : to_migrate) { - if (!migrate_file(file)) { - ok = false; - break; - } - } - - if (!ok) { - LOG_WRN("%s: migration failed: could not migrate all files\n", __func__); - continue; - } - - LOG_INF("%s: migration complete, deleting manifest: %s\n", __func__, entry.path().string().c_str()); - fs::remove(entry.path()); - } -} - } // namespace hf_cache diff --git a/common/hf-cache.h b/common/hf-cache.h index 9e46f977437..23fa0adb729 100644 --- a/common/hf-cache.h +++ b/common/hf-cache.h @@ -14,7 +14,6 @@ struct hf_file { std::string final_path; std::string oid; std::string repo_id; - size_t size = 0; // only for the migration }; using hf_files = std::vector; @@ -30,7 +29,4 @@ hf_files get_cached_files(const std::string & repo_id = {}); // Create snapshot path (link or move/copy) and return it std::string finalize_file(const hf_file & file); -// TODO: Remove later -void migrate_old_cache_to_hf_cache(const std::string & token, bool offline = false); - } // namespace hf_cache diff --git a/src/llama-context.cpp b/src/llama-context.cpp index b1b12d017c0..3cc8ffa6668 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -64,8 +64,9 @@ llama_context::llama_context( cparams.yarn_attn_factor = params.yarn_attn_factor >= 0.0f ? params.yarn_attn_factor : hparams.yarn_attn_factor; cparams.yarn_beta_fast = params.yarn_beta_fast >= 0.0f ? params.yarn_beta_fast : hparams.yarn_beta_fast; cparams.yarn_beta_slow = params.yarn_beta_slow >= 0.0f ? params.yarn_beta_slow : hparams.yarn_beta_slow; - cparams.embeddings = params.embeddings; - cparams.embeddings_pre_norm = false; + cparams.embeddings = params.embeddings; + cparams.embeddings_pre_norm = false; + cparams.embeddings_pre_norm_masked = false; cparams.offload_kqv = params.offload_kqv; cparams.no_perf = params.no_perf; cparams.pooling_type = params.pooling_type; diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 0f3fb9efa3c..6b16c6b4962 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3885,6 +3885,7 @@ void server_routes::init_routes() { { "eos_token", meta->eos_token_str }, { "build_info", meta->build_info }, { "is_sleeping", queue_tasks.is_sleeping() }, + { "cors_proxy_enabled", params.ui_mcp_proxy || params.webui_mcp_proxy }, }; if (params.use_jinja) { if (!tmpl_tools.empty()) { diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 6c6fed52d58..ccf42320f77 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -1165,6 +1165,7 @@ void server_models_routes::init_routes() { // Deprecated: use ui_settings instead (kept for backward compat) {"webui_settings", webui_settings}, {"build_info", std::string(llama_build_info())}, + {"cors_proxy_enabled", params.ui_mcp_proxy || params.webui_mcp_proxy}, }); return res; } diff --git a/tools/ui/.env.example b/tools/ui/.env.example new file mode 100644 index 00000000000..9a995b746f6 --- /dev/null +++ b/tools/ui/.env.example @@ -0,0 +1,2 @@ +VITE_PUBLIC_APP_NAME='llama-ui' +# VITE_DEBUG='true' diff --git a/tools/ui/package-lock.json b/tools/ui/package-lock.json index bf23307b82c..3686eb3261e 100644 --- a/tools/ui/package-lock.json +++ b/tools/ui/package-lock.json @@ -6008,9 +6008,9 @@ } }, "node_modules/katex": { - "version": "0.16.22", - "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.22.tgz", - "integrity": "sha512-XCHRdUw4lf3SKBaJe4EvgqIuWwkPSo9XoeO8GjQW94Bp7TWv9hNhzZjZ+OH9yf1UmLygb7DIT5GSFQiyt16zYg==", + "version": "0.16.47", + "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.47.tgz", + "integrity": "sha512-Eeo8Ys1doU1z+x8AZsPpQu+p/QcZBI5PeOo7QGQdy2x2m0MU/hYagBbGOmXwr5KVbEfVuWv9LpnQWeehogurjg==", "dev": true, "funding": [ "https://opencollective.com/katex", diff --git a/tools/ui/src/app.css b/tools/ui/src/app.css index 6e29b70a35a..d6dc6670c0c 100644 --- a/tools/ui/src/app.css +++ b/tools/ui/src/app.css @@ -1,4 +1,5 @@ @import 'tailwindcss'; +@source "."; @import 'tw-animate-css'; @@ -39,6 +40,9 @@ --sidebar-ring: oklch(0.708 0 0); --code-background: oklch(0.985 0 0); --code-foreground: oklch(0.145 0 0); + --font-mono: + ui-monospace, SFMono-Regular, 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, + 'Liberation Mono', Menlo, monospace; --layer-popover: 1000000; --chat-form-area-height: 8rem; @@ -171,6 +175,10 @@ *::-webkit-scrollbar-thumb:hover { background: hsl(var(--muted-foreground) / 0.5); } + + :where(code, pre, kbd, samp) { + font-family: var(--font-mono); + } } @layer utilities { diff --git a/tools/ui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionModels.svelte b/tools/ui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionModels.svelte index 2f9471e0ddd..29702060565 100644 --- a/tools/ui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionModels.svelte +++ b/tools/ui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionModels.svelte @@ -7,7 +7,6 @@ import { activeMessages } from '$lib/stores/conversations.svelte'; interface Props { - currentModel?: string; disabled?: boolean; forceForegroundText?: boolean; hasAudioModality?: boolean; @@ -20,7 +19,6 @@ } let { - currentModel, disabled = false, forceForegroundText = false, hasAudioModality = $bindable(false), @@ -41,14 +39,28 @@ let lastSyncedConversationModel: string | null = null; + let selectorModel = $derived(conversationModel ?? modelsStore.selectedModelName ?? null); + $effect(() => { if (conversationModel && conversationModel !== lastSyncedConversationModel) { - lastSyncedConversationModel = conversationModel; + if (modelOptions().some((m) => m.model === conversationModel)) { + modelsStore.selectedModelName = conversationModel; + modelsStore.selectModelByName(conversationModel); + } else { + modelsStore.selectedModelName = null; + modelsStore.clearSelection(); + } - modelsStore.selectModelByName(conversationModel); - } else if (isRouter && !modelsStore.selectedModelId && modelsStore.loadedModelIds.length > 0) { + lastSyncedConversationModel = conversationModel; + } else if ( + isRouter && + !modelsStore.selectedModelId && + modelsStore.loadedModelIds.length > 0 && + activeMessages().length > 0 && + !conversationModel + ) { lastSyncedConversationModel = null; - // auto-select the first loaded model only when nothing is selected yet + const first = modelOptions().find((m) => modelsStore.loadedModelIds.includes(m.model)); if (first) modelsStore.selectModelById(first.id); @@ -151,7 +163,7 @@ @@ -159,7 +171,7 @@ diff --git a/tools/ui/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPickerMcpPrompts.svelte b/tools/ui/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPickerMcpPrompts.svelte index 567fdac4704..ff734ac88fb 100644 --- a/tools/ui/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPickerMcpPrompts.svelte +++ b/tools/ui/src/lib/components/app/chat/ChatForm/ChatFormPickers/ChatFormPickerMcpPrompts/ChatFormPickerMcpPrompts.svelte @@ -162,7 +162,7 @@ return; } - if (import.meta.env.DEV) { + if (import.meta.env.DEV && import.meta.env.VITE_DEBUG) { console.log('[ChatFormPickerMcpPrompts] Fetching completions for:', { serverName: selectedPrompt.serverName, promptName: selectedPrompt.name, @@ -181,7 +181,7 @@ value ); - if (import.meta.env.DEV) { + if (import.meta.env.DEV && import.meta.env.VITE_DEBUG) { console.log('[ChatFormPickerMcpPrompts] Autocomplete result:', { argName, value, diff --git a/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte b/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte index b4d69b932ab..4c74206f1be 100644 --- a/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte +++ b/tools/ui/src/lib/components/app/chat/ChatMessages/ChatMessage/ChatMessageAssistant/ChatMessageAssistant.svelte @@ -379,9 +379,6 @@ border-radius: 1rem; background: hsl(var(--muted) / 0.3); color: var(--foreground); - font-family: - ui-monospace, SFMono-Regular, 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, - 'Liberation Mono', Menlo, monospace; font-size: 0.875rem; line-height: 1.6; white-space: pre-wrap; diff --git a/tools/ui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte b/tools/ui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte index dc3eab134f4..bd93a569ce6 100644 --- a/tools/ui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte +++ b/tools/ui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte @@ -8,6 +8,7 @@ ChatMessages, ChatScreenDragOverlay, ChatScreenProcessingInfo, + ChatScreenActionScrollDown, DialogEmptyFileAlert, DialogFileUploadError, DialogChatError, @@ -338,7 +339,9 @@ }); function handleMessagesReady() { - if (!disableAutoScroll && !autoScroll.userScrolledUp) { + if (disableAutoScroll) return; + + if (!autoScroll.userScrolledUp) { requestAnimationFrame(() => { autoScroll.scrollToBottom('instant'); }); @@ -405,7 +408,7 @@
{#if isEmpty}
@@ -419,6 +422,8 @@
{/if} + + {#if page.params.id} {/if} diff --git a/tools/ui/src/lib/components/app/chat/ChatScreen/ChatScreenActionScrollDown.svelte b/tools/ui/src/lib/components/app/chat/ChatScreen/ChatScreenActionScrollDown.svelte new file mode 100644 index 00000000000..3f3ee867710 --- /dev/null +++ b/tools/ui/src/lib/components/app/chat/ChatScreen/ChatScreenActionScrollDown.svelte @@ -0,0 +1,48 @@ + + +
+ +
diff --git a/tools/ui/src/lib/components/app/chat/index.ts b/tools/ui/src/lib/components/app/chat/index.ts index 5f65979803c..9c7ce864e21 100644 --- a/tools/ui/src/lib/components/app/chat/index.ts +++ b/tools/ui/src/lib/components/app/chat/index.ts @@ -667,3 +667,10 @@ export { default as ChatScreenForm } from './ChatScreen/ChatScreenForm.svelte'; * Only visible when `isCurrentConversationLoading` is true. */ export { default as ChatScreenProcessingInfo } from './ChatScreen/ChatScreenProcessingInfo.svelte'; + +/** + * Scroll-to-bottom action button. Displays a floating button when the user + * has scrolled up more than half a viewport height from the bottom. + * Takes the chat container element as a prop to manage scroll state internally. + */ +export { default as ChatScreenActionScrollDown } from './ChatScreen/ChatScreenActionScrollDown.svelte'; diff --git a/tools/ui/src/lib/components/app/content/MarkdownContent/MarkdownContent.svelte b/tools/ui/src/lib/components/app/content/MarkdownContent/MarkdownContent.svelte index c1b71e4519e..3a11854b6e4 100644 --- a/tools/ui/src/lib/components/app/content/MarkdownContent/MarkdownContent.svelte +++ b/tools/ui/src/lib/components/app/content/MarkdownContent/MarkdownContent.svelte @@ -742,9 +742,6 @@ padding: 0.125rem 0.375rem; border-radius: 0.375rem; font-size: 0.875rem; - font-family: - ui-monospace, SFMono-Regular, 'SF Mono', Monaco, 'Cascadia Code', 'Roboto Mono', Consolas, - 'Liberation Mono', Menlo, monospace; } div :global(pre) { diff --git a/tools/ui/src/lib/components/app/content/SyntaxHighlightedCode.svelte b/tools/ui/src/lib/components/app/content/SyntaxHighlightedCode.svelte index 41d59324cb4..c4d1706bfe1 100644 --- a/tools/ui/src/lib/components/app/content/SyntaxHighlightedCode.svelte +++ b/tools/ui/src/lib/components/app/content/SyntaxHighlightedCode.svelte @@ -80,12 +80,6 @@