From bbce619adb409880fb6db850a1c5a5f36a4dc7b1 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 22 May 2026 11:46:26 +0300 Subject: [PATCH 1/2] cmake : add install() for impl libraries + fix apple builds (#23511) * pi : update * ci : fix ios build * ci : fix andoroid * ci : fix apple builds * cmake : add install() for impl libraries Add install(TARGETS LIBRARY) for all -impl libraries that were changed from STATIC to shared (controlled by BUILD_SHARED_LIBS) in commit bb28c1fe2. Without this, cmake --install fails to copy the shared libraries, causing runtime errors like: llama-server: error while loading shared libraries: libllama-server-impl.so Ref: https://github.com/ggml-org/llama.cpp/issues/23494#issuecomment-4512912515 Assisted-by: llama.cpp:local pi * ci : fix xcframework build --- .github/workflows/build-apple.yml | 5 +++++ .github/workflows/build-cmake-pkg.yml | 11 ++++++++--- .github/workflows/release.yml | 1 + .pi/gg/SYSTEM.md | 5 +++-- build-xcframework.sh | 2 ++ examples/llama.android/lib/build.gradle.kts | 1 + tools/batched-bench/CMakeLists.txt | 4 ++++ tools/cli/CMakeLists.txt | 4 ++++ tools/completion/CMakeLists.txt | 4 ++++ tools/fit-params/CMakeLists.txt | 4 ++++ tools/llama-bench/CMakeLists.txt | 4 ++++ tools/perplexity/CMakeLists.txt | 4 ++++ tools/quantize/CMakeLists.txt | 4 ++++ tools/server/CMakeLists.txt | 4 ++++ 14 files changed, 52 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml index b99e614666e1..a5b7ef56e4e0 100644 --- a/.github/workflows/build-apple.yml +++ b/.github/workflows/build-apple.yml @@ -59,6 +59,7 @@ jobs: cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_APP=OFF \ -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TOOLS=OFF \ @@ -89,6 +90,7 @@ jobs: -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_APP=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ @@ -138,6 +140,7 @@ jobs: -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_APP=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ @@ -163,6 +166,7 @@ jobs: -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_APP=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ @@ -206,6 +210,7 @@ jobs: -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_APP=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ diff --git a/.github/workflows/build-cmake-pkg.yml b/.github/workflows/build-cmake-pkg.yml index 6bbfd9988ba9..b36ac5b8e6d9 100644 --- a/.github/workflows/build-cmake-pkg.yml +++ b/.github/workflows/build-cmake-pkg.yml @@ -19,9 +19,14 @@ jobs: - name: Build run: | PREFIX="$(pwd)"/inst - cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \ - -DLLAMA_OPENSSL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_APP=OFF -DCMAKE_BUILD_TYPE=Release + cmake -S . -B build \ + -DCMAKE_PREFIX_PATH="$PREFIX" \ + -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_APP=OFF \ + -DCMAKE_BUILD_TYPE=Release cmake --build build --config Release cmake --install build --prefix "$PREFIX" --config Release diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1880c155c85b..e7cbac35fc53 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1108,6 +1108,7 @@ jobs: -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_OPENSSL=OFF \ + -DLLAMA_BUILD_APP=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ diff --git a/.pi/gg/SYSTEM.md b/.pi/gg/SYSTEM.md index b7597a4c3aec..06d97ae78ee2 100644 --- a/.pi/gg/SYSTEM.md +++ b/.pi/gg/SYSTEM.md @@ -1,7 +1,7 @@ You are a coding agent. Here are some very important rules that you must follow: General: -- By very precise and concise when writing code, comments, explanations, etc. +- Be very precise and concise when writing code, comments, explanations, etc. - PR and commit titles format: ` : `. Lookup recents for examples - Don't try to build or run the code unless you are explicitly asked to do so - Use the `gh` CLI tool when querying PRs, issues, or other GitHub resources @@ -16,7 +16,8 @@ Pull requests (PRs): - New branch names are prefixed with "gg/" - Before opening a pull request, ask the user to confirm the description - When creating a pull request, look for the repository's PR template and follow it -- For the AI usage disclosure section, write "YES. llama.cpp + pi" +- For the AI usage disclosure section, write "YES. llama.cpp + pi + [MODEL]" +- Ask the user to tell you what model was used and write it in place of [MODEL] - Always create the pull requests in draft mode Commits: diff --git a/build-xcframework.sh b/build-xcframework.sh index c25a1ef28c18..d287d72fbd89 100755 --- a/build-xcframework.sh +++ b/build-xcframework.sh @@ -7,6 +7,7 @@ VISIONOS_MIN_OS_VERSION=1.0 TVOS_MIN_OS_VERSION=16.4 BUILD_SHARED_LIBS=OFF +LLAMA_BUILD_APP=OFF LLAMA_BUILD_EXAMPLES=OFF LLAMA_BUILD_TOOLS=OFF LLAMA_BUILD_TESTS=OFF @@ -31,6 +32,7 @@ COMMON_CMAKE_ARGS=( -DCMAKE_XCODE_ATTRIBUTE_STRIP_INSTALLED_PRODUCT=NO -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} + -DLLAMA_BUILD_APP=${LLAMA_BUILD_APP} -DLLAMA_BUILD_EXAMPLES=${LLAMA_BUILD_EXAMPLES} -DLLAMA_BUILD_TOOLS=${LLAMA_BUILD_TOOLS} -DLLAMA_BUILD_TESTS=${LLAMA_BUILD_TESTS} diff --git a/examples/llama.android/lib/build.gradle.kts b/examples/llama.android/lib/build.gradle.kts index 9b290d6d4a7e..ae95f41a8382 100644 --- a/examples/llama.android/lib/build.gradle.kts +++ b/examples/llama.android/lib/build.gradle.kts @@ -25,6 +25,7 @@ android { arguments += "-DCMAKE_VERBOSE_MAKEFILE=ON" arguments += "-DBUILD_SHARED_LIBS=ON" + arguments += "-DLLAMA_BUILD_APP=OFF" arguments += "-DLLAMA_BUILD_COMMON=ON" arguments += "-DLLAMA_OPENSSL=OFF" diff --git a/tools/batched-bench/CMakeLists.txt b/tools/batched-bench/CMakeLists.txt index 42b50972f4d1..f6ed257f556c 100644 --- a/tools/batched-bench/CMakeLists.txt +++ b/tools/batched-bench/CMakeLists.txt @@ -8,6 +8,10 @@ set_target_properties(${TARGET} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} LIBRARY) +endif() + # llama-batched-bench executable set(TARGET llama-batched-bench) diff --git a/tools/cli/CMakeLists.txt b/tools/cli/CMakeLists.txt index aa44e586c51e..a3e635719b67 100644 --- a/tools/cli/CMakeLists.txt +++ b/tools/cli/CMakeLists.txt @@ -8,6 +8,10 @@ set_target_properties(${TARGET} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ../server) target_link_libraries(${TARGET} PUBLIC server-context llama-common ${CMAKE_THREAD_LIBS_INIT}) +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} LIBRARY) +endif() + # llama-cli executable set(TARGET llama-cli) diff --git a/tools/completion/CMakeLists.txt b/tools/completion/CMakeLists.txt index a485bf0a3cbd..a310251eff6e 100644 --- a/tools/completion/CMakeLists.txt +++ b/tools/completion/CMakeLists.txt @@ -8,6 +8,10 @@ set_target_properties(${TARGET} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} LIBRARY) +endif() + # llama-completion executable set(TARGET llama-completion) diff --git a/tools/fit-params/CMakeLists.txt b/tools/fit-params/CMakeLists.txt index 799c2d74773c..8acdaef3712a 100644 --- a/tools/fit-params/CMakeLists.txt +++ b/tools/fit-params/CMakeLists.txt @@ -8,6 +8,10 @@ set_target_properties(${TARGET} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} LIBRARY) +endif() + # llama-fit-params executable set(TARGET llama-fit-params) diff --git a/tools/llama-bench/CMakeLists.txt b/tools/llama-bench/CMakeLists.txt index 2b71faa5f5c6..b1c35ee88a5f 100644 --- a/tools/llama-bench/CMakeLists.txt +++ b/tools/llama-bench/CMakeLists.txt @@ -8,6 +8,10 @@ set_target_properties(${TARGET} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} LIBRARY) +endif() + # llama-bench executable set(TARGET llama-bench) diff --git a/tools/perplexity/CMakeLists.txt b/tools/perplexity/CMakeLists.txt index b03d61a41157..0eee9acd406e 100644 --- a/tools/perplexity/CMakeLists.txt +++ b/tools/perplexity/CMakeLists.txt @@ -8,6 +8,10 @@ set_target_properties(${TARGET} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} LIBRARY) +endif() + # llama-perplexity executable set(TARGET llama-perplexity) diff --git a/tools/quantize/CMakeLists.txt b/tools/quantize/CMakeLists.txt index 5ef4e4e8a2ea..eead4c859513 100644 --- a/tools/quantize/CMakeLists.txt +++ b/tools/quantize/CMakeLists.txt @@ -8,6 +8,10 @@ set_target_properties(${TARGET} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} LIBRARY) +endif() + # llama-quantize executable set(TARGET llama-quantize) diff --git a/tools/server/CMakeLists.txt b/tools/server/CMakeLists.txt index d87d1a5a5b28..7d427431db93 100644 --- a/tools/server/CMakeLists.txt +++ b/tools/server/CMakeLists.txt @@ -44,6 +44,10 @@ target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(${TARGET} PRIVATE ../mtmd ${CMAKE_SOURCE_DIR}) target_link_libraries(${TARGET} PUBLIC server-context llama-ui cpp-httplib ${CMAKE_THREAD_LIBS_INIT}) +if(LLAMA_TOOLS_INSTALL) + install(TARGETS ${TARGET} LIBRARY) +endif() + # llama-server executable set(TARGET llama-server) From afcda09d154a285cd366135f98ffc1d357f7ddbd Mon Sep 17 00:00:00 2001 From: Kashif Rasul <kashif.rasul@gmail.com> Date: Fri, 22 May 2026 11:17:31 +0200 Subject: [PATCH 2/2] vocab : fix HybridDNA tokenizer (#23466) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * vocab : mark hybriddna k-mers to avoid BPE token collisions * improved loop --------- Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> --- conversion/base.py | 5 +++++ src/llama-vocab.cpp | 50 +++++++++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/conversion/base.py b/conversion/base.py index 8e12af6c5dd5..d8f050ed32d6 100644 --- a/conversion/base.py +++ b/conversion/base.py @@ -1617,6 +1617,11 @@ def _set_vocab_hybriddna(self): assert max(tokenizer.vocab.values()) < vocab_size # ty: ignore[unresolved-attribute] reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()} # ty: ignore[unresolved-attribute] + # k-mers can share text with a base-vocab BPE token (e.g. CCCCCC) and get + # dropped by get_vocab(); a reserved marker suffix (U+E000) keeps each + # k-mer's own id (llama.cpp strips it on detokenization) + for kmer in tokenizer.kmers: # ty: ignore[unresolved-attribute] + reverse_vocab[tokenizer.dna_token_to_id[kmer]] = kmer + "\ue000" # ty: ignore[unresolved-attribute] added_vocab = tokenizer.get_added_vocab() # ty: ignore[unresolved-attribute] added_tokens_decoder = tokenizer.added_tokens_decoder # ty: ignore[unresolved-attribute] diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index acf832d05a62..a5cf148b268f 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -1581,6 +1581,11 @@ struct llm_tokenizer_plamo2_session { const llm_tokenizer_plamo2 & tokenizer; }; +// reserved suffix (U+E000) that keeps DNA k-mers distinct from identical +// base-vocab BPE tokens (e.g. CCCCCC) in token_to_id; erased from id_to_token +// text at load +static const std::string dna_kmer_marker = "\xee\x80\x80"; + struct llm_tokenizer_hybriddna_session : llm_tokenizer_bpe_session { llm_tokenizer_hybriddna_session(const llama_vocab & vocab, const llm_tokenizer_bpe & tokenizer) : llm_tokenizer_bpe_session{vocab, tokenizer}, vocab{vocab} {} @@ -1636,34 +1641,22 @@ struct llm_tokenizer_hybriddna_session : llm_tokenizer_bpe_session { c = char(c - 32); } } - auto is_valid_kmer = [](const std::string & s) { - for (char c : s) { - if (c != 'A' && c != 'C' && c != 'G' && c != 'T') { - return false; - } - } - return true; + + // k-mers carry the reserved marker suffix; a non-ACGT k-mer simply + // isn't in the vocab and falls back to <oov> + auto kmer_token = [&](const std::string & kmer) { + const auto tok = vocab.text_to_token(kmer + dna_kmer_marker); + return tok != LLAMA_TOKEN_NULL ? tok : oov_id; }; size_t i = 0; for (; i + k <= seq.size(); i += k) { - const std::string kmer = seq.substr(i, k); - if (is_valid_kmer(kmer)) { - const auto tok = vocab.text_to_token(kmer); - output.push_back(tok != LLAMA_TOKEN_NULL ? tok : oov_id); - } else { - output.push_back(oov_id); - } + output.push_back(kmer_token(seq.substr(i, k))); } if (i < seq.size()) { std::string kmer = seq.substr(i); kmer.append(k - kmer.size(), 'A'); - if (is_valid_kmer(kmer)) { - const auto tok = vocab.text_to_token(kmer); - output.push_back(tok != LLAMA_TOKEN_NULL ? tok : oov_id); - } else { - output.push_back(oov_id); - } + output.push_back(kmer_token(kmer)); } } @@ -2357,6 +2350,23 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { } GGML_ASSERT(id_to_token.size() == token_to_id.size()); + // hybriddna: the marker suffix kept k-mer ids distinct in token_to_id; erase + // it from id_to_token so the k-mers detokenize to the bare DNA sequence. The + // k-mers are the block right after <oov>, so only scan from there. + if (tokenizer_model == "hybriddna") { + const auto idx = token_to_id.find("<oov>"); + if (idx != token_to_id.end()) { + auto it = id_to_token.begin() + idx->second + 1; + for (; it != id_to_token.end(); ++it) { + std::string & text = it->text; + if (text.size() > dna_kmer_marker.size() + && text.compare(text.size() - dna_kmer_marker.size(), dna_kmer_marker.size(), dna_kmer_marker) == 0) { + text.erase(text.size() - dna_kmer_marker.size()); + } + } + } + } + init_tokenizer(type); // determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'