Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 2 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,15 @@ option(LLAMA_BUILD_TESTS "llama: build tests"
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
# Deprecated: use LLAMA_BUILD_UI instead (kept for backward compat)
option(LLAMA_BUILD_WEBUI "llama: build the embedded Web UI for server (deprecated: use LLAMA_BUILD_UI)" ON)
option(LLAMA_USE_PREBUILT_WEBUI "llama: use prebuilt WebUI from HF Bucket when available (deprecated: use LLAMA_USE_PREBUILT_UI)" ON)

# New option names
option(LLAMA_BUILD_UI "llama: build the embedded Web UI for server" ON)
option(LLAMA_USE_PREBUILT_UI "llama: use prebuilt UI from HF Bucket when available (requires LLAMA_BUILD_UI=ON)" ON)

# Backward compat: when old var is set but new one isn't, forward the value
if(DEFINED LLAMA_BUILD_WEBUI AND NOT DEFINED LLAMA_BUILD_UI)
if(DEFINED LLAMA_BUILD_WEBUI)
set(LLAMA_BUILD_UI ${LLAMA_BUILD_WEBUI})
message(DEPRECATION "LLAMA_BUILD_WEBUI is deprecated, use LLAMA_BUILD_UI instead")
endif()
if(DEFINED LLAMA_USE_PREBUILT_WEBUI AND NOT DEFINED LLAMA_USE_PREBUILT_UI)
if(DEFINED LLAMA_USE_PREBUILT_WEBUI)
set(LLAMA_USE_PREBUILT_UI ${LLAMA_USE_PREBUILT_WEBUI})
message(DEPRECATION "LLAMA_USE_PREBUILT_WEBUI is deprecated, use LLAMA_USE_PREBUILT_UI instead")
endif()
Expand Down Expand Up @@ -286,18 +281,6 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)

install(
FILES convert_hf_to_gguf.py
PERMISSIONS
OWNER_READ
OWNER_WRITE
OWNER_EXECUTE
GROUP_READ
GROUP_EXECUTE
WORLD_READ
WORLD_EXECUTE
DESTINATION ${CMAKE_INSTALL_BINDIR})

configure_file(cmake/llama.pc.in
"${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
@ONLY)
Expand Down
6 changes: 6 additions & 0 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ if [ ! -z ${GG_BUILD_VULKAN} ]; then
# if on Mac, disable METAL
if [[ "$OSTYPE" == "darwin"* ]]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"

MACOS_RUNNER_CUSTOM_VULKAN_CMAKE_LOCATION="/usr/local/lib/cmake/vulkan"
MACOS_RUNNER_CUSTOM_SPIRV_HEADERS_LOCATION="${MACOS_RUNNER_CUSTOM_VULKAN_CMAKE_LOCATION}/SPIRV-Headers/SPIRV-HeadersConfig.cmake"
if [[ -f "${MACOS_RUNNER_CUSTOM_SPIRV_HEADERS_LOCATION}" || -h "${MACOS_RUNNER_CUSTOM_SPIRV_HEADERS_LOCATION}" ]]; then
CMAKE_EXTRA="${CMAKE_EXTRA} -DSPIRV-Headers_DIR=${MACOS_RUNNER_CUSTOM_VULKAN_CMAKE_LOCATION}/SPIRV-Headers"
fi
fi

# Build shared libs on Windows
Expand Down
2 changes: 1 addition & 1 deletion cmake/llama-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)

set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
set(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")

find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)

Expand Down
34 changes: 28 additions & 6 deletions common/chat-auto-parser-generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,33 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
const autoparser & autoparser) {
// Create the result structure
common_chat_params data;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = autoparser.preserved_tokens;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = autoparser.preserved_tokens;

std::string parser_generation_prompt = data.generation_prompt;

if (inputs.continue_final_message != COMMON_CHAT_CONTINUATION_NONE && !inputs.continue_msg.empty()) {
// Build up generation prompt manually
const auto & msg = inputs.continue_msg;

if (!autoparser.reasoning.start.empty()) {
data.generation_prompt = data.generation_prompt.substr(0, data.generation_prompt.find(autoparser.reasoning.start));
data.generation_prompt += autoparser.reasoning.start + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += autoparser.reasoning.end;
}
}

if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += msg.render_content();
}

data.prompt += data.generation_prompt;
}

auto parser = autoparser.build_parser(inputs);
auto parser = autoparser.build_parser(inputs, parser_generation_prompt);
data.parser = parser.save();

// Build grammar if tools are present
Expand Down Expand Up @@ -87,7 +109,7 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
return data;
}

common_peg_arena autoparser::build_parser(const generation_params & inputs) const {
common_peg_arena autoparser::build_parser(const generation_params & inputs, const std::string & generation_prompt) const {
if (!analysis_complete) {
throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
}
Expand Down Expand Up @@ -121,7 +143,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons
} else {
parser = content.build_parser(ctx);
}
return pure_content ? p.prefix(inputs.generation_prompt, reasoning.start) + parser : p.prefix(inputs.generation_prompt, reasoning.start) << parser;
return pure_content ? p.prefix(generation_prompt, reasoning.start) + parser : p.prefix(generation_prompt, reasoning.start) << parser;
});
}

Expand Down
15 changes: 10 additions & 5 deletions common/chat-auto-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,21 @@ struct generation_params {
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO;
bool stream = true;
std::string grammar;
bool add_generation_prompt = false;
bool enable_thinking = true;
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
std::string generation_prompt;
bool add_generation_prompt = false;
common_chat_continuation continue_final_message = COMMON_CHAT_CONTINUATION_NONE;
common_chat_msg continue_msg;
bool enable_thinking = true;
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
json extra_context;
bool add_bos = false;
bool add_eos = false;
bool is_inference = true;
bool add_inference = false;
bool mark_input = true; // whether to mark input strings in the jinja context

bool has_continuation() const {
return continue_final_message != COMMON_CHAT_CONTINUATION_NONE && !continue_msg.empty();
}
};

// ============================================================================
Expand Down Expand Up @@ -386,7 +391,7 @@ struct autoparser {
void analyze_template(const common_chat_template & tmpl);

// Build the PEG parser for this template
common_peg_arena build_parser(const generation_params & inputs) const;
common_peg_arena build_parser(const generation_params & inputs, const std::string & generation_prompt) const;

private:
// Collect tokens from entire analysis to preserve
Expand Down
2 changes: 1 addition & 1 deletion common/chat-peg-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,7 @@ common_peg_parser common_chat_peg_builder::prefix(const std::string & s, const s
if (delimiter.empty()) {
return literal(s);
}
return literal(s.substr(0, s.rfind(delimiter)));
return literal(s.substr(0, s.find(delimiter)));
}

common_peg_parser common_chat_peg_builder::optspace(const std::string & tag) {
Expand Down
Loading
Loading