Skip to content

Commit 9125bb4

Browse files
authored
Merge branch 'ggml-org:master' into dflash-rebase
2 parents 486081d + 67e9fd3 commit 9125bb4

41 files changed

Lines changed: 403 additions & 239 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.devops/s390x.Dockerfile

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,6 @@ ARG BUILD_DATE=N/A
44
ARG APP_VERSION=N/A
55
ARG APP_REVISION=N/A
66

7-
ARG NODE_VERSION=24
8-
9-
FROM docker.io/node:$NODE_VERSION AS web
10-
11-
ARG APP_VERSION
12-
13-
WORKDIR /app/tools/ui
14-
15-
COPY tools/ui/package.json tools/ui/package-lock.json ./
16-
RUN npm ci
17-
18-
COPY tools/ui/ ./
19-
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
20-
217
### Build Llama.cpp stage
228
FROM docker.io/gcc:${GCC_VERSION} AS build
239

@@ -34,8 +20,6 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
3420
WORKDIR /app
3521
COPY . .
3622

37-
COPY --from=web /app/tools/ui/dist tools/ui/dist
38-
3923
RUN --mount=type=cache,target=/root/.ccache \
4024
--mount=type=cache,target=/app/build \
4125
cmake -S . -B build -G Ninja \

.dockerignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
build*/
1212

1313
tools/ui/node_modules/
14-
tools/ui/dist/
1514

1615
models/*
1716

.github/workflows/docker.yml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ jobs:
5858
git tag ${{ steps.srctag.outputs.name }} || exit 0
5959
git push origin ${{ steps.srctag.outputs.name }} || exit 0
6060
61+
build_ui:
62+
name: Build UI
63+
needs: create_tag
64+
uses: ./.github/workflows/ui-build.yml
65+
with:
66+
hf_ui_version: ${{ needs.create_tag.outputs.source_tag }}
67+
6168
prepare_matrices:
6269
name: Prepare Docker matrices
6370
runs-on: ubuntu-24.04
@@ -79,7 +86,7 @@ jobs:
7986
[
8087
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
8188
{ "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" },
82-
{ "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" },
89+
{ "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x", "prebuilt_ui": true },
8390
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.8.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
8491
{ "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.8.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
8592
{ "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.3.0", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
@@ -135,7 +142,7 @@ jobs:
135142
136143
push_to_registry:
137144
name: Push Docker image to Docker Registry
138-
needs: [prepare_matrices, create_tag]
145+
needs: [prepare_matrices, create_tag, build_ui]
139146

140147
runs-on: ${{ matrix.config.runs_on }}
141148
strategy:
@@ -150,6 +157,13 @@ jobs:
150157
fetch-depth: 0
151158
ref: ${{ needs.create_tag.outputs.source_tag }}
152159

160+
- name: Download prebuilt UI
161+
if: ${{ matrix.config.prebuilt_ui == true }}
162+
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
163+
with:
164+
name: ui-build
165+
path: tools/ui/dist
166+
153167
- name: Set up QEMU
154168
if: ${{ contains(matrix.config.platforms, 'linux/amd64') }}
155169
uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4

common/arg.cpp

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# define NOMINMAX
1818
#endif
1919
#include <windows.h>
20+
#include <shellapi.h>
2021
#endif
2122

2223
#define JSON_ASSERT GGML_ASSERT
@@ -302,7 +303,6 @@ static handle_model_result common_params_handle_model(struct common_params_model
302303

303304
if (!model.docker_repo.empty()) {
304305
model.path = common_docker_resolve_model(model.docker_repo);
305-
model.name = model.docker_repo;
306306
} else if (!model.hf_repo.empty()) {
307307
// If -m was used with -hf, treat the model "path" as the hf_file to download
308308
if (model.hf_file.empty() && !model.path.empty()) {
@@ -322,7 +322,6 @@ static handle_model_result common_params_handle_model(struct common_params_model
322322
throw std::runtime_error("failed to download model from Hugging Face");
323323
}
324324

325-
model.name = model.hf_repo;
326325
model.path = download_result.model_path;
327326

328327
if (!download_result.mmproj_path.empty()) {
@@ -893,7 +892,44 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
893892
return true;
894893
}
895894

895+
#ifdef _WIN32
896+
struct utf8_argv {
897+
std::vector<std::string> buf;
898+
std::vector<char*> ptrs;
899+
};
900+
901+
static utf8_argv make_utf8_argv() {
902+
utf8_argv out;
903+
int wargc = 0;
904+
LPWSTR* wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
905+
if (!wargv) return out;
906+
907+
out.buf.reserve(wargc);
908+
for (int i = 0; i < wargc; ++i) {
909+
int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr);
910+
if (n <= 0) { out.buf.emplace_back(); continue; }
911+
auto& s = out.buf.emplace_back();
912+
s.resize(static_cast<size_t>(n - 1));
913+
(void)WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, s.data(), n, nullptr, nullptr);
914+
}
915+
LocalFree(wargv);
916+
917+
out.ptrs.reserve(out.buf.size() + 1);
918+
for (auto& s : out.buf) out.ptrs.push_back(s.data());
919+
out.ptrs.push_back(nullptr);
920+
return out;
921+
}
922+
#endif
923+
896924
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
925+
#ifdef _WIN32
926+
auto utf8 = make_utf8_argv();
927+
if (!utf8.ptrs.empty()) {
928+
argc = static_cast<int>(utf8.buf.size());
929+
argv = utf8.ptrs.data();
930+
}
931+
#endif
932+
897933
auto ctx_arg = common_params_parser_init(params, ex, print_usage);
898934
const common_params params_org = ctx_arg.params; // the example can modify the default params
899935

common/common.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,18 @@ std::vector<common_file_info> fs_list(const std::string & path, bool include_dir
10741074
return files;
10751075
}
10761076

1077+
// Open `fname` for reading with the given open mode and return the stream.
// The caller is expected to check the returned stream (e.g. is_open()).
//
// On Windows `fname` is treated as UTF-8 and converted to a wide-character
// path before opening. If `fname` is not valid UTF-8 the conversion is rejected
// rather than replacing the bad bytes (which would name a different file), and
// the narrow path is opened as-is instead.
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode) {
#ifdef _WIN32
    // first pass: required length in wchar_t units, including the terminating NUL
    const int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, fname.c_str(), -1, nullptr, 0);
    if (wlen > 0) {
        std::vector<wchar_t> wfname(static_cast<size_t>(wlen));
        const int written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, fname.c_str(), -1, wfname.data(), wlen);
        if (written == wlen) {
            return std::ifstream(wfname.data(), mode);
        }
    }
    // conversion failed: fall through to the narrow-path open below
#endif
    return std::ifstream(fname, mode);
}
1088+
10771089
//
10781090
// TTY utils
10791091
//

common/common.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,16 @@ struct common_params_model {
296296
std::string hf_repo = ""; // HF repo // NOLINT
297297
std::string hf_file = ""; // HF file // NOLINT
298298
std::string docker_repo = ""; // Docker repo // NOLINT
299-
std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
299+
300+
std::string get_name() {
301+
if (!hf_repo.empty()) {
302+
return hf_repo;
303+
}
304+
if (!docker_repo.empty()) {
305+
return docker_repo;
306+
}
307+
return path;
308+
}
300309
};
301310

302311
// draft-model-based speculative decoding parameters
@@ -843,6 +852,9 @@ struct common_file_info {
843852
};
844853
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
845854

855+
// open a file for reading; on Windows, fname is treated as UTF-8 and converted to a wide-character path
856+
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode);
857+
846858
//
847859
// TTY utils
848860
//

conversion/bailingmoe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def set_gguf_parameters(self):
126126
if (rope_dim := hparams.get("head_dim")) is None:
127127
rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
128128

129-
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
129+
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.rope_parameters.get("partial_rotary_factor", 0.5)))
130130
self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
131131
self.gguf_writer.add_vocab_size(hparams["vocab_size"])
132132
self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])

conversion/base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1119,15 +1119,21 @@ def __init__(self, *args, **kwargs):
11191119

11201120
rope_theta = self.find_hparam(["global_rope_theta", "rope_global_theta", "rope_theta_global", "rope_theta", "rotary_emb_base"], optional=True)
11211121
local_rope_theta = self.find_hparam(["local_rope_theta", "rope_local_theta", "rope_theta_local", "swa_rope_theta", "rope_local_base_freq"], optional=True)
1122+
partial_rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct", "rope_percent"], optional=True)
1123+
original_max_position_embeddings = self.find_hparam(["original_max_position_embeddings"], optional=True)
11221124

1123-
# Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
1125+
# Ensure global params are mirrored in rope_parameters
11241126
if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
11251127
if local_rope_theta is not None:
11261128
self.rope_parameters["sliding_attention"] = {"rope_theta": local_rope_theta}
11271129
if "rope_theta" not in self.rope_parameters and rope_theta is not None:
11281130
self.rope_parameters["rope_theta"] = rope_theta
11291131
if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
11301132
self.rope_parameters["rope_type"] = rope_type
1133+
if "partial_rotary_factor" not in self.rope_parameters and partial_rotary_factor is not None:
1134+
self.rope_parameters["partial_rotary_factor"] = partial_rotary_factor
1135+
if "original_max_position_embeddings" not in self.rope_parameters and original_max_position_embeddings is not None:
1136+
self.rope_parameters["original_max_position_embeddings"] = original_max_position_embeddings
11311137

11321138
@classmethod
11331139
def __init_subclass__(cls):

conversion/chatglm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def set_gguf_parameters(self):
148148
rope_dim = self.hparams["attention_dim"]
149149
else:
150150
rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
151-
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
151+
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.rope_parameters.get("partial_rotary_factor", 0.5)))
152152
self.gguf_writer.add_add_bos_token(False)
153153
rope_freq = 10000
154154
if "rope_ratio" in self.hparams:

conversion/deci.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
161161
factor = rope_params.get("factor", 8.0)
162162
low_freq_factor = rope_params.get("low_freq_factor", 1.0)
163163
high_freq_factor = rope_params.get("high_freq_factor", 4.0)
164-
old_context_len = self.hparams.get("original_max_position_embeddings", 8192)
164+
old_context_len = rope_params.get("original_max_position_embeddings", 8192)
165165

166166
low_freq_wavelen = old_context_len / low_freq_factor
167167
high_freq_wavelen = old_context_len / high_freq_factor

0 commit comments

Comments
 (0)