Skip to content

Commit 72f26d9

Browse files
Merge pull request #568 from janhq/update-dev-from-master-2026-06-23-01-09
Sync master with upstream release b9763
2 parents 5cbd9ae + dec5ca5 commit 72f26d9

35 files changed

Lines changed: 1009 additions & 301 deletions

common/arg.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -396,7 +396,7 @@ static bool parse_bool_value(const std::string & value) {
396396
// CLI argument parsing functions
397397
//
398398

399-
bool common_params_handle_models(common_params & params, llama_example curr_ex) {
399+
bool common_params_handle_models(common_params & params, llama_example curr_ex, common_download_callback * callback) {
400400
const bool spec_type_draft_mtp = std::find(params.speculative.types.begin(),
401401
params.speculative.types.end(),
402402
COMMON_SPECULATIVE_TYPE_DRAFT_MTP) != params.speculative.types.end();
@@ -408,6 +408,10 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
408408
opts.download_mtp = spec_type_draft_mtp;
409409
opts.download_mmproj = !params.no_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty();
410410

411+
if (callback) {
412+
opts.callback = callback;
413+
}
414+
411415
// sub-models (draft, mmproj, vocoder) are explicitly specified by the user,
412416
// so we should not auto-discover mtp/mmproj siblings for them
413417
common_download_opts sub_opts = opts;
@@ -584,8 +588,11 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
584588
throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
585589
}
586590

587-
// export_graph_ops loads only metadata
588-
const bool skip_model_download = ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
591+
const bool skip_model_download =
592+
// server will call common_params_handle_models() later, so we skip it here
593+
ctx_arg.ex == LLAMA_EXAMPLE_SERVER ||
594+
// export_graph_ops loads only metadata
595+
ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
589596

590597
if (!skip_model_download) {
591598
// handle model and download
@@ -594,7 +601,6 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
594601
// model is required (except for server)
595602
// TODO @ngxson : maybe show a list of available models in CLI in this case
596603
if (params.model.path.empty()
597-
&& ctx_arg.ex != LLAMA_EXAMPLE_SERVER
598604
&& !params.usage
599605
&& !params.completion) {
600606
throw std::invalid_argument("error: --model is required\n");

common/arg.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include "common.h"
4+
#include "download.h"
45

56
#include <set>
67
#include <map>
@@ -133,7 +134,10 @@ void common_params_add_preset_options(std::vector<common_arg> & args);
133134
// return true if the model is ready to use
134135
// throw an exception if there is an error that prevents the model from being used (e.g. network error, model not found, etc)
135136
// if params.skip_download is true, no downloads will be attempted. return false if the model is invalid or missing (e.g. ETag check failed)
136-
bool common_params_handle_models(common_params & params, llama_example curr_ex);
137+
bool common_params_handle_models(
138+
common_params & params,
139+
llama_example curr_ex,
140+
common_download_callback * callback = nullptr);
137141

138142
// initialize argument parser context - used by test-arg-parser and preset
139143
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);

docs/android.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ With Termux, you can install and run `llama.cpp` as if the environment were Linu
2929

3030
```
3131
$ apt update && apt upgrade -y
32-
$ apt install git cmake
32+
$ apt install git cmake libandroid-spawn
3333
```
3434

3535
Then, follow the [build instructions](https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md), specifically for CMake.

ggml/src/ggml-sycl/binbcast.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -293,6 +293,11 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
293293
(sycl::ext::oneapi::bfloat16 *) dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne0, ne1, ne2,
294294
ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2, nb3, ggml_is_contiguous(src0),
295295
ggml_is_contiguous(src1), ggml_is_permuted(src0), ggml_is_permuted(src1), main_stream);
296+
} else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_BF16) {
297+
op()((const sycl::ext::oneapi::bfloat16 *) src0->data, (const float *) src1->data,
298+
(sycl::ext::oneapi::bfloat16 *) dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne0, ne1, ne2,
299+
ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2, nb3, ggml_is_contiguous(src0),
300+
ggml_is_contiguous(src1), ggml_is_permuted(src0), ggml_is_permuted(src1), main_stream);
296301
#endif
297302
} else {
298303
fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, ggml_type_name(dst->type),

0 commit comments

Comments
 (0)