janhq
diff --git a/‎common/arg.cpp‎
Lines changed: 10 additions & 4 deletions b/‎common/arg.cpp‎
Lines changed: 10 additions & 4 deletions
diff --git a/‎common/arg.h‎
Lines changed: 5 additions & 1 deletion b/‎common/arg.h‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎docs/android.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/android.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ggml/src/ggml-sycl/binbcast.cpp‎
Lines changed: 5 additions & 0 deletions b/‎ggml/src/ggml-sycl/binbcast.cpp‎
Lines changed: 5 additions & 0 deletions
@@ -396,7 +396,7 @@ static bool parse_bool_value(const std::string & value) {
 // CLI argument parsing functions
 //
 
-bool common_params_handle_models(common_params & params, llama_example curr_ex) {
+bool common_params_handle_models(common_params & params, llama_example curr_ex, common_download_callback * callback) {
     const bool spec_type_draft_mtp = std::find(params.speculative.types.begin(),
                                          params.speculative.types.end(),
                                          COMMON_SPECULATIVE_TYPE_DRAFT_MTP) != params.speculative.types.end();
@@ -408,6 +408,10 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
     opts.download_mtp    = spec_type_draft_mtp;
     opts.download_mmproj = !params.no_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty();
 
+    if (callback) {
+        opts.callback = callback;
+    }
+
     // sub-models (draft, mmproj, vocoder) are explicitly specified by the user,
     // so we should not auto-discover mtp/mmproj siblings for them
     common_download_opts sub_opts = opts;
@@ -584,8 +588,11 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n");
     }
 
-    // export_graph_ops loads only metadata
-    const bool skip_model_download = ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
+    const bool skip_model_download =
+        // server will call common_params_handle_models() later, so we skip it here
+        ctx_arg.ex == LLAMA_EXAMPLE_SERVER ||
+        // export_graph_ops loads only metadata
+        ctx_arg.ex == LLAMA_EXAMPLE_EXPORT_GRAPH_OPS;
 
     if (!skip_model_download) {
         // handle model and download
@@ -594,7 +601,6 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         // model is required (except for server)
         // TODO @ngxson : maybe show a list of available models in CLI in this case
         if (params.model.path.empty()
-                && ctx_arg.ex != LLAMA_EXAMPLE_SERVER
                 && !params.usage
                 && !params.completion) {
             throw std::invalid_argument("error: --model is required\n");
 
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "common.h"
+#include "download.h"
 
 #include <set>
 #include <map>
@@ -133,7 +134,10 @@ void common_params_add_preset_options(std::vector<common_arg> & args);
 // return true if the model is ready to use
 // throw an exception if there is an error that prevents the model from being used (e.g. network error, model not found, etc)
 // if params.skip_download is true, no downloads will be attempted. return false if the model is invalid or missing (e.g. ETag check failed)
-bool common_params_handle_models(common_params & params, llama_example curr_ex);
+bool common_params_handle_models(
+    common_params & params,
+    llama_example curr_ex,
+    common_download_callback * callback = nullptr);
 
 // initialize argument parser context - used by test-arg-parser and preset
 common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
@@ -29,7 +29,7 @@ With Termux, you can install and run `llama.cpp` as if the environment were Linu
 
 ```
 $ apt update && apt upgrade -y
-$ apt install git cmake
+$ apt install git cmake libandroid-spawn
 ```
 
 Then, follow the [build instructions](https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md), specifically for CMake.
 
@@ -293,6 +293,11 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
              (sycl::ext::oneapi::bfloat16 *) dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne0, ne1, ne2,
              ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2, nb3, ggml_is_contiguous(src0),
              ggml_is_contiguous(src1), ggml_is_permuted(src0), ggml_is_permuted(src1), main_stream);
+    } else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_BF16) {
+        op()((const sycl::ext::oneapi::bfloat16 *) src0->data, (const float *) src1->data,
+             (sycl::ext::oneapi::bfloat16 *) dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne0, ne1, ne2,
+             ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2, nb3, ggml_is_contiguous(src0),
+             ggml_is_contiguous(src1), ggml_is_permuted(src0), ggml_is_permuted(src1), main_stream);
 #endif
     } else {
         fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, ggml_type_name(dst->type),