Skip to content

Commit e0ec788

Browse files
Merge remote-tracking branch 'upstream/master'
2 parents 2b4500b + 871b0b7 commit e0ec788

28 files changed

Lines changed: 2839 additions & 100 deletions

.github/ISSUE_TEMPLATE/011-bug-results.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -100,8 +100,8 @@ body:
100100
label: Relevant log output
101101
description: >
102102
Please copy and paste any relevant log output, including the command that you entered and any generated text.
103-
For very long logs (thousands of lines), preferably upload them as files instead.
104-
On Linux you can redirect console output into a file by appending ` > llama.log 2>&1` to your command.
103+
For very long logs (thousands of lines), please upload them as files instead; the `--log-file` CLI argument can be used for this purpose.
104+
On Linux you can alternatively redirect the console output of any command into a file by appending ` > llama.log 2>&1` to your command.
105105
value: |
106106
<details>
107107
<summary>Logs</summary>

.github/ISSUE_TEMPLATE/019-bug-misc.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,8 @@ body:
8888
description: >
8989
If applicable, please copy and paste any relevant log output, including any generated text.
9090
If you are encountering problems specifically with the `llama_params_fit` module, always upload `--verbose` logs as well.
91-
For very long logs (thousands of lines), please upload them as files instead.
92-
On Linux you can redirect console output into a file by appending ` > llama.log 2>&1` to your command.
91+
For very long logs (thousands of lines), please upload them as files instead; the `--log-file` CLI argument can be used for this purpose.
92+
On Linux you can alternatively redirect the console output of any command into a file by appending ` > llama.log 2>&1` to your command.
9393
value: |
9494
<details>
9595
<summary>Logs</summary>

.github/workflows/build-and-test-snapdragon.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ jobs:
3131
android-ndk-snapdragon:
3232
runs-on: ubuntu-latest
3333
container:
34-
image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3'
34+
image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.6'
3535
defaults:
3636
run:
3737
shell: bash
@@ -61,7 +61,7 @@ jobs:
6161
linux-iot-snapdragon:
6262
runs-on: ubuntu-latest
6363
container:
64-
image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
64+
image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.6'
6565
defaults:
6666
run:
6767
shell: bash

common/arg.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3364,7 +3364,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
33643364
" - 1: error\n"
33653365
" - 2: warning\n"
33663366
" - 3: info\n"
3367-
" - 4: debug\n"
3367+
" - 4: trace (more info)\n"
3368+
" - 5: debug\n"
33683369
"(default: %d)\n", params.verbosity),
33693370
[](common_params & params, int value) {
33703371
params.verbosity = value;

common/common.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1173,7 +1173,7 @@ common_init_result::common_init_result(common_params & params, bool model_only)
11731173
params.tensor_buft_overrides.data(),
11741174
params.fit_params_target.data(),
11751175
params.fit_params_min_ctx,
1176-
params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
1176+
params.verbosity >= LOG_LEVEL_DEBUG ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
11771177
}
11781178

11791179
llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
@@ -1366,7 +1366,7 @@ common_init_result_ptr common_init_from_params(common_params & params, bool mode
13661366
}
13671367

13681368
if (params.warmup) {
1369-
LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
1369+
LOG_INF("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
13701370

13711371
llama_set_warmup(lctx, true);
13721372

convert_hf_to_gguf.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -115,15 +115,15 @@ def parse_args() -> argparse.Namespace:
115115
)
116116
parser.add_argument(
117117
"--mmproj", action="store_true",
118-
help="(Experimental) Export multimodal projector (mmproj) for vision models. This will only work on some vision models. A prefix 'mmproj-' will be added to the output file name.",
118+
help="Export multimodal projector (mmproj) for vision models. This will only work on some vision models. An 'mmproj-' prefix will be added to the output file name.",
119119
)
120120
parser.add_argument(
121121
"--mtp", action="store_true",
122-
help="(Experimental) Export only the multi-token prediction (MTP) head as a separate GGUF, suitable for use as a speculative draft. Output file name will get a '-MTP' suffix.",
122+
help="Export only the multi-token prediction (MTP) head as a separate GGUF, suitable for use as a speculative draft. An 'mtp-' prefix will be added to the output file name.",
123123
)
124124
parser.add_argument(
125125
"--no-mtp", action="store_true",
126-
help="(Experimental) Exclude the multi-token prediction (MTP) head from the converted GGUF. Pair with --mtp on a second run to publish trunk and MTP as two files. Note: the split form duplicates embeddings, so the bundled default is more space-efficient overall.",
126+
help="Exclude the multi-token prediction (MTP) head from the converted GGUF. Pair with --mtp on a second run to publish trunk and MTP as two files. Note: the split form duplicates embeddings, but even though the bundled default is more space-efficient overall, this allows differing quantization which may be more performant.",
127127
)
128128
parser.add_argument(
129129
"--mistral-format", action="store_true",

docs/backend/snapdragon/CMakeUserPresets.json

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,8 @@
1010
"ANDROID_ABI": "arm64-v8a",
1111
"ANDROID_PLATFORM": "android-31",
1212
"CMAKE_TOOLCHAIN_FILE": "$env{ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake",
13-
"CMAKE_C_FLAGS": "-march=armv8.7a+fp16 -fvectorize -ffp-model=fast -fno-finite-math-only -flto -D_GNU_SOURCE",
14-
"CMAKE_CXX_FLAGS": "-march=armv8.7a+fp16 -fvectorize -ffp-model=fast -fno-finite-math-only -flto -D_GNU_SOURCE",
13+
"CMAKE_C_FLAGS": "-march=armv8.7a+fp16+dotprod+i8mm -fvectorize -ffp-model=fast -fno-finite-math-only -flto -D_GNU_SOURCE",
14+
"CMAKE_CXX_FLAGS": "-march=armv8.7a+fp16+dotprod+i8mm -fvectorize -ffp-model=fast -fno-finite-math-only -flto -D_GNU_SOURCE",
1515
"CMAKE_C_FLAGS_RELEASE": "-O3 -DNDEBUG",
1616
"CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
1717
"CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",
@@ -59,8 +59,8 @@
5959
"toolset": { "value": "host=x86_64", "strategy": "external" },
6060
"cacheVariables": {
6161
"CMAKE_TOOLCHAIN_FILE": "cmake/arm64-linux-clang.cmake",
62-
"CMAKE_C_FLAGS": "-march=armv8 -fno-finite-math-only -flto -D_GNU_SOURCE",
63-
"CMAKE_CXX_FLAGS": "-march=armv8 -fno-finite-math-only -flto -D_GNU_SOURCE",
62+
"CMAKE_C_FLAGS": "-march=armv8.2a+fp16+dotprod -fvectorize -fno-finite-math-only -flto -D_GNU_SOURCE",
63+
"CMAKE_CXX_FLAGS": "-march=armv8.2a+fp16+dotprod -fvectorize -fno-finite-math-only -flto -D_GNU_SOURCE",
6464
"CMAKE_C_FLAGS_RELEASE": "-O3 -DNDEBUG",
6565
"CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG",
6666
"CMAKE_C_FLAGS_RELWITHDEBINFO": "-O3 -DNDEBUG -g",

docs/backend/snapdragon/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ This image includes Android NDK, OpenCL SDK, Hexagon SDK, CMake, etc.
1010
This method works on Linux, macOS, and Windows. macOS and Windows users should install Docker Desktop.
1111

1212
```
13-
~/src/llama.cpp$ docker run -it -u $(id -u):$(id -g) --volume $(pwd):/workspace --platform linux/amd64 ghcr.io/snapdragon-toolchain/arm64-android:v0.3
13+
~/src/llama.cpp$ docker run -it -u $(id -u):$(id -g) --volume $(pwd):/workspace --platform linux/amd64 ghcr.io/snapdragon-toolchain/arm64-android:v0.6
1414
[d]/> cd /workspace
1515
```
1616

ggml/src/ggml-cuda/mmvq.cu

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -359,7 +359,9 @@ static constexpr __host__ __device__ int calc_nwarps(ggml_type type, int ncols_d
359359
case GGML_TYPE_Q5_1:
360360
case GGML_TYPE_Q8_0:
361361
case GGML_TYPE_Q4_K:
362+
return 8;
362363
case GGML_TYPE_Q6_K:
364+
return 2;
363365
case GGML_TYPE_IQ4_NL:
364366
return 8;
365367
default:

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2661,7 +2661,7 @@ static bool ggml_hexagon_supported_rope(const struct ggml_hexagon_session * sess
26612661

26622662
int mode = op_params[2];
26632663

2664-
if ((mode & GGML_ROPE_TYPE_MROPE) || (mode & GGML_ROPE_TYPE_VISION)) {
2664+
if (mode == GGML_ROPE_TYPE_VISION) {
26652665
return false;
26662666
}
26672667
if (mode & 1) {

0 commit comments

Comments
 (0)