Skip to content

Commit 327c1ac

Browse files
committed
Merge branch 'master' of github.com:tekintian/llama.cpp into diy
* 'master' of github.com:tekintian/llama.cpp: (659 commits) ggml-webgpu: Improve performance of mat-vec and mat-mat for MUL_MAT_ID (ggml-org#22464) Update llama-mmap to use ftello/fseeko (ggml-org#22497) common : check for null getpwuid in hf-cache (ggml-org#22550) vulkan: add get/set tensor 2d functions (ggml-org#22514) spec: fix argument typo (ggml-org#22552) ci : bump ty to 0.0.33 (ggml-org#22535) vendor : update cpp-httplib to 0.43.2 (ggml-org#22548) CUDA: fix tile FA kernel on Pascal (ggml-org#22541) scripts : add wc2wt.sh - create worktree from current HEAD (ggml-org#22513) add fast matmul iquants (ggml-org#22504) spec : fix draft model checkpoints (ggml-org#22521) spec : fix vocab compat checks in spec example (ggml-org#22426) common : do not pass prompt tokens to reasoning budget sampler (ggml-org#22488) hexagon: make vmem and buffer-size configurable (ggml-org#22487) CUDA: fuse SSM_CONV + ADD(bias) + SILU (ggml-org#22478) spec : disacard last drafted token with low prob (ggml-org#22506) sync : ggml ggml : bump version to 0.10.1 (ggml/1469) webui: fix slow mic stop and WAV encode (ggml-org#22480) ggml-cpu : disable tiled matmul on AIX to fix page boundary segfault (ggml-org#22293) ... # Conflicts: # .gitignore
2 parents 52ac2f6 + b941d72 commit 327c1ac

1,156 files changed

Lines changed: 165092 additions & 74801 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.devops/cann.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Define the CANN base image for easier version updates later
66
ARG CHIP_TYPE=910b
7-
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.5.0-${CHIP_TYPE}-openeuler24.03-py3.11
88

99
# ==============================================================================
1010
# BUILD STAGE

.devops/cpu.Dockerfile

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
ARG UBUNTU_VERSION=22.04
1+
ARG UBUNTU_VERSION=24.04
22

33
FROM ubuntu:$UBUNTU_VERSION AS build
44

55
ARG TARGETARCH
66

77
RUN apt-get update && \
8-
apt-get install -y build-essential git cmake libssl-dev
8+
apt-get install -y gcc-14 g++-14 build-essential git cmake libssl-dev
9+
10+
ENV CC=gcc-14 CXX=g++-14
911

1012
WORKDIR /app
1113

@@ -34,7 +36,7 @@ RUN mkdir -p /app/full \
3436
FROM ubuntu:$UBUNTU_VERSION AS base
3537

3638
RUN apt-get update \
37-
&& apt-get install -y libgomp1 curl\
39+
&& apt-get install -y libgomp1 curl \
3840
&& apt autoremove -y \
3941
&& apt clean -y \
4042
&& rm -rf /tmp/* /var/tmp/* \
@@ -55,8 +57,9 @@ RUN apt-get update \
5557
git \
5658
python3 \
5759
python3-pip \
58-
&& pip install --upgrade pip setuptools wheel \
59-
&& pip install -r requirements.txt \
60+
python3-wheel \
61+
&& pip install --break-system-packages --upgrade setuptools \
62+
&& pip install --break-system-packages -r requirements.txt \
6063
&& apt autoremove -y \
6164
&& apt clean -y \
6265
&& rm -rf /tmp/* /var/tmp/* \

.devops/cuda-new.Dockerfile

Lines changed: 0 additions & 95 deletions
This file was deleted.

.devops/cuda.Dockerfile

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
ARG UBUNTU_VERSION=22.04
1+
ARG UBUNTU_VERSION=24.04
22
# This needs to generally match the container host's environment.
3-
ARG CUDA_VERSION=12.4.0
3+
ARG CUDA_VERSION=12.8.1
44
# Target the CUDA build image
55
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
66

@@ -12,7 +12,9 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
1212
ARG CUDA_DOCKER_ARCH=default
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
15+
apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1
16+
17+
ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14
1618

1719
WORKDIR /app
1820

@@ -39,7 +41,7 @@ RUN mkdir -p /app/full \
3941
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
4042

4143
RUN apt-get update \
42-
&& apt-get install -y libgomp1 curl\
44+
&& apt-get install -y libgomp1 curl \
4345
&& apt autoremove -y \
4446
&& apt clean -y \
4547
&& rm -rf /tmp/* /var/tmp/* \
@@ -60,7 +62,8 @@ RUN apt-get update \
6062
git \
6163
python3 \
6264
python3-pip \
63-
&& pip install --upgrade pip setuptools wheel \
65+
python3-wheel \
66+
&& pip install --break-system-packages --upgrade setuptools \
6467
&& pip install --break-system-packages -r requirements.txt \
6568
&& apt autoremove -y \
6669
&& apt clean -y \

.devops/intel.Dockerfile

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG ONEAPI_VERSION=2025.2.2-0-devel-ubuntu24.04
1+
ARG ONEAPI_VERSION=2025.3.3-0-devel-ubuntu24.04
22

33
## Build Image
44

@@ -33,8 +33,25 @@ RUN mkdir -p /app/full \
3333

3434
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
3535

36+
ARG IGC_VERSION=v2.30.1
37+
ARG IGC_VERSION_FULL=2_2.30.1+20950
38+
ARG COMPUTE_RUNTIME_VERSION=26.09.37435.1
39+
ARG COMPUTE_RUNTIME_VERSION_FULL=26.09.37435.1-0
40+
ARG IGDGMM_VERSION=22.9.0
41+
RUN mkdir /tmp/neo/ && cd /tmp/neo/ \
42+
&& wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-core-${IGC_VERSION_FULL}_amd64.deb \
43+
&& wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-opencl-${IGC_VERSION_FULL}_amd64.deb \
44+
&& wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-ocloc-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
45+
&& wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-ocloc_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
46+
&& wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-opencl-icd-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
47+
&& wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-opencl-icd_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
48+
&& wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libigdgmm12_${IGDGMM_VERSION}_amd64.deb \
49+
&& wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libze-intel-gpu1-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
50+
&& wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libze-intel-gpu1_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
51+
&& dpkg --install *.deb
52+
3653
RUN apt-get update \
37-
&& apt-get install -y libgomp1 curl\
54+
&& apt-get install -y libgomp1 curl \
3855
&& apt autoremove -y \
3956
&& apt clean -y \
4057
&& rm -rf /tmp/* /var/tmp/* \

.devops/llama-cli-cann.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
1+
ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10
22

33
FROM ascendai/cann:$ASCEND_VERSION AS build
44

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ RUN mkdir -p /app/full \
4646
FROM ${BASE_MUSA_RUN_CONTAINER} AS base
4747

4848
RUN apt-get update \
49-
&& apt-get install -y libgomp1 curl\
49+
&& apt-get install -y libgomp1 curl \
5050
&& apt autoremove -y \
5151
&& apt clean -y \
5252
&& rm -rf /tmp/* /var/tmp/* \

.devops/nix/package.nix

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616
rocmPackages,
1717
vulkan-headers,
1818
vulkan-loader,
19-
curl,
19+
openssl,
2020
shaderc,
21+
spirv-headers,
2122
useBlas ?
2223
builtins.all (x: !x) [
2324
useCuda
@@ -41,6 +42,7 @@
4142
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
4243
enableStatic ? effectiveStdenv.hostPlatform.isStatic,
4344
precompileMetalShaders ? false,
45+
useWebUi ? true,
4446
}:
4547

4648
let
@@ -144,6 +146,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
144146
ninja
145147
pkg-config
146148
git
149+
spirv-headers
147150
]
148151
++ optionals useCuda [
149152
cudaPackages.cuda_nvcc
@@ -159,11 +162,13 @@ effectiveStdenv.mkDerivation (finalAttrs: {
159162
++ optionals useMpi [ mpi ]
160163
++ optionals useRocm rocmBuildInputs
161164
++ optionals useBlas [ blas ]
162-
++ optionals useVulkan vulkanBuildInputs;
165+
++ optionals useVulkan vulkanBuildInputs
166+
++ [ openssl ];
163167

164168
cmakeFlags =
165169
[
166170
(cmakeBool "LLAMA_BUILD_SERVER" true)
171+
(cmakeBool "LLAMA_BUILD_WEBUI" useWebUi)
167172
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
168173
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
169174
(cmakeBool "GGML_NATIVE" false)

0 commit comments

Comments
 (0)