Skip to content

Commit 13da971

Browse files
authored
Merge pull request #7 from marty1885/backend-dev
Enhance ET backend and sync upstream llama.cpp changes
2 parents f36eb79 + 07519b7 commit 13da971

1,697 files changed

Lines changed: 476104 additions & 137804 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.devops/cann.Dockerfile

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,17 @@
33
# ==============================================================================
44

55
# Define the CANN base image for easier version updates later
6-
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10
6+
ARG CHIP_TYPE=910b
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
78

89
# ==============================================================================
910
# BUILD STAGE
1011
# Compile all binary files and libraries
1112
# ==============================================================================
1213
FROM ${CANN_BASE_IMAGE} AS build
1314

14-
# Define the Ascend chip model for compilation. Default is Ascend910B3
15-
ARG ASCEND_SOC_TYPE=Ascend910B3
16-
1715
# -- Install build dependencies --
18-
RUN yum install -y gcc g++ cmake make git libcurl-devel python3 python3-pip && \
16+
RUN yum install -y gcc g++ cmake make git openssl-devel python3 python3-pip && \
1917
yum clean all && \
2018
rm -rf /var/cache/yum
2119

@@ -36,20 +34,22 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
3634
# For brevity, only core variables are listed here. You can paste the original ENV list here.
3735

3836
# -- Build llama.cpp --
39-
# Use the passed ASCEND_SOC_TYPE argument and add general build options
37+
# Use the passed CHIP_TYPE argument and add general build options
38+
ARG CHIP_TYPE
4039
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
4140
&& \
4241
cmake -B build \
4342
-DGGML_CANN=ON \
4443
-DCMAKE_BUILD_TYPE=Release \
45-
-DSOC_TYPE=${ASCEND_SOC_TYPE} \
44+
-DSOC_TYPE=ascend${CHIP_TYPE} \
45+
-DUSE_ACL_GRAPH=ON \
4646
. && \
4747
cmake --build build --config Release -j$(nproc)
4848

4949
# -- Organize build artifacts for copying in later stages --
5050
# Create a lib directory to store all .so files
5151
RUN mkdir -p /app/lib && \
52-
find build -name "*.so" -exec cp {} /app/lib \;
52+
find build -name "*.so*" -exec cp -P {} /app/lib \;
5353

5454
# Create a full directory to store all executables and Python scripts
5555
RUN mkdir -p /app/full && \
@@ -108,11 +108,11 @@ ENTRYPOINT ["/app/tools.sh"]
108108
# ENTRYPOINT ["/app/llama-server"]
109109

110110
### Target: light
111-
# Lightweight image containing only llama-cli
111+
# Lightweight image containing only llama-cli and llama-completion
112112
# ==============================================================================
113113
FROM base AS light
114114

115-
COPY --from=build /app/full/llama-cli /app
115+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
116116

117117
ENTRYPOINT [ "/app/llama-cli" ]
118118

.devops/cpu.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ FROM ubuntu:$UBUNTU_VERSION AS build
55
ARG TARGETARCH
66

77
RUN apt-get update && \
8-
apt-get install -y build-essential git cmake libcurl4-openssl-dev
8+
apt-get install -y build-essential git cmake libssl-dev
99

1010
WORKDIR /app
1111

@@ -20,7 +20,7 @@ RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
2020
cmake --build build -j $(nproc)
2121

2222
RUN mkdir -p /app/lib && \
23-
find build -name "*.so" -exec cp {} /app/lib \;
23+
find build -name "*.so*" -exec cp -P {} /app/lib \;
2424

2525
RUN mkdir -p /app/full \
2626
&& cp build/bin/* /app/full \
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6868
### Light, CLI only
6969
FROM base AS light
7070

71-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7272

7373
WORKDIR /app
7474

.devops/cuda-new.Dockerfile

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
ARG UBUNTU_VERSION=24.04
2+
# This needs to generally match the container host's environment.
3+
ARG CUDA_VERSION=13.1.0
4+
# Target the CUDA build image
5+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6+
7+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8+
9+
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10+
11+
# CUDA architecture to build for (defaults to all supported archs)
12+
ARG CUDA_DOCKER_ARCH=default
13+
14+
RUN apt-get update && \
15+
apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
16+
17+
WORKDIR /app
18+
19+
COPY . .
20+
21+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
22+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
23+
fi && \
24+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
25+
cmake --build build --config Release -j$(nproc)
26+
27+
RUN mkdir -p /app/lib && \
28+
find build -name "*.so*" -exec cp -P {} /app/lib \;
29+
30+
RUN mkdir -p /app/full \
31+
&& cp build/bin/* /app/full \
32+
&& cp *.py /app/full \
33+
&& cp -r gguf-py /app/full \
34+
&& cp -r requirements /app/full \
35+
&& cp requirements.txt /app/full \
36+
&& cp .devops/tools.sh /app/full/tools.sh
37+
38+
## Base image
39+
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
40+
41+
RUN apt-get update \
42+
&& apt-get install -y libgomp1 curl\
43+
&& apt autoremove -y \
44+
&& apt clean -y \
45+
&& rm -rf /tmp/* /var/tmp/* \
46+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
47+
&& find /var/cache -type f -delete
48+
49+
COPY --from=build /app/lib/ /app
50+
51+
### Full
52+
FROM base AS full
53+
54+
COPY --from=build /app/full /app
55+
56+
WORKDIR /app
57+
58+
RUN apt-get update \
59+
&& apt-get install -y \
60+
git \
61+
python3 \
62+
python3-pip \
63+
python3-wheel \
64+
&& pip install --break-system-packages --upgrade setuptools \
65+
&& pip install --break-system-packages -r requirements.txt \
66+
&& apt autoremove -y \
67+
&& apt clean -y \
68+
&& rm -rf /tmp/* /var/tmp/* \
69+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
70+
&& find /var/cache -type f -delete
71+
72+
73+
ENTRYPOINT ["/app/tools.sh"]
74+
75+
### Light, CLI only
76+
FROM base AS light
77+
78+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
79+
80+
WORKDIR /app
81+
82+
ENTRYPOINT [ "/app/llama-cli" ]
83+
84+
### Server, Server only
85+
FROM base AS server
86+
87+
ENV LLAMA_ARG_HOST=0.0.0.0
88+
89+
COPY --from=build /app/full/llama-server /app
90+
91+
WORKDIR /app
92+
93+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
94+
95+
ENTRYPOINT [ "/app/llama-server" ]

.devops/cuda.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
1212
ARG CUDA_DOCKER_ARCH=default
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
15+
apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
1616

1717
WORKDIR /app
1818

@@ -25,7 +25,7 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
2525
cmake --build build --config Release -j$(nproc)
2626

2727
RUN mkdir -p /app/lib && \
28-
find build -name "*.so" -exec cp {} /app/lib \;
28+
find build -name "*.so*" -exec cp -P {} /app/lib \;
2929

3030
RUN mkdir -p /app/full \
3131
&& cp build/bin/* /app/full \
@@ -74,7 +74,7 @@ ENTRYPOINT ["/app/tools.sh"]
7474
### Light, CLI only
7575
FROM base AS light
7676

77-
COPY --from=build /app/full/llama-cli /app
77+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7878

7979
WORKDIR /app
8080

.devops/intel.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
66

77
ARG GGML_SYCL_F16=OFF
88
RUN apt-get update && \
9-
apt-get install -y git libcurl4-openssl-dev
9+
apt-get install -y git libssl-dev
1010

1111
WORKDIR /app
1212

@@ -21,7 +21,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
2121
cmake --build build --config Release -j$(nproc)
2222

2323
RUN mkdir -p /app/lib && \
24-
find build -name "*.so" -exec cp {} /app/lib \;
24+
find build -name "*.so*" -exec cp -P {} /app/lib \;
2525

2626
RUN mkdir -p /app/full \
2727
&& cp build/bin/* /app/full \
@@ -73,7 +73,7 @@ ENTRYPOINT ["/app/tools.sh"]
7373
FROM base AS light
7474

7575
COPY --from=build /app/lib/ /app
76-
COPY --from=build /app/full/llama-cli /app
76+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7777

7878
WORKDIR /app
7979

.devops/llama-cli-cann.Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ WORKDIR /app
66

77
COPY . .
88

9-
RUN yum install -y gcc g++ cmake make libcurl-devel
9+
RUN yum install -y gcc g++ cmake make openssl-devel
1010
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
1111
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
1212
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
@@ -23,11 +23,12 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
2323
RUN echo "Building with static libs" && \
2424
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
2525
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
26-
cmake --build build --config Release --target llama-cli
26+
cmake --build build --config Release --target llama-cli && \
27+
cmake --build build --config Release --target llama-completion
2728

2829
# TODO: use image with NNRT
2930
FROM ascendai/cann:$ASCEND_VERSION AS runtime
30-
COPY --from=build /app/build/bin/llama-cli /llama-cli
31+
COPY --from=build /app/build/bin/llama-cli /app/build/bin/llama-completion /
3132

3233
ENV LC_ALL=C.utf8
3334

.devops/llama-cpp-cuda.srpm.spec

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ make -j GGML_CUDA=1
3737
%install
3838
mkdir -p %{buildroot}%{_bindir}/
3939
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
40+
cp -p llama-completion %{buildroot}%{_bindir}/llama-cuda-completion
4041
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
4142
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
4243

@@ -68,6 +69,7 @@ rm -rf %{_builddir}/*
6869

6970
%files
7071
%{_bindir}/llama-cuda-cli
72+
%{_bindir}/llama-cuda-completion
7173
%{_bindir}/llama-cuda-server
7274
%{_bindir}/llama-cuda-simple
7375
/usr/lib/systemd/system/llamacuda.service

.devops/llama-cpp.srpm.spec

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ make -j
3939
%install
4040
mkdir -p %{buildroot}%{_bindir}/
4141
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
42+
cp -p llama-completion %{buildroot}%{_bindir}/llama-completion
4243
cp -p llama-server %{buildroot}%{_bindir}/llama-server
4344
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
4445

@@ -70,6 +71,7 @@ rm -rf %{_builddir}/*
7071

7172
%files
7273
%{_bindir}/llama-cli
74+
%{_bindir}/llama-completion
7375
%{_bindir}/llama-server
7476
%{_bindir}/llama-simple
7577
/usr/lib/systemd/system/llama.service

.devops/musa.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ RUN apt-get update && \
1818
python3 \
1919
python3-pip \
2020
git \
21-
libcurl4-openssl-dev \
21+
libssl-dev \
2222
libgomp1
2323

2424
WORKDIR /app
@@ -32,7 +32,7 @@ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
3232
cmake --build build --config Release -j$(nproc)
3333

3434
RUN mkdir -p /app/lib && \
35-
find build -name "*.so" -exec cp {} /app/lib \;
35+
find build -name "*.so*" -exec cp -P {} /app/lib \;
3636

3737
RUN mkdir -p /app/full \
3838
&& cp build/bin/* /app/full \
@@ -81,7 +81,7 @@ ENTRYPOINT ["/app/tools.sh"]
8181
### Light, CLI only
8282
FROM base AS light
8383

84-
COPY --from=build /app/full/llama-cli /app
84+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
8585

8686
WORKDIR /app
8787

.devops/nix/nixpkgs-instances.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# the module `{ pkgs ... }: { /* config */ }` implicitly uses
55
# `_module.args.pkgs` (defined in this case by flake-parts).
66
perSystem =
7-
{ system, ... }:
7+
{ lib, system, ... }:
88
{
99
_module.args = {
1010
# Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
@@ -33,7 +33,7 @@
3333
"CUDA EULA"
3434
"cuDNN EULA"
3535
]
36-
) (p.meta.licenses or [ p.meta.license ]);
36+
) (p.meta.licenses or (lib.toList p.meta.license));
3737
};
3838
# Ensure dependencies use ROCm consistently
3939
pkgsRocm = import inputs.nixpkgs {

0 commit comments

Comments
 (0)