Skip to content

Commit bbde992

Browse files
committed
feat(llama.cpp): add turboquant support
This PR adds a patchset based on the great work of @TheTom in https://github.com/TheTom/llama-cpp-turboquant and creates a pipeline that automatically updates the patches against upstream. It also creates the necessary scaffolding for doing this with other patch sources. Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 6c635e8 commit bbde992

4 files changed

Lines changed: 224 additions & 11 deletions

File tree

.github/workflows/bump_deps.yaml

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ jobs:
1414
variable: "LLAMA_VERSION"
1515
branch: "master"
1616
file: "backend/cpp/llama-cpp/Makefile"
17+
patches-dir: "backend/cpp/llama-cpp/patches"
1718
- repository: "ggml-org/whisper.cpp"
1819
variable: "WHISPER_CPP_VERSION"
1920
branch: "master"
@@ -41,6 +42,25 @@ jobs:
4142
id: bump
4243
run: |
4344
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} ${{ matrix.file }}
45+
- name: Install yq
46+
if: matrix.patches-dir != ''
47+
run: |
48+
sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
49+
sudo chmod +x /usr/local/bin/yq
50+
- name: Upgrade patches 🩹
51+
if: matrix.patches-dir != ''
52+
id: patches
53+
continue-on-error: true
54+
run: |
55+
bash scripts/patch_utils/upgrade_patches.sh ${{ matrix.patches-dir }}
56+
- name: Note patch failure in PR body
57+
if: matrix.patches-dir != '' && steps.patches.outcome == 'failure'
58+
run: |
59+
echo "" >> "${{ matrix.variable }}_message.txt"
60+
echo "⚠️ **Patch rebase failed** for \`${{ matrix.patches-dir }}\`. Manual intervention required to update patches." >> "${{ matrix.variable }}_message.txt"
61+
- name: Prepare PR metadata
62+
id: metadata
63+
run: |
4464
{
4565
echo 'message<<EOF'
4666
cat "${{ matrix.variable }}_message.txt"
@@ -59,9 +79,9 @@ jobs:
5979
token: ${{ secrets.UPDATE_BOT_TOKEN }}
6080
push-to-fork: ci-forks/LocalAI
6181
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
62-
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
82+
title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.metadata.outputs.commit }}`'
6383
branch: "update/${{ matrix.variable }}"
64-
body: ${{ steps.bump.outputs.message }}
84+
body: ${{ steps.metadata.outputs.message }}
6585
signoff: true
6686

6787

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Patch sources for the llama-cpp backend.
2+
# Each source declares a fork whose commits are extracted as patches
3+
# and applied on top of upstream llama.cpp during the build.
4+
# See scripts/patch_utils/upgrade_patches.sh for the rebase/regenerate tool.
5+
sources:
6+
- name: turboquant
7+
repo: https://github.com/TheTom/llama-cpp-turboquant.git
8+
branch: feature/turboquant-kv-cache
9+
fork_base: ded446b34c0cd803a0122446b848619adbb458cf
10+
upstream_repo: https://github.com/ggml-org/llama.cpp.git
11+
version_file: Makefile
12+
version_var: LLAMA_VERSION

backend/cpp/llama-cpp/prepare.sh

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,66 @@
11
#!/bin/bash
2+
set -e
23

3-
## Patches
4+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
45

5-
## Apply patches from the `patches` directory
6-
if [ -d "patches" ]; then
7-
for patch in $(ls patches); do
8-
echo "Applying patch $patch"
9-
patch -d llama.cpp/ -p1 < patches/$patch
10-
done
6+
## Generate patches from fork sources if not already present.
7+
## If patches/<name>/ already contains .patch files (vendored), skip fetching.
8+
## Otherwise, clone the fork and generate them on the fly.
9+
# Generate patch files for every source listed in patches/sources.yaml that
# does not already have vendored *.patch files under patches/<name>/.
# Requires yq; when yq is missing only a warning is printed and the build
# continues with whatever patches are already on disk.
# Reads:  patches/sources.yaml (relative to the current directory), SCRIPT_DIR
# Writes: $SCRIPT_DIR/patches/<name>/*.patch
if [ -f "patches/sources.yaml" ] && command -v yq &>/dev/null; then
  SOURCE_COUNT=$(yq '.sources | length' patches/sources.yaml)

  for ((i = 0; i < SOURCE_COUNT; i++)); do
    NAME=$(yq ".sources[$i].name" patches/sources.yaml)
    REPO=$(yq ".sources[$i].repo" patches/sources.yaml)
    BRANCH=$(yq ".sources[$i].branch" patches/sources.yaml)
    FORK_BASE=$(yq ".sources[$i].fork_base" patches/sources.yaml)

    SOURCE_DIR="patches/$NAME"

    # Count vendored patches with a glob instead of parsing `ls` output.
    # An unmatched glob stays literal, hence the existence check.
    EXISTING=0
    for existing_patch in "$SOURCE_DIR"/*.patch; do
      if [ -e "$existing_patch" ]; then
        EXISTING=$((EXISTING + 1))
      fi
    done

    if [ "$EXISTING" -gt 0 ]; then
      echo "Source '$NAME': $EXISTING patches already present — skipping fetch."
      continue
    fi

    echo "Source '$NAME': no patches found — fetching from $REPO ($BRANCH)"

    # Do not reuse the name TMPDIR: mktemp itself consults that variable, so
    # once the directory is removed a later `mktemp -d` (next source) can
    # fail when TMPDIR is exported in the caller's environment.
    CLONE_TMP=$(mktemp -d)

    if git clone --single-branch -b "$BRANCH" --depth=500 "$REPO" "$CLONE_TMP/fork" 2>&1; then
      # `git -C` keeps the working directory untouched, so the cwd-relative
      # paths used elsewhere in this script keep pointing at the same place.
      if ! PATCH_COUNT=$(git -C "$CLONE_TMP/fork" rev-list --count "$FORK_BASE"..HEAD 2>/dev/null); then
        echo "WARNING: Could not resolve fork_base $FORK_BASE in the clone of $REPO — no patches generated for '$NAME'"
        PATCH_COUNT=0
      fi
      echo "  $PATCH_COUNT commits to extract"

      if [ "$PATCH_COUNT" -gt 0 ]; then
        mkdir -p "$SCRIPT_DIR/$SOURCE_DIR"
        # Only stdout (the list of generated files) is silenced; errors stay
        # visible so an abort is explained instead of silent under `set -e`.
        if ! git -C "$CLONE_TMP/fork" format-patch "$FORK_BASE"..HEAD -o "$SCRIPT_DIR/$SOURCE_DIR/" >/dev/null; then
          echo "ERROR: Failed to generate patches for source '$NAME'"
          rm -rf "$CLONE_TMP"
          exit 1
        fi
        echo "  Generated $PATCH_COUNT patches in $SOURCE_DIR/"
      fi
    else
      echo "WARNING: Failed to clone $REPO — skipping source '$NAME'"
    fi

    rm -rf "$CLONE_TMP"
  done
elif [ -f "patches/sources.yaml" ]; then
  echo "WARNING: yq not found — skipping source-based patch generation. Install yq to enable."
fi
1249

13-
set -e
50+
## Apply patches from source subdirectories (alphabetical order)
51+
# Apply every patch under patches/ to the llama.cpp/ tree.
# Order: each immediate subdirectory of patches/ (sorted), with the *.patch
# files inside it in glob order; then any top-level patches/*.patch files
# (manual patches). The first patch that fails to apply aborts with status 1.
if [ -d "patches" ]; then
  # Read directory names line by line rather than word-splitting the output
  # of find, so names containing whitespace are handled correctly.
  while IFS= read -r source_dir; do
    for p in "$source_dir"/*.patch; do
      # An unmatched glob stays literal; skip it.
      [ -e "$p" ] || continue
      echo "Applying: $p"
      patch -d llama.cpp/ -p1 < "$p" || { echo "FAILED: $p"; exit 1; }
    done
  done < <(find patches -mindepth 1 -maxdepth 1 -type d | sort)

  # Apply any top-level .patch files (manual patches)
  for p in patches/*.patch; do
    [ -e "$p" ] || continue
    echo "Applying: $p"
    patch -d llama.cpp/ -p1 < "$p" || { echo "FAILED: $p"; exit 1; }
  done
fi
1464

1565
for file in $(ls llama.cpp/tools/server/); do
1666
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
@@ -28,4 +78,3 @@ else
2878
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
2979
fi
3080
set -e
31-
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#!/bin/bash
# upgrade_patches.sh — Rebase and regenerate patches from multiple fork sources.
#
# Usage: upgrade_patches.sh <backend-patches-dir>
#
# Reads sources.yaml in the given directory. For each source, clones the fork,
# rebases its commits onto the current upstream version (read from the backend's
# Makefile), and regenerates format-patch files into the source's subdirectory.
# After a successful regeneration the source's fork_base in sources.yaml is
# updated to the new base.
#
# Exit codes:
#   0 — all sources rebased successfully (or there was nothing to do)
#   1 — usage/tooling error, or one or more sources failed
#       (old patches left in place for those)
#
# Requires: git, yq (https://github.com/mikefarah/yq)

set -uo pipefail

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
  echo "Usage: $(basename "$0") <backend-patches-dir>" >&2
  exit 1
fi

PATCHES_DIR="$(realpath "$1")"
BACKEND_DIR="$(dirname "$PATCHES_DIR")"
SOURCES_FILE="$PATCHES_DIR/sources.yaml"

if [ ! -f "$SOURCES_FILE" ]; then
  echo "No sources.yaml found in $PATCHES_DIR — nothing to upgrade."
  exit 0
fi

# Without yq every lookup below would come back empty and the script would
# report success without having processed anything.
if ! command -v yq >/dev/null 2>&1; then
  echo "ERROR: yq is required but was not found in PATH" >&2
  exit 1
fi

# Private scratch space. The name TMPDIR is deliberately avoided because
# mktemp and other tools read that variable themselves.
WORK_DIR=$(mktemp -d) || { echo "ERROR: mktemp failed" >&2; exit 1; }
# Single quotes: expand (and quote) the path when the trap runs.
trap 'rm -rf -- "$WORK_DIR"' EXIT

SOURCE_COUNT=$(yq '.sources | length' "$SOURCES_FILE")
case "$SOURCE_COUNT" in
  '' | *[!0-9]*)
    echo "ERROR: Could not read the source list from $SOURCES_FILE" >&2
    exit 1
    ;;
esac

#######################################
# Rebase one source onto the current upstream base and regenerate its patches.
# Globals:   PATCHES_DIR, BACKEND_DIR, SOURCES_FILE, WORK_DIR (all read)
# Arguments: $1 - zero-based index of the source in sources.yaml
# Outputs:   progress on stdout, errors and warnings on stderr
# Returns:   0 when the source was upgraded or needed no work, 1 on failure
#######################################
upgrade_source() {
  local idx=$1
  local name repo branch fork_base upstream_repo version_file version_var
  name=$(yq ".sources[$idx].name" "$SOURCES_FILE")
  repo=$(yq ".sources[$idx].repo" "$SOURCES_FILE")
  branch=$(yq ".sources[$idx].branch" "$SOURCES_FILE")
  fork_base=$(yq ".sources[$idx].fork_base" "$SOURCES_FILE")
  upstream_repo=$(yq ".sources[$idx].upstream_repo" "$SOURCES_FILE")
  version_file=$(yq ".sources[$idx].version_file" "$SOURCES_FILE")
  version_var=$(yq ".sources[$idx].version_var" "$SOURCES_FILE")

  echo "=========================================="
  echo "Processing source: $name"
  echo "  repo:       $repo"
  echo "  branch:     $branch"
  echo "  fork_base:  $fork_base"
  echo "=========================================="

  # yq prints the literal string "null" for a missing key.
  local field
  for field in "$name" "$repo" "$branch" "$fork_base" \
    "$upstream_repo" "$version_file" "$version_var"; do
    if [ -z "$field" ] || [ "$field" = "null" ]; then
      echo "ERROR: Source #$idx in $SOURCES_FILE is missing a required field" >&2
      return 1
    fi
  done

  # Read the new upstream base commit from the backend's Makefile
  local new_base
  new_base=$(grep -m1 "^${version_var}?=" "$BACKEND_DIR/$version_file" \
    | cut -d'=' -f2 \
    | tr -d '[:space:]')
  if [ -z "$new_base" ]; then
    echo "ERROR: Could not read $version_var from $BACKEND_DIR/$version_file" >&2
    return 1
  fi
  echo "  new base:   $new_base"

  if [ "$fork_base" = "$new_base" ]; then
    echo "  Base unchanged — skipping rebase."
    return 0
  fi

  # Index-based directory names avoid collisions and odd characters in $name.
  local clone_dir="$WORK_DIR/clone-$idx"
  local staging_dir="$WORK_DIR/patches-$idx"

  # Clone the fork
  echo "  Cloning fork..."
  if ! git clone --single-branch -b "$branch" "$repo" "$clone_dir" 2>&1; then
    echo "ERROR: Failed to clone $repo branch $branch" >&2
    return 1
  fi

  # All git calls use -C so the working directory never changes, whichever
  # path leaves this function.

  # Fetch upstream to get the new base commit
  echo "  Fetching upstream ($upstream_repo)..."
  git -C "$clone_dir" remote add upstream "$upstream_repo"
  if ! git -C "$clone_dir" fetch upstream 2>&1; then
    echo "ERROR: Failed to fetch upstream $upstream_repo" >&2
    return 1
  fi

  # Count fork-only commits
  local commit_count
  if ! commit_count=$(git -C "$clone_dir" rev-list --count "$fork_base"..HEAD 2>/dev/null); then
    echo "WARNING: Could not count commits since fork_base $fork_base" >&2
    commit_count=0
  fi
  echo "  Fork has $commit_count commits on top of base"

  if [ "$commit_count" -eq 0 ]; then
    echo "  No fork commits found — skipping."
    return 0
  fi

  # Rebasing creates new commits, which git refuses to do without a committer
  # identity. Provide a placeholder in this throwaway clone when none is
  # configured; patch "From:" headers come from the commit author.
  if ! git -C "$clone_dir" config user.email >/dev/null 2>&1; then
    git -C "$clone_dir" config user.email "patch-bot@localhost"
  fi
  if ! git -C "$clone_dir" config user.name >/dev/null 2>&1; then
    git -C "$clone_dir" config user.name "patch-bot"
  fi

  # Rebase onto the new base
  echo "  Rebasing $commit_count commits onto $new_base..."
  if ! git -C "$clone_dir" rebase --onto "$new_base" "$fork_base" HEAD 2>&1; then
    echo "WARNING: Rebase failed for source '$name'" >&2
    echo "  Patches need manual attention." >&2
    echo "  Old patches left in place." >&2
    git -C "$clone_dir" rebase --abort 2>/dev/null || true
    return 1
  fi
  echo "  Rebase succeeded!"

  # Count rebased commits
  local rebased_count
  rebased_count=$(git -C "$clone_dir" rev-list --count "$new_base"..HEAD)
  echo "  Regenerating $rebased_count patches..."

  # Generate into a staging directory first, so a failure here cannot leave
  # the repository with its old patches deleted and nothing to replace them.
  mkdir -p "$staging_dir"
  if ! git -C "$clone_dir" format-patch "$new_base"..HEAD -o "$staging_dir/" 2>&1; then
    echo "ERROR: git format-patch failed for source '$name'" >&2
    echo "  Old patches left in place." >&2
    return 1
  fi

  local -a new_patches=()
  local patch_file
  for patch_file in "$staging_dir"/*.patch; do
    # An unmatched glob stays literal; only collect real files.
    if [ -e "$patch_file" ]; then
      new_patches+=("$patch_file")
    fi
  done

  # Clear old patches and move the regenerated ones into place
  local source_patches_dir="$PATCHES_DIR/$name"
  mkdir -p "$source_patches_dir"
  rm -f -- "$source_patches_dir"/*.patch
  if [ "${#new_patches[@]}" -gt 0 ]; then
    if ! mv -- "${new_patches[@]}" "$source_patches_dir/"; then
      echo "ERROR: Could not install patches into $source_patches_dir" >&2
      return 1
    fi
  fi

  # Update fork_base in sources.yaml only after the patches are in place
  if ! yq -i ".sources[$idx].fork_base = \"$new_base\"" "$SOURCES_FILE"; then
    echo "ERROR: Could not update fork_base for source '$name' in $SOURCES_FILE" >&2
    return 1
  fi

  echo "  Generated ${#new_patches[@]} patch files in $source_patches_dir"
  return 0
}

FAILED=0

for ((i = 0; i < SOURCE_COUNT; i++)); do
  upgrade_source "$i" || FAILED=1
done

if [ "$FAILED" -ne 0 ]; then
  echo ""
  echo "One or more sources failed to rebase. See above for details."
  exit 1
fi

echo ""
echo "All sources upgraded successfully."
exit 0

0 commit comments

Comments
 (0)