Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
1876ef1
Document Depot/sccache cache as jllama-only in cross-repo scope
claude Jun 20, 2026
20e92d2
Trim jllama cache cross-repo note to a pointer
claude Jun 20, 2026
c643b20
ci: add sccache probe health-check so a crashing sccache falls back u…
claude Jun 20, 2026
4af2250
ci: re-enable sccache on the manylinux2014 dockcross job (phase 2, jo…
claude Jun 20, 2026
c4b3adf
ci: default fetched sccache to v0.16.0 (latest)
claude Jun 20, 2026
1ea472e
ci: bump all dockcross images to 20260515-5fd14ac (latest)
claude Jun 20, 2026
bf109c2
ci: enable sccache on the CUDA dockcross job; manylinux2014 verified …
claude Jun 20, 2026
625d743
build: bump NullAway 0.13.6 -> 0.13.7 and pitest-maven 1.25.4 -> 1.25.5
claude Jun 20, 2026
df71f9c
build: googletest 1.15.2->1.17.0 + opt-in CUDA_FAST_BUILD single-arch…
claude Jun 20, 2026
8f064c7
ci: cache GGUF test models via GitHub actions/cache (skip HuggingFace…
claude Jun 20, 2026
9a1d493
docs(ci): explain the GGUF model cache (purpose, no flag, vs sccache)
claude Jun 20, 2026
dd264b2
feat(server): add NativeServer JNI-bridge scaffold (native HTTP serve…
claude Jun 20, 2026
698258d
ci(cuda): fast single-arch CUDA build for validation, full arch set o…
claude Jun 20, 2026
c0a10cb
ci(cuda): pin newest arch (sm_120 Blackwell) for the fast validation …
claude Jun 20, 2026
3ab3aa7
ci(sccache): enable Phase 2 cache on all 5 dockcross jobs at once
claude Jun 20, 2026
346f247
docs(TODO): add Windows sccache item — needs Ninja, evaluate dual-art…
claude Jun 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 57 additions & 8 deletions .github/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,19 @@ fi
# while macOS installs it via brew in the workflow. Best-effort and inert-safe: any failure
# leaves sccache absent, so the build just proceeds uncached. The static musl binary runs in
# any x86_64 Linux container (the cross-compile host is always x86_64).
#
# SCCACHE_DL_VERSION is overridable per-job, so a container that crashes one sccache build can
# try another without editing this script (the in-container panic that stalled phase 2 was on
# v0.8.2; v0.16.0 is the latest release and the default). A wrong/unavailable version just fails
# the `curl -f` and falls back to an uncached build, so bumping it can never turn a build red.
SCCACHE_DL_VERSION="${SCCACHE_DL_VERSION:-0.16.0}"
if [ "${USE_CACHE:-true}" = "true" ] && [ -n "${SCCACHE_WEBDAV_TOKEN:-}${SCCACHE_GHA_ENABLED:-}" ] \
&& ! command -v sccache >/dev/null 2>&1 \
&& [ "$(uname -s)" = "Linux" ] && [ "$(uname -m)" = "x86_64" ]; then
SCCACHE_REL="sccache-v0.8.2-x86_64-unknown-linux-musl"
SCCACHE_REL="sccache-v${SCCACHE_DL_VERSION}-x86_64-unknown-linux-musl"
echo "build.sh: fetching ${SCCACHE_REL} (no sccache on PATH)..."
if curl -fsSL --proto =https --proto-redir =https \
"https://github.com/mozilla/sccache/releases/download/v0.8.2/${SCCACHE_REL}.tar.gz" \
"https://github.com/mozilla/sccache/releases/download/v${SCCACHE_DL_VERSION}/${SCCACHE_REL}.tar.gz" \
-o /tmp/sccache.tgz && tar -xzf /tmp/sccache.tgz -C /tmp; then
export PATH="/tmp/${SCCACHE_REL}:$PATH"
echo "build.sh: sccache -> $(command -v sccache || echo 'still missing')"
Expand All @@ -36,14 +42,55 @@ if [ "${USE_CACHE:-true}" = "true" ] && [ -n "${SCCACHE_WEBDAV_TOKEN:-}${SCCACHE
fi
fi

# Health-check before trusting sccache as the compiler launcher. Because sccache *is* the
# launcher (cmake runs `sccache <compiler> ...` for every TU), a present-but-crashing sccache
# fails every compile and reds the whole build — exactly the in-container panic that stalled
# phase 2 (the static-musl binary panicked while wrapping the cross-compiler, failing ggml.c.o).
# The probe runs the real compiler through sccache on a trivial TU; only if that succeeds is the
# launcher enabled. On any failure it logs the captured output (the Rust panic backtrace, plus
# the detached server's SCCACHE_ERROR_LOG when a job sets one) and the build runs WITHOUT the
# cache — a clean, uncached -O3 build that still goes green. This closes the gap the old
# absent-only guard left: it handled sccache *missing*, not sccache *crashing*.
#
# Globals:   CC (read; optional compiler to probe, else the first of cc/gcc/clang on PATH)
#            SCCACHE_ERROR_LOG (read; optional path to a server log dumped on failure)
# Arguments: none
# Outputs:   status and diagnostic lines on stdout, each prefixed with "build.sh:"
# Returns:   0 when sccache compiled the probe TU, 1 in every other case
sccache_can_wrap_compiler() {
  # Keep all scratch state function-local so the probe never overwrites same-named
  # variables (e.g. a loop variable) in the script that calls it.
  local probe_cc probe_dir probe_out probe_rc candidate

  # Prefer the compiler the build will actually use; otherwise take the first common one.
  probe_cc="${CC:-}"
  if [ -z "$probe_cc" ]; then
    for candidate in cc gcc clang; do
      if command -v "$candidate" >/dev/null 2>&1; then
        probe_cc="$candidate"
        break
      fi
    done
  fi
  if [ -z "$probe_cc" ]; then
    echo "build.sh: sccache probe: no C compiler on PATH to probe; building uncached"
    return 1
  fi

  # mktemp gives an unpredictable private directory. The PID-based fallback is only for
  # environments without mktemp, and it must create the directory itself: plain mkdir
  # (no -p) refuses a path that already exists instead of silently reusing it.
  if ! probe_dir="$(mktemp -d 2>/dev/null)" || [ -z "$probe_dir" ]; then
    probe_dir="/tmp/sccache-probe.$$"
    if ! mkdir "$probe_dir" 2>/dev/null; then
      echo "build.sh: sccache probe: cannot create a scratch directory; building uncached"
      return 1
    fi
  fi

  printf 'int main(void){return 0;}\n' > "$probe_dir/probe.c"
  # Capture stdout+stderr so a panic backtrace can be replayed in the log below.
  probe_out="$(sccache "$probe_cc" -c "$probe_dir/probe.c" -o "$probe_dir/probe.o" 2>&1)"
  probe_rc=$?
  # ${var:?} aborts the expansion if the path is somehow empty, so rm can never see "".
  rm -rf -- "${probe_dir:?}"

  if [ "$probe_rc" -ne 0 ]; then
    echo "build.sh: sccache probe FAILED (rc=${probe_rc}) wrapping '${probe_cc}' — building WITHOUT cache."
    if [ -n "$probe_out" ]; then
      printf '%s\n' "$probe_out" | sed 's/^/build.sh: sccache-probe| /'
    fi
    if [ -n "${SCCACHE_ERROR_LOG:-}" ] && [ -f "${SCCACHE_ERROR_LOG}" ]; then
      echo "build.sh: --- detached server log (${SCCACHE_ERROR_LOG}) ---"
      # Best-effort: an unreadable log must not change the probe's verdict.
      sed 's/^/build.sh: sccache-srv| /' "${SCCACHE_ERROR_LOG}" 2>/dev/null || true
    fi
    return 1
  fi

  echo "build.sh: sccache probe OK (wrapped '${probe_cc}')"
  return 0
}

# Optional shared compiler cache: sccache fronting Depot Cache (WebDAV). Enabled only when
# USE_CACHE is true AND sccache + a cache token are present, so it stays inert before the
# DEPOT_TOKEN secret is configured and on fork PRs (secrets hidden) — those just compile
# normally. sccache is content-addressed, so a cache hit is bit-identical to a fresh -O3
# compile (release-safe), and it degrades to direct compilation if the cache is unreachable.
# USE_CACHE is true AND sccache + a cache token are present AND the probe confirms sccache can
# wrap the compiler — so it stays inert before the DEPOT_TOKEN secret is configured, on fork PRs
# (secrets hidden), and when sccache would crash; all of those just compile normally. sccache is
# content-addressed, so a cache hit is bit-identical to a fresh -O3 compile (release-safe), and
# it degrades to direct compilation if the cache is unreachable.
LAUNCH=""
if [ "${USE_CACHE:-true}" = "true" ] && command -v sccache >/dev/null 2>&1 \
&& [ -n "${SCCACHE_WEBDAV_TOKEN:-}${SCCACHE_GHA_ENABLED:-}" ]; then
&& [ -n "${SCCACHE_WEBDAV_TOKEN:-}${SCCACHE_GHA_ENABLED:-}" ] \
&& sccache_can_wrap_compiler; then
LAUNCH="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache"
echo "build.sh: sccache ON (endpoint=${SCCACHE_WEBDAV_ENDPOINT:-default}), building with -j${JOBS}"
else
Expand All @@ -53,6 +100,8 @@ fi
cmake -Bbuild $LAUNCH $@ || exit 1
cmake --build build --config Release -j"${JOBS}" || exit 1

if command -v sccache >/dev/null 2>&1; then
# Only query stats when sccache was actually used as the launcher; if the probe rejected a
# crashing sccache, re-invoking it here would just repeat the crash output (harmless but noisy).
if [ -n "$LAUNCH" ] && command -v sccache >/dev/null 2>&1; then
sccache --show-stats || true
fi
24 changes: 23 additions & 1 deletion .github/build_cuda_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,26 @@ sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute

sudo dnf install -y cuda-toolkit-13-2

exec .github/build.sh $@ -DGGML_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda-13.2/bin/nvcc
# CUDA target architectures — build-speed knob.
#
# Default (CUDA_FAST_BUILD unset): we do NOT pass CMAKE_CUDA_ARCHITECTURES, so ggml/llama.cpp
# compiles its full default arch set. That is exactly what release artifacts must ship (every
# supported GPU generation) and is the slow part of this ~70 min job: nvcc recompiles each .cu
# kernel once per architecture. sccache caches the gcc C/C++ TUs but NOT the nvcc .cu kernels
# (sccache's nvcc support is limited/experimental), so the per-arch nvcc passes dominate even
# with the cache on — which is why this knob exists as the real CUDA build-time lever.
#
# Dev fast build (CUDA_FAST_BUILD=1): compile for a SINGLE architecture instead of the full
# set, removing most of the nvcc time. Defaults to `native` (the build machine's own GPU —
# needs a GPU present at configure time); override with CUDA_ARCH, e.g. CUDA_ARCH=90. This is
# a MANUAL local-dev knob only: CI and release never set it, because an artifact built this
# way runs on a single GPU generation. (Direct-cmake equivalent: -DCMAKE_CUDA_ARCHITECTURES=native.)
# Translate the CUDA_FAST_BUILD switch into an optional cmake architecture flag.
# A truthy value (1/true/TRUE/yes/on) restricts the build to CUDA_ARCH, defaulting to
# `native`; any other value, or leaving it unset, yields an empty string so no
# architecture flag is added.
case "${CUDA_FAST_BUILD:-}" in
  1 | true | TRUE | yes | on)
    CUDA_ARCH_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH:-native}"
    echo "build_cuda_linux.sh: CUDA_FAST_BUILD set -> ${CUDA_ARCH_ARGS} (DEV ONLY — not release-distributable)"
    ;;
  *)
    CUDA_ARCH_ARGS=""
    ;;
esac

exec .github/build.sh $@ -DGGML_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda-13.2/bin/nvcc $CUDA_ARCH_ARGS
10 changes: 5 additions & 5 deletions .github/build_opencl_android.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ if [ ! -f "$LOADER_BUILD/libOpenCL.so" ]; then
cmake --build "$LOADER_BUILD" --config Release -j"$(nproc)"
fi

mkdir -p build
# Match .github/build.sh: pass $@ unquoted so the CI's single-string
# Delegate the jllama cmake configure + build to build.sh so it inherits the
# sccache probe, Depot cache launcher, and --show-stats output automatically —
# same as build_cuda_linux.sh. Pass $@ unquoted so the CI's single-string
# argument is word-split into individual -D flags for cmake.
cmake -Bbuild \
exec .github/build.sh \
-DOpenCL_INCLUDE_DIR="$HEADERS_DIR" \
-DOpenCL_LIBRARY="$LOADER_BUILD/libOpenCL.so" \
$@ || exit 1
cmake --build build --config Release -j"$(nproc)" || exit 1
$@
2 changes: 1 addition & 1 deletion .github/dockcross/dockcross-android-arm
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

DEFAULT_DOCKCROSS_IMAGE=dockcross/android-arm:20260312-9b3357c
DEFAULT_DOCKCROSS_IMAGE=dockcross/android-arm:20260515-5fd14ac

#------------------------------------------------------------------------------
# Helpers
Expand Down
2 changes: 1 addition & 1 deletion .github/dockcross/dockcross-android-arm64
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

DEFAULT_DOCKCROSS_IMAGE=dockcross/android-arm64:20260312-9b3357c
DEFAULT_DOCKCROSS_IMAGE=dockcross/android-arm64:20260515-5fd14ac

#------------------------------------------------------------------------------
# Helpers
Expand Down
2 changes: 1 addition & 1 deletion .github/dockcross/dockcross-linux-arm64-lts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

DEFAULT_DOCKCROSS_IMAGE=dockcross/linux-arm64-lts:20260313-9b3357c
DEFAULT_DOCKCROSS_IMAGE=dockcross/linux-arm64-lts:20260515-5fd14ac

#------------------------------------------------------------------------------
# Helpers
Expand Down
2 changes: 1 addition & 1 deletion .github/dockcross/dockcross-manylinux2014-x64
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

DEFAULT_DOCKCROSS_IMAGE=dockcross/manylinux2014-x64:20260312-9b3357c
DEFAULT_DOCKCROSS_IMAGE=dockcross/manylinux2014-x64:20260515-5fd14ac

#------------------------------------------------------------------------------
# Helpers
Expand Down
2 changes: 1 addition & 1 deletion .github/dockcross/dockcross-manylinux_2_28-x64
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

DEFAULT_DOCKCROSS_IMAGE=dockcross/manylinux_2_28-x64:20260312-9b3357c
DEFAULT_DOCKCROSS_IMAGE=dockcross/manylinux_2_28-x64:20260515-5fd14ac

#------------------------------------------------------------------------------
# Helpers
Expand Down
Loading
Loading