From 42378064d162f60dd5ce891b73f9c8e58d31a886 Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Thu, 7 May 2026 12:12:22 +0800 Subject: [PATCH 1/2] perf(docker): add cargo + ccache cache mounts to py-builder stage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two heavy RUN steps in py-builder (uv sync + maturin build) re-execute on every Python source change because the upstream COPY layer for openviking/ invalidates the cache. Each rerun was ~510s + ~115s ≈ 10 min of wasted work even though Rust/C++ source was unchanged. Add BuildKit cache mounts so cargo and the C++ engine compilation can skip work whose inputs are unchanged: - Mount /cargo-target, cargo registry, and cargo git so cargo's incremental build artifacts persist across layer reruns. Pin CARGO_TARGET_DIR so the path stays stable when uv builds wheels in ephemeral isolated tempdirs. - Install ccache and prepend /usr/lib/ccache to PATH so cmake (which calls shutil.which("gcc")) resolves the ccache wrapper. ccache is path-agnostic, so it benefits the cmake_build subdir even though setup.py recreates it in a fresh tempdir each wheel build. - Mount /root/.ccache so the ccache hash store persists across reruns. Expected: hot rebuilds on Python-only changes drop step 15 from ~510s to ~60-120s (uv wheel packaging overhead remains; cargo + g++ skip on cache hit). --- Dockerfile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Dockerfile b/Dockerfile index 69ffa54112..8e7114431f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,10 +19,20 @@ ARG UV_LOCK_STRATEGY=auto RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ + ccache \ cmake \ git \ && rm -rf /var/lib/apt/lists/* +# Route gcc/g++/cc through ccache so cmake (which asks shutil.which("gcc")) picks +# up /usr/lib/ccache/gcc and benefits from the BuildKit cache mount on /root/.ccache. +ENV PATH="/usr/lib/ccache:${PATH}" +ENV CCACHE_DIR=/root/.ccache +# Pin Cargo's target dir to a stable path so a BuildKit cache mount can persist +# build artifacts across layer reruns even when uv builds the wheel in an +# ephemeral isolated tempdir. +ENV CARGO_TARGET_DIR=/cargo-target + ENV UV_COMPILE_BYTECODE=1 ENV UV_LINK_MODE=copy ENV UV_NO_DEV=1 @@ -44,6 +54,10 @@ COPY third_party/ third_party/ # stale, so Docker builds stay unblocked after dependency changes. Set # UV_LOCK_STRATEGY=locked to keep fail-fast reproducibility checks. RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ + --mount=type=cache,target=/cargo-target,id=cargo-target-${TARGETPLATFORM} \ + --mount=type=cache,target=/usr/local/cargo/registry,id=cargo-registry-${TARGETPLATFORM} \ + --mount=type=cache,target=/usr/local/cargo/git,id=cargo-git-${TARGETPLATFORM} \ + --mount=type=cache,target=/root/.ccache,id=ccache-${TARGETPLATFORM} \ if [ -n "${OPENVIKING_VERSION:-}" ]; then \ export SETUPTOOLS_SCM_PRETEND_VERSION_FOR_OPENVIKING="${OPENVIKING_VERSION}"; \ elif [ -f openviking/_version.py ]; then \ @@ -71,6 +85,10 @@ RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ # Build ragfs-python (Rust RAGFS binding) and extract the native extension # into the installed openviking package. RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ + --mount=type=cache,target=/cargo-target,id=cargo-target-${TARGETPLATFORM} \ + --mount=type=cache,target=/usr/local/cargo/registry,id=cargo-registry-${TARGETPLATFORM} \ + --mount=type=cache,target=/usr/local/cargo/git,id=cargo-git-${TARGETPLATFORM} \ + --mount=type=cache,target=/root/.ccache,id=ccache-${TARGETPLATFORM} \ uv pip install maturin && \ export _TMPDIR=$(mktemp -d) && \ trap 'rm -rf "$_TMPDIR"' EXIT && \ From 9523b9e0ffdcb125f1fcdf39934473e30df1d6de Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Thu, 7 May 2026 12:45:42 +0800 Subject: [PATCH 2/2] perf(docker): drop redundant second maturin build step The second RUN step in py-builder built ragfs-python a second time and extracted its .so into the installed openviking package. This was redundant: setup.py's build_ragfs_python_artifact() already runs maturin during step 15 (uv sync --no-editable), and because build_meta passes 'bdist_wheel' through PEP 517, _should_require_ragfs_artifact() returns True and the build fails closed if maturin can't produce ragfs_python.so. The .so is then bundled into the wheel via package_data and installed into /app/.venv on wheel install. The second step's only effect was to overwrite the same file, costing ~115s per build. Verified after the fact by inspecting the installed venv and importing ragfs_python in the runtime container. --- Dockerfile | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8e7114431f..7c172ac47d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -82,47 +82,6 @@ RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ ;; \ esac -# Build ragfs-python (Rust RAGFS binding) and extract the native extension -# into the installed openviking package. -RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \ - --mount=type=cache,target=/cargo-target,id=cargo-target-${TARGETPLATFORM} \ - --mount=type=cache,target=/usr/local/cargo/registry,id=cargo-registry-${TARGETPLATFORM} \ - --mount=type=cache,target=/usr/local/cargo/git,id=cargo-git-${TARGETPLATFORM} \ - --mount=type=cache,target=/root/.ccache,id=ccache-${TARGETPLATFORM} \ - uv pip install maturin && \ - export _TMPDIR=$(mktemp -d) && \ - trap 'rm -rf "$_TMPDIR"' EXIT && \ - cd crates/ragfs-python && \ - python -m maturin build --release --out "$_TMPDIR" && \ - cd ../.. && \ - export _OV_LIB=$(python -c "import openviking; from pathlib import Path; print(Path(openviking.__file__).resolve().parent / 'lib')") && \ - mkdir -p "$_OV_LIB" && \ - python - <<'PY' -import glob -import os -import sys -import zipfile - -tmpdir = os.environ["_TMPDIR"] -ov_lib = os.environ["_OV_LIB"] -whls = glob.glob(os.path.join(tmpdir, "ragfs_python-*.whl")) -assert whls, "maturin produced no wheel" - -with zipfile.ZipFile(whls[0]) as zf: - for name in zf.namelist(): - bn = os.path.basename(name) - if bn.startswith("ragfs_python") and (bn.endswith(".so") or bn.endswith(".pyd")): - dst = os.path.join(ov_lib, bn) - with zf.open(name) as src, open(dst, "wb") as f: - f.write(src.read()) - os.chmod(dst, 0o755) - print(f"ragfs-python: extracted {bn} -> {dst}") - sys.exit(0) - -print("WARNING: No ragfs_python .so/.pyd in wheel") -sys.exit(1) -PY - # Stage 3: runtime FROM python:3.13-slim-trixie