feat(docker): slim/full image variants + cached deps layer + [full] extra rename (#138)

georgeh0 · web-flow · commit 6f84edca2e11 · 2026-04-14T16:25:42.000-07:00
* perf(docker): split install into stable deps + per-release layers; add GHA cache

Dockerfile previously installed cocoindex-code, cocoindex, torch,
sentence-transformers, and all transitive deps in one RUN. Any change to
the source tree (via COPY . /ccc-src) invalidated that single layer,
forcing a full re-install — ~1 GB of wheels for torch + friends — on
every release. Under QEMU for the arm64 cross-build this was slow
enough to be painful.

Split into two stages:
- `deps`: install cocoindex + cocoindex-code[default] from PyPI. Cache
  key is just the RUN command string, so this layer is reused across
  releases until we bump the pins.
- `builder`: overlay the release version via
  `CCC_INSTALL_SPEC=/ccc-src[default]` with `--no-deps
  --force-reinstall` — only the cocoindex-code package is touched; the
  heavy deps layer stays untouched.

Also add BuildKit layer cache (`type=gha`) to the publish-docker job so
the deps layer persists across workflow runs, not just within a single
build.

* feat(docker,packaging): slim/full image variants; rename [default]→[full] extra

Build two Docker image variants per release:
- slim (:latest, default) — ~450 MB. LiteLLM-only. cocoindex + cocoindex-code
  without sentence-transformers. Targets cloud-backed embeddings.
- full (:full)            — ~5 GB. Bundles sentence-transformers + torch +
  a pre-baked default model. Targets offline-ready local embeddings.

Dockerfile gains a CCC_VARIANT build arg that gates stage 1's
sentence-transformers install and stage 3's model bake. The release workflow
runs a matrix over {slim, full}; each variant has its own GHA cache scope so
layer reuse works across releases without the variants evicting each
other.

Also rename the PyPI `[default]` umbrella extra to `[full]` so pip and
Docker names match. `[embeddings-local]` remains the canonical primary
extra (the one that specifically pulls in sentence-transformers); `[full]`
is its umbrella alias that may bundle additional optional niceties later.
CLI hints that point at missing sentence-transformers continue to name
`[embeddings-local]` directly — the most specific pointer for that case.

README documents both image variants with a comparison table and narrows
the Mac-on-Docker MPS note to only :full users (slim + LiteLLM is
unaffected).
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -112,7 +112,7 @@ jobs:
           --repo '${{ github.repository }}'
 
   publish-docker:
-    name: Build & push Docker image to Docker Hub and GHCR
+    name: Build & push Docker image (${{ matrix.variant }})
     # Runs on real releases, and on manual dispatch with `test_docker=true`
     # for verifying registry credentials before the first release.
     if: github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && inputs.test_docker)
@@ -123,6 +123,17 @@ jobs:
     permissions:
       contents: read
       packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # slim (default) — LiteLLM-only, ~450 MB. Publishes as `:latest`.
+          - variant: slim
+            install_spec: /ccc-src
+          # full — bundles sentence-transformers + torch + baked model,
+          # ~5 GB. Publishes as `:full`.
+          - variant: full
+            install_spec: /ccc-src[full]
     steps:
       - uses: actions/checkout@v4
 
@@ -151,25 +162,43 @@ jobs:
 
       - name: Compute image tags
         id: tags
-        # Real releases: push `:latest` and `:<version>` to both registries.
-        # Manual dispatches: push only `:test` so we don't clobber `:latest`.
+        # Tag scheme:
+        #   slim on release:  :latest, :<version>
+        #   full on release:  :full,   :<version>-full
+        #   slim on dispatch: :test
+        #   full on dispatch: :test-full
+        # Dispatched tags stay out of the `:latest` / `:<version>` namespace
+        # so manual test runs don't clobber what users pull.
         run: |
+          variant="${{ matrix.variant }}"
+          if [ "$variant" = "slim" ]; then
+              slim_suffix=""
+          else
+              slim_suffix="-$variant"
+          fi
           if [ "${{ github.event_name }}" = "release" ]; then
-            {
-              echo "tags<<EOF"
-              echo "cocoindex/cocoindex-code:latest"
-              echo "cocoindex/cocoindex-code:${{ github.ref_name }}"
-              echo "ghcr.io/cocoindex-io/cocoindex-code:latest"
-              echo "ghcr.io/cocoindex-io/cocoindex-code:${{ github.ref_name }}"
-              echo "EOF"
-            } >> "$GITHUB_OUTPUT"
+              version="${{ github.ref_name }}"
+              if [ "$variant" = "slim" ]; then
+                  latest_tag="latest"
+              else
+                  latest_tag="$variant"
+              fi
+              {
+                  echo "tags<<EOF"
+                  echo "cocoindex/cocoindex-code:${latest_tag}"
+                  echo "cocoindex/cocoindex-code:${version}${slim_suffix}"
+                  echo "ghcr.io/cocoindex-io/cocoindex-code:${latest_tag}"
+                  echo "ghcr.io/cocoindex-io/cocoindex-code:${version}${slim_suffix}"
+                  echo "EOF"
+              } >> "$GITHUB_OUTPUT"
           else
-            {
-              echo "tags<<EOF"
-              echo "cocoindex/cocoindex-code:test"
-              echo "ghcr.io/cocoindex-io/cocoindex-code:test"
-              echo "EOF"
-            } >> "$GITHUB_OUTPUT"
+              test_tag="test${slim_suffix}"
+              {
+                  echo "tags<<EOF"
+                  echo "cocoindex/cocoindex-code:${test_tag}"
+                  echo "ghcr.io/cocoindex-io/cocoindex-code:${test_tag}"
+                  echo "EOF"
+              } >> "$GITHUB_OUTPUT"
           fi
 
       - name: Build and push to both registries
@@ -186,5 +215,11 @@ jobs:
           # PyPI's CDN yet (which happened on v0.2.24 release), and ensures
           # the image matches the tagged commit byte-for-byte.
           build-args: |
-            CCC_INSTALL_SPEC=/ccc-src[default]
+            CCC_VARIANT=${{ matrix.variant }}
+            CCC_INSTALL_SPEC=${{ matrix.install_spec }}
           tags: ${{ steps.tags.outputs.tags }}
+          # Per-variant BuildKit cache so slim and full don't evict each
+          # other's layers. The heavy `deps` layer (torch + friends for
+          # full; empty for slim) reuses across releases.
+          cache-from: type=gha,scope=${{ matrix.variant }}
+          cache-to: type=gha,mode=max,scope=${{ matrix.variant }}
diff --git a/README.md b/README.md
@@ -46,18 +46,18 @@ A lightweight, effective **(AST-based)** semantic code search tool for your code
 
 Using [pipx](https://pipx.pypa.io/stable/installation/):
 ```bash
-pipx install 'cocoindex-code[default]'       # batteries included (local embeddings)
+pipx install 'cocoindex-code[full]'          # batteries included (local embeddings)
 pipx upgrade cocoindex-code                  # upgrade
 ```
 
 Using [uv](https://docs.astral.sh/uv/getting-started/installation/):
 ```bash
-uv tool install --upgrade 'cocoindex-code[default]' --prerelease explicit --with "cocoindex>=1.0.0a24"
+uv tool install --upgrade 'cocoindex-code[full]' --prerelease explicit --with "cocoindex>=1.0.0a24"
 ```
 
-Two install styles:
-- `cocoindex-code[default]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs).
-- `cocoindex-code` — slim. LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers).
+Two install styles — they mirror the Docker image variants of the same names:
+- `cocoindex-code[full]` — batteries-included. Pulls in `sentence-transformers` so local embeddings (no API key required) work out of the box. The `ccc init` interactive prompt defaults to [Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs).
+- `cocoindex-code` (slim) — LiteLLM-only; requires a cloud embedding provider and API key. Use when you don't want the local-embedding deps (~1 GB of torch + transformers).
 
 Next, set up your [coding agent integration](#coding-agent-integration) — or jump to [Manual CLI Usage](#manual-cli-usage) if you prefer direct control.
 
@@ -198,6 +198,25 @@ The recommended approach is a **persistent container**: start it once, and use
 `docker exec` to run CLI commands or connect MCP sessions to it. The daemon
 inside stays warm across sessions, so the embedding model is loaded only once.
 
+### Choosing an image
+
+Two variants are published from each release:
+
+| Tag | Size | Embedding backends | When to pick |
+|---|---|---|---|
+| `cocoindex/cocoindex-code:latest` (slim, default) | ~450 MB | LiteLLM (cloud: OpenAI, Voyage, Gemini, Ollama, …) | Most users. Cloud-backed embeddings, smaller image, fast pulls. |
+| `cocoindex/cocoindex-code:full` | ~5 GB | sentence-transformers (local) + LiteLLM | When you want local embeddings without an API key, or an offline-ready container. Heavier because of torch + transformers. |
+
+The rest of this section uses `:latest` — substitute `:full` in the `image:` /
+`docker run` commands if you want the full variant.
+
+> **Mac users running the `:full` variant:** local embedding inference is
+> CPU-only inside Docker, because Docker on macOS can't access Apple's Metal
+> (MPS) GPU. If you want local embeddings and fast inference, install
+> natively instead: `pipx install 'cocoindex-code[full]'`. The `:latest`
+> (slim) variant is unaffected — LiteLLM runs the model on the provider's
+> side, so Docker vs. native makes no difference.
+
 ### Quick start — `docker compose up -d`
 
 Grab [`docker/docker-compose.yml`](./docker/docker-compose.yml) from this repo and run:
@@ -352,7 +371,7 @@ docker build -t cocoindex-code:local -f docker/Dockerfile .
 - **Ultra Performant**: ⚡ Built on top of ultra performant [Rust indexing engine](https://github.com/cocoindex-io/cocoindex). Only re-indexes changed files for fast updates.
 - **Multi-Language Support**: Python, JavaScript/TypeScript, Rust, Go, Java, C/C++, C#, SQL, Shell, and more.
 - **Embedded**: Portable and just works, no database setup required!
-- **Flexible Embeddings**: Local SentenceTransformers via the `[default]` extra (free, no API key!) or 100+ cloud providers via LiteLLM.
+- **Flexible Embeddings**: Local SentenceTransformers via the `[full]` extra (free, no API key!) or 100+ cloud providers via LiteLLM.
 
 ## Configuration
 
@@ -439,7 +458,7 @@ See [`src/cocoindex_code/chunking.py`](./src/cocoindex_code/chunking.py) for the
 
 ## Embedding Models
 
-With the `[default]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
+With the `[full]` extra installed, `ccc init` defaults to a local SentenceTransformers model ([Snowflake/snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs)) — no API key required. To use a different model, edit `~/.cocoindex_code/global_settings.yml`.
 
 > The `envs` entries below are only needed if the key isn't already in your shell environment — the daemon inherits your environment automatically.
 
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,37 +1,85 @@
-# ─── Stage 1: install dependencies ───────────────────────────────────────────
+# ─── Stage 1: heavy stable dependencies (variant-aware) ──────────────────────
+# Two image variants are published from this Dockerfile:
+#   - slim (default, `:latest`) — ~450 MB. cocoindex-code + LiteLLM only.
+#     For users who'll point the embedding at a cloud provider (OpenAI,
+#     Voyage, Gemini, …).
+#   - full (`:full`)             — ~5 GB. Also bundles sentence-transformers
+#     + torch + a pre-baked default model. For users who want offline-ready
+#     local embeddings without an API key.
+#
+# This stage installs only the big, slow-changing deps that are shared across
+# releases:
+#   - full: `sentence-transformers` (pulls torch + transformers + tokenizers
+#     transitively, ~1 GB of wheels).
+#   - slim: nothing — cocoindex-code's LiteLLM deps get installed in stage 2.
+#
+# The cache key is the RUN command string plus the values of the build args
+# in scope (here CCC_VARIANT), so BuildKit keeps separate cache entries per
+# variant and reuses each across releases until we bump the deps.
+#
+# `cocoindex` and `cocoindex-code` are deliberately NOT installed here —
+# they bump often, so pinning them at this layer would invalidate the heavy
+# cache on every release. Stage 2 installs them on top; the heavy transitive
+# deps (torch & co., full variant only) are already satisfied, so uv fetches
+# just the two packages plus their remaining lighter deps (e.g. LiteLLM).
+#
 # Use slim (glibc-based) — cocoindex ships pre-built Rust wheels that need glibc.
 # Alpine / musl-libc would require building from source.
-FROM python:3.12-slim AS builder
+#
+# `--system` tells uv to install into the base Python at
+# /usr/local/lib/python3.12/... since there's no virtualenv in the image.
+FROM python:3.12-slim AS deps
 
 RUN pip install --quiet uv
 
+ARG CCC_VARIANT=slim
+RUN if [ "$CCC_VARIANT" = "full" ]; then \
+        uv pip install --system --prerelease=allow sentence-transformers; \
+    fi
+
+# ─── Stage 2: install cocoindex + cocoindex-code (per release) ───────────────
+# Cheap relative to stage 1: transitive deps like torch are already in place
+# for the full variant; for slim there are no heavy deps to pull. uv fetches
+# the cocoindex + cocoindex-code wheels plus their lighter deps (LiteLLM, …).
+FROM deps AS builder
 WORKDIR /build
+ARG CCC_VARIANT=slim
 
-# Default: install the released cocoindex-code from PyPI (release flow).
-# Tests/local dev override with:
-#   --build-arg CCC_INSTALL_SPEC=/ccc-src[default]
-# which installs from the copied-in source tree instead. The COPY always runs;
-# with .dockerignore trimming build artifacts it adds ~nothing.
-ARG CCC_INSTALL_SPEC="cocoindex-code[default]"
+# Default behaviour: install cocoindex-code from PyPI, picking the extras
+# that match CCC_VARIANT.
+# Release workflow / local tests override with (respectively):
+#   --build-arg CCC_INSTALL_SPEC=/ccc-src
+#   --build-arg CCC_INSTALL_SPEC=/ccc-src[full]
+ARG CCC_INSTALL_SPEC=""
 COPY . /ccc-src
+RUN if [ -z "$CCC_INSTALL_SPEC" ]; then \
+        if [ "$CCC_VARIANT" = "full" ]; then \
+            CCC_INSTALL_SPEC="cocoindex-code[full]"; \
+        else \
+            CCC_INSTALL_SPEC="cocoindex-code"; \
+        fi; \
+    fi; \
+    uv pip install --system --prerelease=allow \
+        "cocoindex>=1.0.0a33" \
+        "${CCC_INSTALL_SPEC}"
 
-RUN uv pip install --system --prerelease=allow \
-    "cocoindex>=1.0.0a33" \
-    "${CCC_INSTALL_SPEC}"
-
-# ─── Stage 2: pre-bake the default embedding model ────────────────────────────
-# Bakes Snowflake/snowflake-arctic-embed-xs into the merged data directory at
-# /var/cocoindex/cache/..., so on first run Docker's volume copy-up populates
-# the cocoindex-data volume with the model — no network fetch needed.
+# ─── Stage 3: pre-bake the default embedding model (full only) ───────────────
+# For the full variant, bakes Snowflake/snowflake-arctic-embed-xs into
+# /var/cocoindex/cache/... so Docker's first-mount copy-up populates the
+# cocoindex-data volume with the model — no network fetch on first start.
+# For slim, just creates empty cache dirs so the runtime stage's COPY works
+# regardless of variant.
 FROM builder AS model_cache
+ARG CCC_VARIANT=slim
 
 ENV HF_HOME=/var/cocoindex/cache/huggingface \
     SENTENCE_TRANSFORMERS_HOME=/var/cocoindex/cache/sentence-transformers
 
 RUN mkdir -p /var/cocoindex/cache/huggingface /var/cocoindex/cache/sentence-transformers \
-    && python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('Snowflake/snowflake-arctic-embed-xs'); print('Model cached.')"
+    && if [ "$CCC_VARIANT" = "full" ]; then \
+        python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('Snowflake/snowflake-arctic-embed-xs'); print('Model cached.')"; \
+    fi
 
-# ─── Stage 3: runtime ─────────────────────────────────────────────────────────
+# ─── Stage 4: runtime ─────────────────────────────────────────────────────────
 FROM python:3.12-slim AS runtime
 
 # gosu for privilege-drop (PUID/PGID pattern); create non-root coco user.
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,10 +36,20 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+# `embeddings-local` is the primary feature extra: it pulls in
+# `sentence-transformers` (via cocoindex) so local embeddings work without
+# an API key.
 embeddings-local = [
     "cocoindex[sentence-transformers]==1.0.0a43",
 ]
-default = [
+# `full` is the umbrella "batteries-included" alias. Today it's just
+# `embeddings-local`, but we expect to bundle more optional niceties under
+# it over time — users who want everything can keep using `[full]` and pick
+# up the additions automatically. The name also matches the Docker
+# `:full` image variant for consistency across install paths. Contents are
+# inlined rather than self-referencing `cocoindex-code[embeddings-local]`
+# to avoid resolver edge cases with older pip.
+full = [
     "cocoindex[sentence-transformers]==1.0.0a43",
 ]
 dev = [
diff --git a/skills/ccc/references/management.md b/skills/ccc/references/management.md
@@ -5,11 +5,11 @@
 Install CocoIndex Code via pipx. Two install styles:
 
 ```bash
-pipx install 'cocoindex-code[default]'   # batteries included (local embeddings via sentence-transformers)
+pipx install 'cocoindex-code[full]'      # batteries included (local embeddings via sentence-transformers)
 pipx install cocoindex-code              # slim (LiteLLM-only; requires a cloud embedding provider + API key)
 ```
 
-The `[default]` extra pulls in `sentence-transformers` so the first-run default (local embeddings, no API key) works out of the box. The slim install is for environments where you don't want the torch/transformers deps and plan to use a LiteLLM-supported cloud provider instead.
+The `[full]` extra pulls in `sentence-transformers` so the first-run default (local embeddings, no API key) works out of the box. The slim install is for environments where you don't want the torch/transformers deps and plan to use a LiteLLM-supported cloud provider instead.
 
 To upgrade to the latest version:
 
diff --git a/src/cocoindex_code/cli.py b/src/cocoindex_code/cli.py
@@ -327,7 +327,7 @@ def _resolve_embedding_choice(
             return EmbeddingSettings(provider="sentence-transformers", model=DEFAULT_ST_MODEL)
         _typer.echo(
             "Error: sentence-transformers is not installed and stdin is not a TTY.\n"
-            "Either install the extra (`pip install cocoindex-code[embeddings-local]`)\n"
+            "Either install the extra (`pip install 'cocoindex-code[embeddings-local]'`)\n"
             "or pass `--litellm-model MODEL` to select a LiteLLM model.",
             err=True,
         )
diff --git a/tests/e2e_docker/conftest.py b/tests/e2e_docker/conftest.py
@@ -26,6 +26,9 @@ def docker_image() -> str:
     """Build the image once per test session, installing cocoindex-code from the
     local source tree (not PyPI) so tests exercise the current changes. Returns the tag.
     """
+    # Tests exercise the `full` variant so `ccc init -f` in non-TTY mode can
+    # fall back to sentence-transformers (the slim variant requires
+    # `--litellm-model`, which would add setup boilerplate to every test).
     tag = "cocoindex-code:pytest"
     subprocess.run(
         [
@@ -34,7 +37,9 @@ def docker_image() -> str:
             "-f",
             str(DOCKERFILE),
             "--build-arg",
-            "CCC_INSTALL_SPEC=/ccc-src[default]",
+            "CCC_VARIANT=full",
+            "--build-arg",
+            "CCC_INSTALL_SPEC=/ccc-src[full]",
             "-t",
             tag,
             str(REPO_ROOT),
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
@@ -838,12 +838,16 @@ async def embed(self, text: str) -> object:  # noqa: ARG002
 # ---------------------------------------------------------------------------
 
 
-def test_dockerfile_install_line_uses_default_extra() -> None:
-    """Dockerfile should install via `cocoindex-code[default]`, no separate ST pin."""
+def test_dockerfile_install_line_uses_full_extra() -> None:
+    """Dockerfile should install via `cocoindex-code[full]` (not the old
+    `[default]` alias) and should not hard-pin sentence-transformers.
+    """
     repo_root = Path(__file__).resolve().parent.parent
     content = (repo_root / "docker" / "Dockerfile").read_text()
-    assert "cocoindex-code[default]" in content
+    assert "cocoindex-code[full]" in content
+    assert "cocoindex-code[default]" not in content
     assert "sentence-transformers>=" not in content
+    assert "sentence-transformers==" not in content
 
 
 # ---------------------------------------------------------------------------
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -327,7 +327,7 @@ def _resolve_embedding_choice(`
`327`	`327`	`return EmbeddingSettings(provider="sentence-transformers", model=DEFAULT_ST_MODEL)`
`328`	`328`	`_typer.echo(`
`329`	`329`	`"Error: sentence-transformers is not installed and stdin is not a TTY.\n"`
`330`		- "Either install the extra (`pip install cocoindex-code[embeddings-local]`)\n"
	`330`	+ "Either install the extra (`pip install 'cocoindex-code[embeddings-local]'`)\n"
`331`	`331`	"or pass `--litellm-model MODEL` to select a LiteLLM model.",
`332`	`332`	`err=True,`
`333`	`333`	`)`