diff --git a/Dockerfile.multistage b/Dockerfile.multistage index 4629a97..0defcff 100644 --- a/Dockerfile.multistage +++ b/Dockerfile.multistage @@ -12,7 +12,11 @@ # ============================================================================ # STAGE 1: Builder - Install dependencies # ============================================================================ -FROM rocker/r-ver:4.4.3 AS builder +FROM rocker/r2u:24.04 AS builder + +# Configure renv cache to use the same path as runtime +# This ensures symlinks remain valid when copied to runtime stage +ENV RENV_PATHS_CACHE="/app/renv/.cache" # Install system dependencies needed for package compilation RUN apt-get update && apt-get install -y \ @@ -28,7 +32,7 @@ RUN apt-get update && apt-get install -y \ libjpeg-dev \ && rm -rf /var/lib/apt/lists/* -WORKDIR /build +WORKDIR /app # OPTIMIZATION 1: Copy only dependency files first # This creates a cache-friendly layer that only rebuilds when dependencies change @@ -49,7 +53,10 @@ COPY app.R app.R # ============================================================================ # STAGE 2: Runtime - Minimal production image # ============================================================================ -FROM rocker/r-ver:4.4.3 +FROM rocker/r2u:24.04 + +# Configure renv cache location to match where packages were installed +ENV RENV_PATHS_CACHE="/app/renv/.cache" # Install ONLY runtime system dependencies (no build tools) # Note: Using minimal runtime libraries without -dev packages @@ -68,16 +75,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app -# Copy the entire renv library from builder stage -COPY --from=builder /build/renv /app/renv -COPY --from=builder /build/.Rprofile /app/.Rprofile -COPY --from=builder /build/renv.lock /app/renv.lock - -# Copy the renv cache (symlinks in renv/library point to this cache) -COPY --from=builder /root/.cache/R/renv /root/.cache/R/renv +# Copy the entire renv library from builder stage (includes cache at /app/renv/.cache) +COPY --from=builder /app/renv /app/renv +COPY --from=builder /app/.Rprofile /app/.Rprofile +COPY --from=builder /app/renv.lock /app/renv.lock # Copy application code -COPY --from=builder /build/app.R /app/app.R +COPY --from=builder /app/app.R /app/app.R # Expose Shiny port EXPOSE 3838 @@ -85,7 +89,7 @@ EXPOSE 3838 # Run the application # Note: Use --vanilla to skip .Rprofile and avoid renv activation at runtime # The renv library path is already in R's search path -CMD ["R", "--vanilla", "-e", ".libPaths('/app/renv/library/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu'); shiny::runApp('/app', host = '0.0.0.0', port = 3838)"] +CMD ["R", "--vanilla", "-e", ".libPaths('/app/renv/library/linux-ubuntu-noble/R-4.5/x86_64-pc-linux-gnu'); shiny::runApp('/app', host = '0.0.0.0', port = 3838)"] # ============================================================================ # Build and Run Commands: diff --git a/Dockerfile.single-stage b/Dockerfile.single-stage index 0408281..e57268f 100644 --- a/Dockerfile.single-stage +++ b/Dockerfile.single-stage @@ -8,7 +8,10 @@ # 4. Larger final image size (~2GB+) # ============================================================================ -FROM rocker/r-ver:4.4.3 +FROM rocker/r2u:24.04 + +# Configure renv cache to be inside the app directory +ENV RENV_PATHS_CACHE="/app/renv/.cache" # Install system dependencies RUN apt-get update && apt-get install -y \ @@ -41,4 +44,4 @@ EXPOSE 3838 # Run the application # Note: Use --vanilla to skip .Rprofile and avoid renv activation at runtime # The renv library path is already in R's search path -CMD ["R", "--vanilla", "-e", ".libPaths('/app/renv/library/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu'); shiny::runApp('/app', host = '0.0.0.0', port = 3838)"] +CMD ["R", "--vanilla", "-e", ".libPaths('/app/renv/library/linux-ubuntu-noble/R-4.5/x86_64-pc-linux-gnu'); shiny::runApp('/app', host = '0.0.0.0', port = 3838)"] diff --git a/Dockerfile.three-stage b/Dockerfile.three-stage index 6c4a0d4..71b2d8b 100644 --- a/Dockerfile.three-stage +++ b/Dockerfile.three-stage @@ -17,7 +17,11 @@ # ============================================================================ # STAGE 1: Base - CRAN packages and build environment # ============================================================================ -FROM rocker/r-ver:4.4.3 AS base +FROM rocker/r2u:24.04 AS base + +# Configure renv cache to use the same path as runtime +# This ensures symlinks remain valid when copied to runtime stage +ENV RENV_PATHS_CACHE="/app/renv/.cache" # Install system dependencies needed for compilation AND runtime # Build tools are needed here to compile CRAN packages @@ -35,7 +39,7 @@ RUN apt-get update && apt-get install -y \ libjpeg-dev \ && rm -rf /var/lib/apt/lists/* -WORKDIR /base +WORKDIR /app # OPTIMIZATION 1: Copy only dependency files first # This creates a cache-friendly layer that only rebuilds when CRAN dependencies change @@ -84,7 +88,10 @@ COPY app.R app.R # ============================================================================ # STAGE 3: Runtime - Pure application (NO build tools) # ============================================================================ -FROM rocker/r-ver:4.4.3 +FROM rocker/r2u:24.04 + +# Configure renv cache location to match where packages were installed +ENV RENV_PATHS_CACHE="/app/renv/.cache" # Install ONLY runtime system dependencies (no -dev packages, no build tools) # Note: Using minimal runtime libraries without -dev packages @@ -103,13 +110,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app -# Copy CRAN packages from base stage -COPY --from=base /base/renv /app/renv -COPY --from=base /base/.Rprofile /app/.Rprofile -COPY --from=base /base/renv.lock /app/renv.lock - -# Copy the renv cache (symlinks in renv/library point to this cache) -COPY --from=base /root/.cache/R/renv /root/.cache/R/renv +# Copy CRAN packages from base stage (includes cache at /app/renv/.cache) +COPY --from=base /app/renv /app/renv +COPY --from=base /app/.Rprofile /app/.Rprofile +COPY --from=base /app/renv.lock /app/renv.lock # Copy custom packages from builder stage (if any were installed) # COPY --from=builder /usr/local/lib/R/site-library/mypackage /usr/local/lib/R/site-library/mypackage @@ -123,7 +127,7 @@ EXPOSE 3838 # Run the application # Note: Use --vanilla to skip .Rprofile and avoid renv activation at runtime # The renv library path is already in R's search path -CMD ["R", "--vanilla", "-e", ".libPaths('/app/renv/library/linux-ubuntu-noble/R-4.4/x86_64-pc-linux-gnu'); shiny::runApp('/app', host = '0.0.0.0', port = 3838)"] +CMD ["R", "--vanilla", "-e", ".libPaths('/app/renv/library/linux-ubuntu-noble/R-4.5/x86_64-pc-linux-gnu'); shiny::runApp('/app', host = '0.0.0.0', port = 3838)"] # ============================================================================ # Layer Optimization Strategy: diff --git a/blog-post.md b/blog-post.md index 8da02ab..06d0f66 100644 --- a/blog-post.md +++ b/blog-post.md @@ -1,23 +1,27 @@ --- -title: "Optimizing R Shiny Docker Builds: From 15 Minutes to 30 Seconds" +title: "Optimizing R Shiny Docker Builds: From 40 Minutes to 10 Minutes" date: 2024-12-16 author: Sumedh R. Sankhe -tags: [Docker, R, Shiny, DevOps, Performance, Kubernetes] -description: "How we reduced Docker build times by 94% and image sizes by 43% for production R Shiny applications using multistage builds" +tags: [Docker, R, Shiny, DevOps, Performance, Kubernetes, SaaS] +description: "How we reduced Docker build times by 80% and image sizes by 42% for a customer-facing R Shiny SaaS application using multistage builds with rocker/r2u" --- -# Optimizing R Shiny Docker Builds: From 15 Minutes to 30 Seconds +# Optimizing R Shiny Docker Builds: From 40 Minutes to 10 Minutes -When you're deploying R Shiny applications to production on Kubernetes, Docker build efficiency becomes critical. At Alamar Biosciences, we faced a common challenge: our NULISA Analysis Software (NAS) Docker builds were slow, bloated, and frustrating to iterate on. A simple code change meant waiting 8+ minutes for a rebuild. Our images ballooned to over 2GB. And every deployment felt unnecessarily slow. +This is my first time writing up a technical blog post, so bear with me. I'm going to share what I learned optimizing Docker builds for R Shiny apps, including all the mistakes I made along the way. -This article walks through the systematic optimization process that cut our build times by 94% and reduced image sizes by 43%—improvements that compound across dozens of daily builds and deployments. +At Alamar Biosciences, I work on the NULISA Analysis Software (NAS) - basically a Shiny app for analyzing proteomics data. Unlike typical internal Shiny apps deployed with Posit Connect, NAS is a **customer-facing SaaS application** running on Azure Kubernetes Service (AKS). Our customers access it directly for analyzing their proteomics experiments, which means deployment speed, reliability, and scalability matter differently than for internal tools. + +When I started, our Docker builds were painfully slow. Like, grab coffee, queue some villagers in AoE2, maybe respond to some emails slow. A simple one-line code change? Wait 40 minutes for Docker to rebuild the image. Our images were pushing 1.5GB compressed. Every deployment to Kubernetes felt like it took forever. When you're shipping features to customers and fixing production bugs, 40-minute build times kill your velocity. + +This post walks through how I cut Docker build times by 80% for code changes (40 mins → 8-15 mins) and reduced image sizes by 42% (1.5GB → 875MB). The optimization involves separating build from runtime using Docker multistage builds and leveraging rocker/r2u for binary package installation. More importantly, this post covers the things that broke along the way and how I fixed them. ## The Problem: Slow, Bloated Docker Images Our original Dockerfile followed a common pattern—straightforward but inefficient: ```dockerfile -FROM rocker/r-ver:4.3.2 +FROM rocker/r2u:24.04 # Install system dependencies RUN apt-get update && apt-get install -y \ @@ -39,46 +43,53 @@ EXPOSE 3838 CMD ["R", "-e", "shiny::runApp('/app', host='0.0.0.0', port=3838)"] ``` -This approach had three critical problems: +This approach had three big problems: ### Problem 1: Poor Layer Caching -Any change to our application code—even a single line in `app.R`—invalidated the `renv::restore()` layer. Docker would reinstall *all* 50+ R packages from scratch. For a complex proteomics analysis platform with dependencies like `ggplot2`, `plotly`, `DT`, and domain-specific packages, this meant 10+ minutes of redundant package compilation. +Any change to our application code—even fixing a typo in `app.R`—would invalidate the `renv::restore()` layer. Docker would reinstall ALL 200+ R packages from scratch. We're talking the usual suspects like `ggplot2`, `plotly`, `DT`, plus a ton of domain-specific proteomics packages, and 2 custom in-house packages. Every. Single. Time. On our GitHub Actions runners, that's 60+ minutes of watching packages compile and tests run. ### Problem 2: Bloated Production Images -Our final images contained everything needed to *build* the application, not just *run* it. Build tools like `gcc`, `make`, and `-dev` system libraries remained in production containers. The result? 2.1GB images when the actual runtime requirements were under 1GB. +Our final images had everything needed to build the app, not just run it. All the build tools like `gcc`, `make`, and those `-dev` system libraries were just sitting there in production taking up space. The result? Nearly 2GB uncompressed images when the actual runtime stuff could be much smaller. ### Problem 3: Slow CI/CD Pipelines -In our Azure Kubernetes Service deployment workflow: -1. Developer pushes code change -2. GitHub Actions triggers build -3. Wait 8-15 minutes for Docker build +Here's what our Azure Kubernetes deployment looked like: +1. Push a code change (bug fix, new feature, etc.) +2. GitHub Actions starts building +3. Wait 40 minutes (go get coffee, check Slack, lose focus) 4. Push to Azure Container Registry 5. Deploy to AKS -Those 8-15 minute build times became a bottleneck. Developers would push changes, then context-switch while waiting. Iteration velocity suffered. +Those build times killed productivity. You'd push a fix, then switch to something else while waiting. By the time the build finished, you'd forgotten what you were even working on. It was like waiting for a Wonder to be built while your opponents are rushing you with trebuchets. + +**Why this matters for SaaS:** With Posit Connect, you typically deploy once and iterate internally. With a customer-facing SaaS on Kubernetes, you're constantly shipping features, bug fixes, and updates. Fast build times directly impact how quickly you can respond to customer issues and ship improvements. A 40-minute build cycle means you can only deploy 2-3 times per day max. That's not acceptable for modern SaaS development. ## The Solution: Multistage Docker Builds -The fix involves three core strategies: +The fix involves three core strategies (think of it as your build order): 1. **Separate build from runtime** using Docker multistage builds 2. **Optimize layer caching** by copying dependencies before code 3. **Minimize runtime dependencies** to only what's needed to run the app -Let me show you exactly how this works. +Let me show you exactly how this works. If you're familiar with AoE2, this is basically advancing from Dark Age (single-stage mess) to Imperial Age (optimized multistage build). ## Implementation: Building the Optimized Dockerfile +I'm going to walk through the multistage Dockerfile step by step. In the demo repo, I have a simple two-stage version for the example app. But for NAS, we actually use a three-stage build that separates CRAN packages from our custom packages. This makes sense when you have custom packages that change more frequently than your CRAN dependencies. For simpler apps, two stages is plenty. + ### Stage 1: The Builder -The builder stage compiles packages and prepares the application: +This stage compiles all the packages and prepares the app. In our real NAS setup, this is where we also install custom packages and run unit tests (unit testing in Docker builds deserves its own blog post, so I won't dive into that here): ```dockerfile # ============ STAGE 1: Builder ============ -FROM rocker/r-ver:4.3.2 AS builder +FROM rocker/r2u:24.04 AS builder + +# Configure renv cache to use consistent path across stages +ENV RENV_PATHS_CACHE="/app/renv/.cache" # Install build dependencies (with -dev packages) RUN apt-get update && apt-get install -y \ @@ -88,7 +99,7 @@ RUN apt-get update && apt-get install -y \ libfontconfig1-dev \ && rm -rf /var/lib/apt/lists/* -WORKDIR /build +WORKDIR /app # CRITICAL: Copy ONLY dependency files first COPY renv.lock renv.lock @@ -105,15 +116,18 @@ RUN R -e "renv::restore()" COPY app.R app.R ``` -**Key insight**: By separating `renv.lock` from `app.R` in the COPY operations, we ensure package installation is cached independently. Your `renv.lock` file changes rarely (only when adding/removing packages), but your application code changes constantly. +**The key part:** Notice how we copy `renv.lock` and the renv files separately from `app.R`. Your lockfile only changes when you add or remove packages (maybe once a week?). Your application code changes constantly (multiple times a day). By separating them, Docker can cache the expensive `renv::restore()` step and skip it when you only change your app code. ### Stage 2: The Runtime -The runtime stage creates a minimal production image: +Now we build a clean runtime image that only has what's needed to run the app: ```dockerfile # ============ STAGE 2: Runtime ============ -FROM rocker/r-ver:4.3.2 +FROM rocker/r2u:24.04 + +# Configure renv cache to match builder stage +ENV RENV_PATHS_CACHE="/app/renv/.cache" # Install ONLY runtime libraries (no -dev packages) RUN apt-get update && apt-get install -y \ @@ -125,59 +139,112 @@ RUN apt-get update && apt-get install -y \ WORKDIR /app -# Copy the compiled renv library from builder -COPY --from=builder /build/renv /app/renv -COPY --from=builder /build/.Rprofile /app/.Rprofile -COPY --from=builder /build/renv.lock /app/renv.lock +# Copy the compiled renv library from builder (includes cache) +COPY --from=builder /app/renv /app/renv +COPY --from=builder /app/.Rprofile /app/.Rprofile +COPY --from=builder /app/renv.lock /app/renv.lock # Copy application code -COPY --from=builder /build/app.R /app/app.R +COPY --from=builder /app/app.R /app/app.R EXPOSE 3838 -CMD ["R", "-e", "shiny::runApp('/app', host='0.0.0.0', port=3838)"] +CMD ["R", "--vanilla", "-e", ".libPaths('/app/renv/library/linux-ubuntu-noble/R-4.5/x86_64-pc-linux-gnu'); shiny::runApp('/app', host='0.0.0.0', port=3838)"] +``` + +**What's happening here:** The runtime stage starts fresh and uses `COPY --from=builder` to grab the compiled packages. Notice we're installing `libcurl4` instead of `libcurl4-openssl-dev`. The `-dev` packages include headers and build tools. We don't need those to run the app, only to compile packages. This alone saves hundreds of megabytes. + +Also notice the CMD uses `--vanilla` - that's the fix for the renv runtime issue I mentioned earlier. + +## Things That Broke (And How I Fixed Them) + +Okay, so I thought I was done after writing the multistage Dockerfile. Built the images, they looked great. Then I actually tried to run them and... the app wouldn't start. Here's what went wrong. + +### Issue 1: renv Trying to Reinstall Packages at Runtime + +The containers would start, but then renv would activate (because of `.Rprofile`) and immediately complain that packages were missing or out of sync. It would try to reinstall everything at runtime. Turns out my `renv.lock` file was missing a bunch of transitive dependencies - things like `cpp11`, `crosstalk`, `farver`. These aren't packages I explicitly use, but they're dependencies of dependencies. + +When renv detected these weren't in the lockfile, it thought something was wrong and tried to "fix" it by reinstalling. In a running container. Which obviously failed. + +**The fix:** I disabled renv activation at runtime by using `R --vanilla` which skips the `.Rprofile`. Then I explicitly set the library path in the CMD: + +```dockerfile +CMD ["R", "--vanilla", "-e", ".libPaths('/app/renv/library/linux-ubuntu-noble/R-4.5/x86_64-pc-linux-gnu'); shiny::runApp('/app', host = '0.0.0.0', port = 3838)"] ``` -**Key insight**: The runtime stage uses `COPY --from=builder` to pull pre-compiled packages. It installs only the *runtime* versions of system libraries (e.g., `libcurl4` instead of `libcurl4-openssl-dev`). No build tools, no compilers, no development headers—just what's needed to execute. +This way, R uses the pre-installed packages but doesn't trigger renv's "helpful" automatic restoration. + +### Issue 2: Broken Symlinks in Multistage Builds + +Even after fixing the renv activation issue, the multistage builds still wouldn't work. Shiny couldn't find any packages. Turns out renv uses symlinks to a cache directory at `/root/.cache/R/renv/cache/`. When I copied the renv library between stages, I was copying the symlinks but not the actual files they pointed to. So every package was just a broken link. + +I spent way too long debugging this before I realized what was happening. + +**The fix:** Copy the renv cache along with the library: + +```dockerfile +# Copy the renv cache (symlinks in renv/library point to this cache) +COPY --from=builder /root/.cache/R/renv /root/.cache/R/renv +``` + +Now the symlinks work and packages load properly. + +### Lesson Learned + +Test your containers actually run, not just build. I wasted a couple hours assuming that if the build succeeded, everything was fine. Docker's multistage builds add complexity, especially with package managers that use caching strategies like renv. Don't be like me clicking "I'm ready!" before actually being ready - test your builds like you're scouting your opponent's base before committing to a strategy. ## Results: Quantified Performance Improvements -Testing on NAS with 50+ R package dependencies: +Here are the real numbers from our GitHub Actions workflow on the demo repo: + +| Metric | Single-Stage | Two-Stage | Three-Stage | Improvement | +|--------|-------------|-----------|-------------|-------------| +| **Image size (uncompressed)** | 1.89 GB | 1.44 GB | 1.44 GB | **24% smaller** | +| **Image size (compressed/registry)** | 512 MB | 403 MB | 403 MB | **21% smaller** | +| **Warm build** (code change only) | 5-7 mins | 30-45s | 30-45s | **85-92% faster** | +| **Cold build** (no cache) | 8-10 mins | 6-8 mins | 6-8 mins | 20-25% faster | -| Metric | Before (Single-Stage) | After (Multistage) | Improvement | -|--------|----------------------|-------------------|-------------| -| **Cold build** (no cache) | 15m 22s | 12m 18s | 20% faster | -| **Warm build** (cached deps, code change) | 7m 45s | 28s | **94% faster** | -| **Final image size** | 2.14 GB | 1.22 GB | **43% smaller** | -| **Deployment time to AKS** | ~2m 30s | ~1m 20s | 47% faster | +For our production NAS application with 200+ CRAN packages and 2 custom in-house packages: -The 94% improvement in warm builds is transformative for developer experience. What was a coffee-break wait is now nearly instantaneous. +| Metric | Before (Single-Stage) | After (Three-Stage) | Improvement | +|--------|----------------------|---------------------|-------------| +| **Image size (compressed)** | 1.5 GB | 875 MB | **42% smaller** | +| **Cold build** (all layers) | 40 mins | 30-35 mins | **20-25% faster** | +| **Warm build** (code change only) | 40 mins | 8-15 mins | **70-80% faster** | -## Layer Caching Deep Dive +**Note:** Our production setup includes additional optimizations beyond the scope of this post: automated base image rebuilds triggered by renv.lock hash changes, cache-busting strategies for custom package updates, and GitHub Actions runner cleanup for multi-stage builds. These advanced CI/CD integrations will be covered in a follow-up post. -Understanding Docker's layer caching is crucial. Each `RUN`, `COPY`, or `ADD` instruction creates a new layer. Docker caches layers and reuses them if: +**Note on image sizes:** Container registries (like Azure Container Registry, Docker Hub, GitHub Container Registry) store images in compressed format, which is why the "compressed/registry" sizes are significantly smaller than what you see locally with `docker images`. When you push an image to a registry, Docker compresses the layers, typically achieving 25-35% of the uncompressed size. This is important when considering deployment times and registry storage costs. + +**Note on warm builds:** Even single-stage Dockerfiles can have warm builds if you structure the layers correctly. The problem is that most single-stage setups use `COPY . .` early, which invalidates package installation on every code change. Our original single-stage Dockerfile had this issue, which is why warm builds were still slow. + +The full CI/CD pipeline includes additional steps beyond the Docker build: running unit tests, extracting test results, publishing them to GitHub, security scanning, etc. That's why the end-to-end time is longer than just the Docker build. The unit testing integration will be covered in a separate blog post. + +## How Layer Caching Actually Works + +This took me a while to really understand, so let me break it down. Each `RUN`, `COPY`, or `ADD` instruction in a Dockerfile creates a new layer. Docker caches these layers and reuses them if: 1. The instruction hasn't changed 2. All previous layers are unchanged 3. For `COPY`, the file contents are identical -**Inefficient ordering:** +**The wrong way (what I had originally):** ```dockerfile COPY . . # Any file change invalidates this -RUN R -e "renv::restore()" # This gets invalidated too! +RUN R -e "renv::restore()" # So this has to run again. Every time. ``` -**Optimized ordering:** +**The right way:** ```dockerfile -COPY renv.lock . # Only changes when dependencies change -RUN R -e "renv::restore()" # Cached for code-only changes -COPY app.R . # Frequently changes, but doesn't invalidate above +COPY renv.lock . # Only changes when you add/remove packages +RUN R -e "renv::restore()" # Gets cached and reused for code changes +COPY app.R . # Changes all the time, but doesn't break cache above ``` -This strategic ordering is why our warm builds dropped from 7+ minutes to under 30 seconds. +That simple reordering is the entire reason warm builds went from 8-10 minutes to 30 seconds. Put your stable stuff first, your frequently changing stuff last. -## Production Deployment Workflow +## How This Works in Production -Here's how this integrates with our Azure Kubernetes pipeline: +Here's how this fits into our actual Azure Kubernetes pipeline: ```yaml # .github/workflows/deploy.yml (simplified) @@ -208,15 +275,15 @@ jobs: docker push ${{ secrets.ACR_NAME }}.azurecr.io/nasapp:${{ github.sha }} ``` -The `--cache-from` flag leverages the previous build's layers, compounding the caching benefits. +The `--cache-from` flag helps Docker reuse layers from previous builds, which makes the caching even better. -## Beyond the Basics: Advanced Optimizations +## Other Things That Helped -Once you've implemented multistage builds, consider these additional optimizations: +Once I got the basic multistage build working, here are some other tricks I picked up: ### 1. BuildKit and Build Secrets -For private package repositories: +If you have private R packages (we do), you need to pass GitHub PATs or other credentials. Don't bake them into your image: ```dockerfile # syntax=docker/dockerfile:1 @@ -226,70 +293,78 @@ RUN --mount=type=secret,id=github_pat \ R -e "renv::restore()" ``` -This avoids baking credentials into layers. +This keeps secrets out of your layers. ### 2. Parallel Package Installation -For packages with many dependencies: +This is a small win but it adds up: ```dockerfile RUN R -e "options(Ncpus = 4); renv::restore()" ``` -### 3. Base Image Selection +Uses 4 cores to compile packages instead of 1. On a GitHub Actions runner, this shaved off another minute or two. + +### 3. Pick the Right Base Image -For Shiny apps with built-in Shiny Server: +Use `rocker/r2u` for significantly faster package installation through binary packages. This is especially beneficial for large projects with many dependencies. ```dockerfile -FROM rocker/shiny:4.3.2 AS builder +FROM rocker/r2u:24.04 AS builder ``` -This includes pre-configured Shiny Server, reducing setup time. +If you need Shiny Server (we don't - we use Kubernetes and run Shiny directly), `rocker/shiny` is also available with Shiny Server pre-installed. However, for Kubernetes deployments, `rocker/r2u` provides faster builds with smaller images. -## Lessons Learned +## What I Learned -1. **Profile before optimizing**: Use `docker history` to see where size bloat comes from -2. **Measure real workflows**: Cold build time matters less than warm rebuild time -3. **Layer ordering is critical**: Most frequently changing files should be COPYed last -4. **Multistage builds compound benefits**: Faster builds + smaller images + cleaner deployments +1. **Test everything**: Building successfully doesn't mean it runs successfully +2. **Measure what matters**: I cared way more about warm rebuild time than cold build time - optimize for your actual gameplay, not theoretical perfect builds +3. **Order matters**: Put stable stuff (dependencies) before frequently changing stuff (code) +4. **renv has quirks**: It's great for reproducibility but you need to understand its caching and symlink behavior when containerizing ## Try It Yourself -I've created a complete working example demonstrating these techniques: +I put together a working example with all the code: **GitHub Repository**: [shiny-docker-optimization](https://github.com/SumedhSankhe/shiny-docker-optimization) -The repo includes: -- Example Shiny application (proteomics QC dashboard) -- Both single-stage and multistage Dockerfiles -- Complete renv setup -- Detailed README with build instructions +It includes: +- A simple Shiny app (mtcars dashboard, nothing fancy) +- Three Dockerfiles: single-stage (bad), multistage (better), and three-stage (for complex apps) +- Full renv setup that actually works +- Scripts to test build times -Clone it, build both versions, and compare the results yourself: +Note: This demo app is way simpler than NAS (a few packages vs 200+, no custom packages, no unit tests). But the principles are the same, and you can see the optimization benefits even on a small app. + +Clone it and compare the results: ```bash git clone https://github.com/SumedhSankhe/shiny-docker-optimization.git cd shiny-docker-optimization -# Build and compare +# Build both versions docker build -f Dockerfile.single-stage -t shiny-app:single . docker build -f Dockerfile.multistage -t shiny-app:optimized . + +# Compare sizes docker images | grep shiny-app -``` -## Conclusion +# Run it +docker run -p 3838:3838 shiny-app:optimized +# Open localhost:3838 +``` -Optimizing Docker builds for R Shiny applications isn't just about faster builds—it's about better developer experience, more efficient CI/CD pipelines, and leaner production deployments. The multistage approach demonstrated here has become standard practice for all our Shiny applications at Alamar Biosciences. +## Wrapping Up -The key principles apply beyond R and Shiny: separate build from runtime, optimize layer caching through strategic ordering, and ruthlessly minimize what makes it into production images. +This was my first real dive into Docker optimization and I learned a lot. The multistage build approach is now what I use for all our Shiny apps at work. The principles apply to other languages too - separate build from runtime, order your layers carefully, and test that things actually run. -If you're deploying Shiny apps to Kubernetes or any containerized environment, these optimizations will pay dividends immediately. +If you're deploying Shiny apps in containers, hopefully this saves you some time and headache. --- -**Questions or improvements?** Open an issue on the [GitHub repo](https://github.com/SumedhSankhe/shiny-docker-optimization) or connect with me on [LinkedIn](https://linkedin.com/in/sumedhsankhe). +**Found this helpful or have questions?** Open an issue on the [GitHub repo](https://github.com/SumedhSankhe/shiny-docker-optimization) or connect with me on [LinkedIn](https://linkedin.com/in/sumedhsankhe). -**Related Reading**: -- [Engineering Production-Grade Shiny Apps](https://engineering-shiny.org/) -- [Docker Build Best Practices](https://docs.docker.com/develop/dev-best-practices/) -- [renv: R Dependency Management](https://rstudio.github.io/renv/) +**Some resources I found useful:** +- [Engineering Production-Grade Shiny Apps](https://engineering-shiny.org/) - Great book on building real Shiny apps +- [Docker Build Best Practices](https://docs.docker.com/develop/dev-best-practices/) - Official Docker docs +- [renv documentation](https://rstudio.github.io/renv/) - Understanding how renv works helps a lot