diff --git a/.github/workflows/test-devcontainer.yml b/.github/workflows/test-devcontainer.yml new file mode 100644 index 0000000..9bf5fa8 --- /dev/null +++ b/.github/workflows/test-devcontainer.yml @@ -0,0 +1,66 @@ +name: Test DevContainer + +on: + push: + branches: [main] + pull_request: + paths: + - 'devenv/**' + - 'common/.devcontainer/**' + - '.github/workflows/test-devcontainer.yml' + +env: + REGISTRY: ghcr.io + +jobs: + test: + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + os: [debian] + # TODO: c10s has PAM/sudo issues with devcontainer CLI's --userns=keep-id + # include: + # - os: c10s + + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Set up runner + uses: bootc-dev/actions/bootc-ubuntu-setup@main + + - name: Build devcontainer image + run: just devenv-build-${{ matrix.os }} + + - name: Create override config for local image + run: | + cat > /tmp/devcontainer-override.json << 'EOF' + { + "image": "localhost/bootc-devenv-${{ matrix.os }}:latest", + "runArgs": [ + "--security-opt", "label=disable", + "--security-opt", "unmask=/proc/*", + "--device", "/dev/net/tun", + "--device", "/dev/kvm" + ], + "postCreateCommand": { + "devenv-init": "sudo /usr/local/bin/devenv-init.sh" + } + } + EOF + + - name: Start devcontainer + run: | + npx --yes @devcontainers/cli up \ + --workspace-folder . \ + --docker-path podman \ + --override-config /tmp/devcontainer-override.json \ + --remove-existing-container + + - name: Test nested podman in devcontainer + run: | + npx @devcontainers/cli exec \ + --workspace-folder . \ + --docker-path podman \ + /usr/libexec/devenv-selftest.sh diff --git a/Justfile b/Justfile index 79df07e..c3b12cb 100644 --- a/Justfile +++ b/Justfile @@ -12,3 +12,26 @@ devenv-build-c10s: # Build devenv image with local tag (defaults to Debian) devenv-build: devenv-build-debian + +# Test devcontainer with a locally built image +# Usage: just devcontainer-test +# Example: just devcontainer-test debian +devcontainer-test os: + #!/bin/bash + set -euo pipefail + cat > /tmp/devcontainer-override.json << 'EOF' + { + "image": "localhost/bootc-devenv-{{os}}:latest", + "runArgs": [ + "--security-opt", "label=disable", + "--security-opt", "unmask=/proc/*", + "--device", "/dev/net/tun", + "--device", "/dev/kvm" + ], + "postCreateCommand": { + "devenv-init": "sudo /usr/local/bin/devenv-init.sh" + } + } + EOF + npx --yes @devcontainers/cli up --workspace-folder . --docker-path podman --override-config /tmp/devcontainer-override.json --remove-existing-container + npx @devcontainers/cli exec --workspace-folder . --docker-path podman /usr/libexec/devenv-selftest.sh diff --git a/common/.devcontainer/devcontainer.json b/common/.devcontainer/devcontainer.json index 26e62a2..3916c02 100644 --- a/common/.devcontainer/devcontainer.json +++ b/common/.devcontainer/devcontainer.json @@ -13,9 +13,14 @@ }, "features": {}, "runArgs": [ - // Because we want to be able to run podman and also use e.g. /dev/kvm - // among other things - "--privileged" + // Minimal security options for nested podman (avoids --privileged): + // - label=disable: Required for mounting /proc in nested user namespace + // - unmask=/proc/*: Allows access to /proc paths needed for nested containers + "--security-opt", "label=disable", + "--security-opt", "unmask=/proc/*", + // Device access for nested containers and VMs + "--device", "/dev/net/tun", + "--device", "/dev/kvm" ], "postCreateCommand": { // Our init script diff --git a/common/.github/actions/bootc-ubuntu-setup/action.yml b/common/.github/actions/bootc-ubuntu-setup/action.yml index 63f5e00..4548775 100644 --- a/common/.github/actions/bootc-ubuntu-setup/action.yml +++ b/common/.github/actions/bootc-ubuntu-setup/action.yml @@ -14,7 +14,8 @@ runs: run: | set -xeuo pipefail sudo df -h - unwanted_pkgs=('^aspnetcore-.*' '^dotnet-.*' '^llvm-.*' 'php.*' '^mongodb-.*' '^mysql-.*' + # Use globs for package patterns (apt and dpkg both support fnmatch globs) + unwanted_pkgs=('aspnetcore-*' 'dotnet-*' 'llvm-*' 'php*' 'mongodb-*' 'mysql-*' azure-cli google-chrome-stable firefox mono-devel) unwanted_dirs=(/usr/share/dotnet /opt/ghc /usr/local/lib/android /opt/hostedtoolcache/CodeQL) # Start background removal operations as systemd units; if this causes @@ -30,9 +31,12 @@ runs: for x in ${unwanted_dirs[@]}; do runcleanup rm -rf "$x" done - # Apt removals in foreground, as we can't parallelize these + # Apt removals in foreground, as we can't parallelize these. + # Only attempt removal if matching packages are installed. for x in ${unwanted_pkgs[@]}; do - /bin/time -f '%E %C' sudo apt-get remove -y $x + if dpkg -l "$x" >/dev/null 2>&1; then + /bin/time -f '%E %C' sudo apt-get remove -y "$x" + fi done # We really want support for heredocs - name: Update podman and install just diff --git a/devenv/.dockerignore b/devenv/.dockerignore index c6004e9..28ec6c3 100644 --- a/devenv/.dockerignore +++ b/devenv/.dockerignore @@ -15,3 +15,5 @@ !fetch-tools.sh !install-rust.sh !install-kani.sh +!devenv-selftest.sh +!userns-setup diff --git a/devenv/Containerfile.c10s b/devenv/Containerfile.c10s index 0abc7db..8c8e22c 100644 --- a/devenv/Containerfile.c10s +++ b/devenv/Containerfile.c10s @@ -55,6 +55,10 @@ set -xeuo pipefail grep -hEve '^#' packages-common.txt packages-c10s.txt | /bin/time -f '%E %C' xargs dnf -y install grep -vEe '^#' build-deps-c10s.txt | /bin/time -f '%E %C' xargs dnf -y builddep dnf clean all +# Restore file capabilities for newuidmap/newgidmap - these are defined in the +# shadow-utils RPM but get stripped during container image builds. +# Required for nested rootless podman. +rpm --setcaps shadow-utils EORUN COPY npm.txt /run/src RUN grep -vEe '^#' npm.txt | /bin/time -f '%E %C' xargs npm i -g @@ -71,6 +75,13 @@ ENV RUSTUP_HOME=/usr/local/rustup ENV KANI_HOME=/usr/local/kani # Setup for codespaces COPY devenv-init.sh /usr/local/bin/ +COPY userns-setup /usr/lib/devenv/userns-setup +COPY devenv-selftest.sh /usr/libexec/ +# Set file capabilities for newuidmap/newgidmap (C10s shadow-utils doesn't set these by default, +# unlike Debian's uidmap package). Required for nested rootless podman. +RUN chmod 755 /usr/libexec/devenv-selftest.sh /usr/lib/devenv/userns-setup && \ + setcap cap_setuid+ep /usr/bin/newuidmap && \ + setcap cap_setgid+ep /usr/bin/newgidmap WORKDIR / # Create user before declaring volumes so home directory has correct ownership @@ -81,6 +92,8 @@ useradd -m devenv -s /bin/bash mkdir -p ~devenv/.local/share/containers chown -R -h devenv: ~devenv/.local echo 'devenv ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/devenv && chmod 0440 /etc/sudoers.d/devenv +# TODO: /etc/shadow permissions need fixing for PAM/sudo with --userns=keep-id +# See https://github.com/bootc-dev/infra/issues/XXX EORUN # To avoid overlay-on-overlay with nested containers VOLUME [ "/var/lib/containers", "/home/devenv/.local/share/containers/" ] diff --git a/devenv/Containerfile.debian b/devenv/Containerfile.debian index d82e725..4a1ea3b 100644 --- a/devenv/Containerfile.debian +++ b/devenv/Containerfile.debian @@ -71,6 +71,9 @@ ENV RUSTUP_HOME=/usr/local/rustup ENV KANI_HOME=/usr/local/kani # Setup for codespaces COPY devenv-init.sh /usr/local/bin/ +COPY userns-setup /usr/lib/devenv/userns-setup +COPY devenv-selftest.sh /usr/libexec/ +RUN chmod 755 /usr/libexec/devenv-selftest.sh /usr/lib/devenv/userns-setup WORKDIR / # Create user before declaring volumes so home directory has correct ownership diff --git a/devenv/devenv-init.sh b/devenv/devenv-init.sh index aba993b..c87ea2b 100755 --- a/devenv/devenv-init.sh +++ b/devenv/devenv-init.sh @@ -1,26 +1,3 @@ #!/bin/bash -set -euo pipefail -# Set things up so that podman can run nested inside the privileged -# docker container of a codespace or devpod. - -# Fix the propagation - only needed in some environments (e.g., codespaces) -# In devpod with rootless podman, / may already have shared propagation -# or we may not have permission to remount it. -propagation=$(findmnt -J -o TARGET,PROPAGATION / | jq -r '.filesystems[0].propagation // "unknown"') -if [ "$propagation" = "private" ]; then - if mount -o remount --make-shared / 2>/dev/null; then - echo "Set / to shared propagation" - else - echo "Warning: Could not set / to shared propagation (may not be needed)" - fi -fi - -# This is actually safe to expose to all users really, like Fedora derivatives do -if [ -e /dev/kvm ]; then - chmod a+rw /dev/kvm 2>/dev/null || true -fi - -# Handle nested cgroups - update containers.conf if it exists and has the settings commented out -if [ -f /usr/share/containers/containers.conf ]; then - sed -i -e 's,^#cgroups =.*,cgroups = "no-conmon",' -e 's,^#cgroup_manager =.*,cgroup_manager = "cgroupfs",' /usr/share/containers/containers.conf -fi +# Thin wrapper that calls the Python implementation +exec python3 /usr/lib/devenv/userns-setup "$@" diff --git a/devenv/devenv-selftest.sh b/devenv/devenv-selftest.sh new file mode 100755 index 0000000..10106bd --- /dev/null +++ b/devenv/devenv-selftest.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Test that nested podman and VMs work correctly in this devcontainer. +# This script is designed to be run inside the container after devenv-init.sh +# has already been executed (e.g., via postCreateCommand). +set -euo pipefail + +echo "=== Testing nested podman and VMs ===" + +echo "Podman version:" +podman --version + +echo "Podman info (rootless):" +podman info --format '{{.Host.Security.Rootless}}' + +# Use CentOS Stream 10 as the test image for both container and VM +image="quay.io/centos-bootc/centos-bootc:stream10" + +echo "Pulling $image..." +podman pull "$image" + +echo "Running nested container..." +podman run --rm "$image" echo "Hello from nested podman!" + +echo "=== Nested container test passed ===" + +# Test bcvk (VM) if available and /dev/kvm exists +if command -v bcvk >/dev/null 2>&1 && [ -e /dev/kvm ]; then + echo "" + echo "=== Testing bcvk VM ===" + echo "bcvk version:" + bcvk --version + + echo "Running bcvk ephemeral VM with SSH..." + bcvk ephemeral run-ssh "$image" -- echo "Hello from bcvk VM!" + + echo "=== bcvk VM test passed ===" +else + echo "" + echo "=== Skipping bcvk VM test (bcvk not available or /dev/kvm missing) ===" +fi + +echo "" +echo "=== All tests passed ===" diff --git a/devenv/userns-setup b/devenv/userns-setup new file mode 100644 index 0000000..2858c05 --- /dev/null +++ b/devenv/userns-setup @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +""" +Set up nested podman inside privileged docker/podman containers (codespaces, devpod). + +This handles: +- Mount propagation fixes +- /dev/kvm permissions +- subuid/subgid configuration for constrained UID namespaces +- containers.conf configuration for nested operation +""" + +import argparse +import json +import os +import shutil +import subprocess +import sys +from pathlib import Path + + +def run_cmd(cmd: list[str], check: bool = True, capture: bool = False) -> subprocess.CompletedProcess: + """Run a command, optionally capturing output.""" + return subprocess.run(cmd, check=check, capture_output=capture, text=True) + + +def get_mount_propagation(target: str) -> str: + """Get mount propagation type for a given mount point.""" + result = run_cmd(["findmnt", "-J", "-o", "TARGET,PROPAGATION", target], capture=True, check=False) + if result.returncode != 0: + return "unknown" + try: + data = json.loads(result.stdout) + return data.get("filesystems", [{}])[0].get("propagation", "unknown") + except (json.JSONDecodeError, IndexError, KeyError): + return "unknown" + + +def fix_mount_propagation() -> None: + """Fix root mount propagation if needed (e.g., in codespaces).""" + propagation = get_mount_propagation("/") + if propagation == "private": + result = run_cmd(["mount", "-o", "remount", "--make-shared", "/"], check=False) + if result.returncode == 0: + print("Set / to shared propagation") + else: + print("Warning: Could not set / to shared propagation (may not be needed)") + + +def fix_kvm_permissions() -> None: + """Make /dev/kvm accessible to all users (safe, like Fedora derivatives do).""" + kvm = Path("/dev/kvm") + if kvm.exists(): + try: + kvm.chmod(0o666) + except PermissionError: + pass + + +def detect_constrained_namespace() -> tuple[bool, int]: + """ + Detect whether we're in a constrained UID namespace. + + Returns: + (is_constrained, max_uid): True if constrained (1000-100000 UIDs available), + along with the maximum usable UID. + """ + max_uid = 0 + try: + with open("/proc/self/uid_map") as f: + for line in f: + parts = line.split() + if len(parts) >= 3: + inside = int(parts[0]) + count = int(parts[2]) + end = inside + count + if end > max_uid: + max_uid = end + except (OSError, ValueError): + return False, 0 + + # Constrained if between 1000 and 100000 UIDs + is_constrained = 1000 < max_uid < 100000 + return is_constrained, max_uid + + +def configure_subuid_subgid(target_user: str | None = None) -> None: + """ + Configure subuid/subgid for nested rootless podman in constrained UID namespaces. + + Args: + target_user: Username to configure. Defaults to SUDO_USER or current user. + """ + # Only proceed if podman is available + if not shutil.which("podman"): + return + + # Check for newuidmap/newgidmap + if not shutil.which("newuidmap"): + print("Warning: newuidmap not found, nested podman may fail") + + is_constrained, max_uid = detect_constrained_namespace() + if not is_constrained: + print(f"Full UID namespace available (max={max_uid}), using default podman config") + return + + # Determine target user + if target_user is None: + target_user = os.environ.get("SUDO_USER") + if target_user is None: + import pwd + target_user = pwd.getpwuid(os.getuid()).pw_name + + # Get target user's UID + import pwd + try: + target_uid = pwd.getpwnam(target_user).pw_uid + except KeyError: + print(f"Warning: User {target_user} not found") + return + + # Calculate subuid range + subuid_start = target_uid + 1 + subuid_count = max_uid - subuid_start + + if subuid_count < 1000: + print(f"Insufficient UID range for nested podman (only {subuid_count} UIDs available)") + return + + expected = f"{target_user}:{subuid_start}:{subuid_count}" + + # Check if already configured correctly + subuid_path = Path("/etc/subuid") + if subuid_path.exists(): + current = None + for line in subuid_path.read_text().splitlines(): + if line.startswith(f"{target_user}:"): + current = line + break + if current == expected: + print(f"Nested podman subuid/subgid already configured for {target_user}") + return + + print(f"Configuring nested podman for {target_user} (subuid {subuid_start}:{subuid_count})") + + # Configure subuid/subgid + for path in [Path("/etc/subuid"), Path("/etc/subgid")]: + lines = [] + if path.exists(): + lines = [line for line in path.read_text().splitlines() + if not line.startswith(f"{target_user}:")] + lines.append(expected) + path.write_text("\n".join(lines) + "\n") + + # Reset podman storage if it exists (may have wrong UID mappings) + import pwd + user_home = Path(pwd.getpwnam(target_user).pw_dir) + storage_dir = user_home / ".local/share/containers/storage" + if storage_dir.exists(): + print("Resetting podman storage for new UID mappings") + shutil.rmtree(storage_dir) + + print("Nested podman subuid/subgid configured successfully") + + +def configure_containers_conf() -> None: + """Configure containers.conf for nested container operation.""" + if not shutil.which("podman"): + return + + is_constrained, _ = detect_constrained_namespace() + + if not is_constrained: + # Full namespace - just update the shipped config + conf_path = Path("/usr/share/containers/containers.conf") + if conf_path.exists(): + content = conf_path.read_text() + content = content.replace("#cgroups =", 'cgroups = "no-conmon" #') + content = content.replace("#cgroup_manager =", 'cgroup_manager = "cgroupfs" #') + conf_path.write_text(content) + else: + # Constrained namespace - create full config for nested operation + conf_dir = Path("/etc/containers") + conf_dir.mkdir(parents=True, exist_ok=True) + conf_path = conf_dir / "containers.conf" + conf_path.write_text("""\ +# Generated for nested container support in constrained UID namespace +[containers] +cgroups = "disabled" +utsns = "host" + +[engine] +cgroup_manager = "cgroupfs" +""") + print("Configured containers.conf for constrained UID namespace") + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Configure nested podman for devcontainers" + ) + parser.add_argument( + "user", + nargs="?", + help="Target user for subuid/subgid configuration (default: SUDO_USER or current user)", + ) + args = parser.parse_args() + + fix_mount_propagation() + fix_kvm_permissions() + configure_subuid_subgid(args.user) + configure_containers_conf() + + return 0 + + +if __name__ == "__main__": + sys.exit(main())