From 68f5dc3020785494ec46dda3a625b9e32cabae6e Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Fri, 6 Feb 2026 17:34:51 +0100 Subject: [PATCH 1/6] targets/linux/deb/distro: add minimal images target for DEB distros This commit adds a new target for building minimal images for DEB-based distros. In new implementation, base image for DEB-based distros is not required, although still supported. New implementation in a nutshell downloads all required DEB packages into a volume using work image, extracts them into a target image then runs dpkg --install to run post-install scripts in the proper environment. It also moves container-related tests to a separate function so they can be executed to test both implementations while we keep the old, experimental implementation around. Closes #448 Signed-off-by: Mateusz Gozdek --- targets/linux/deb/distro/container.go | 172 ++- targets/linux/deb/distro/container_test.go | 242 +++- targets/linux/deb/distro/distro.go | 22 +- test/linux_target_test.go | 1184 ++++++++++---------- test/target_ubuntu_test.go | 11 +- website/docs/examples/targets.md | 7 + 6 files changed, 1028 insertions(+), 610 deletions(-) diff --git a/targets/linux/deb/distro/container.go b/targets/linux/deb/distro/container.go index 381afcf86..399f62462 100644 --- a/targets/linux/deb/distro/container.go +++ b/targets/linux/deb/distro/container.go @@ -24,6 +24,10 @@ func (c *Config) BuildContainer(ctx context.Context, client gwclient.Client, sOp Opts: opts, } + if c.DefaultOutputImage == "" { + return bootstrapContainer(ctx, input) + } + baseImg := baseImageFromSpec(llb.Image(c.DefaultOutputImage, llb.WithMetaResolver(sOpt.Resolver), dalec.WithConstraints(opts...)), input) if len(c.BasePackages) > 0 { @@ -95,26 +99,26 @@ type buildContainerInput struct { Opts []llb.ConstraintsOpt } -func extraRepos(input buildContainerInput) llb.RunOption { +func extraRepos(input buildContainerInput, opts ...llb.ConstraintsOpt) llb.RunOption { // Those base repos come from distro 
configuration. repos := dalec.GetExtraRepos(input.Config.ExtraRepos, "install") // These are user specified via spec. repos = append(repos, input.Spec.GetInstallRepos(input.Target)...) - return input.Config.RepoMounts(repos, input.SOpt, input.Opts...) + return input.Config.RepoMounts(repos, input.SOpt, opts...) } func installPackagesInContainer(input buildContainerInput, ro []llb.RunOption) llb.StateOption { return func(baseImg llb.State) llb.State { - opts := append(input.Opts, dalec.ProgressGroup("Install spec package")) + opts := append(input.Opts, dalec.ProgressGroup("Install DEB Packages")) debug := llb.Scratch().File(llb.Mkfile("debug", 0o644, []byte(`debug=2`)), opts...) return baseImg.Run( append(ro, dalec.WithConstraints(opts...), - extraRepos(input), + extraRepos(input, opts...), // This file makes dpkg give more verbose output which can be useful when things go awry. llb.AddMount("/etc/dpkg/dpkg.cfg.d/99-dalec-debug", debug, llb.SourcePath("debug"), llb.Readonly), dalec.RunOptFunc(func(cfg *llb.ExecInfo) { @@ -136,3 +140,163 @@ func installPackagesInContainer(input buildContainerInput, ro []llb.RunOption) l With(dalec.InstallPostSymlinks(input.Spec.GetImagePost(input.Target), input.Worker, opts...)) } } + +func bootstrapContainer(ctx context.Context, input buildContainerInput) llb.State { + opts := input.Opts + + baseImgOpts := append(opts, dalec.ProgressGroup("Bootstrap Base Image")) + + baseImg := llb.Scratch().File(llb.Mkdir("/etc", 0o755), baseImgOpts...). + File(llb.Mkdir("/etc/apt", 0o755), baseImgOpts...). + File(llb.Mkdir("/etc/apt/apt.conf.d", 0o755), baseImgOpts...). + File(llb.Mkdir("/etc/apt/preferences.d", 0o755), baseImgOpts...). + File(llb.Mkdir("/etc/apt/sources.list.d", 0o755), baseImgOpts...). + File(llb.Mkdir("/var", 0o755), baseImgOpts...). + File(llb.Mkdir("/var/cache", 0o755), baseImgOpts...). + File(llb.Mkdir("/var/cache/apt", 0o755), baseImgOpts...). + File(llb.Mkdir("/var/cache/apt/archives", 0o755), baseImgOpts...). 
+ File(llb.Mkdir("/var/lib", 0o755), baseImgOpts...). + File(llb.Mkdir("/var/lib/dpkg", 0o755), baseImgOpts...). + File(llb.Mkfile("/var/lib/dpkg/status", 0o644, []byte{}), baseImgOpts...) + + installScript := `#!/bin/sh +set -exu + +rootfs=/tmp/rootfs +apt_archives=/var/cache/apt/archives + +# Make sure any cached data from local repos is purged since this should not +# be shared between builds. +rm -f /var/lib/apt/lists/_* +# autoclean removes cached deb files which are no longer available in any configured repository. +apt autoclean -y + +# Remove any previously failed attempts to get repo data +rm -rf /var/lib/apt/lists/partial/* + +# Ensure package index is up to date, required when cache is empty. +apt update + +# Select essential packages, since those will be used as a base for the image. +# +# We can't use ?essential since some distros we support have too old apt which does not support patterns. +essential_packages=$(dpkg-query -Wf '${Package} ${Essential}\n' | awk '$2 == "yes" {print $1}') + +local_package_files=$(ls /base-packages/*.deb /spec-packages/*.deb) + +# Get names of local packages so we can exclude them from apt-get install. +local_package_names=$(for f in ${local_package_files}; do dpkg-deb -f "${f}" Package 2>/dev/null; done | sort -u) + +# Extract dependencies of local packages, since we need to download those as well. +# +# Spec packages may depend on base packages, so we need to filter to only download remaining packages, since downloading local packages +# would fail. +dependencies_to_download=$(for f in ${local_package_files}; do dpkg-deb -f "${f}" Depends 2>/dev/null; done | tr ',' '\n' | sed 's/([^)]*)//g; s/|.*//; s/ //g' | grep -v '^$' | sort -u | grep -vxF "${local_package_names}") + +# Get the exact filenames apt needs by using --print-uris with an empty cache dir. +# This forces apt to report ALL needed packages (not just uncached ones), giving +# us exact filenames including correct version and architecture suffixes. 
+# --print-uris output format: 'URL' filename size hash
+# We extract the second field (the filename).
+needed_filenames=$(apt-get -o Dir::State::status="${rootfs}/var/lib/dpkg/status" \
+    -o Dir::Cache::Archives=/tmp \
+    --yes --print-uris install ${essential_packages} ${dependencies_to_download} \
+    | grep '\.deb ' | awk '{print $2}')
+
+mkdir -p "${rootfs}${apt_archives}"/partial
+cp ${local_package_files} "${rootfs}${apt_archives}"/
+
+# Copy already-cached needed .deb files from the persistent apt cache into the
+# rootfs cache. This avoids picking up stale .deb files from previous unrelated
+# builds that remain in the persistent cache.
+for filename in ${needed_filenames}; do
+    if [ -f "${apt_archives}/${filename}" ]; then
+        cp "${apt_archives}/${filename}" "${rootfs}${apt_archives}"/
+    fi
+done
+
+# Download remaining needed packages directly into the rootfs cache.
+# apt skips packages already present, so only missing ones are fetched.
+apt-get -o Dir::State::status="${rootfs}/var/lib/dpkg/status" \
+    -o Dir::Cache::Archives="${rootfs}${apt_archives}" \
+    --yes --download-only install ${essential_packages} ${dependencies_to_download}
+
+deb_files=$(ls "${rootfs}${apt_archives}"/*.deb)
+
+# Extract all packages into the target rootfs.
+#
+# Extract base-files first to establish merged-usr symlinks (/bin -> usr/bin, etc.)
+# before other packages create those paths as real directories, which would
+# cause tar to fail when base-files tries to create the symlinks later.
+base_files_package=$(echo "${deb_files}" | tr ' ' '\n' | grep '/base-files_' || true)
+for f in ${base_files_package} $(echo "${deb_files}" | tr ' ' '\n' | grep -v '/base-files_'); do
+    dpkg-deb --extract "${f}" "${rootfs}"
+done
+
+# Fix merged-usr: on Noble+, /bin, /sbin, /lib should be symlinks to usr/bin, usr/sbin, usr/lib
+# but dpkg-deb --extract may recreate them as real directories. 
+# +# This is required so we can actually run shell using target image to re-install packages for running post-install scripts. +for dir in bin sbin lib; do + if [ -d "${rootfs}/usr/${dir}" ] && [ -d "${rootfs}/${dir}" ] && [ ! -L "${rootfs}/${dir}" ]; then + cp -a "${rootfs}/${dir}"/* "${rootfs}/usr/${dir}/" 2>/dev/null || true + rm -rf "${rootfs}/${dir}" + ln -s "usr/${dir}" "${rootfs}/${dir}" + fi +done + +# dpkg-deb --extract doesn't run postinst scripts, so the /bin/sh symlink +# normally created by update-alternatives is missing. Create it manually. +if [ ! -e "${rootfs}/usr/bin/sh" ] && [ ! -e "${rootfs}/bin/sh" ]; then + ln -s dash "${rootfs}/usr/bin/sh" +fi + +# Remove usrmerge package - our merged-usr fixup above already handles this, +# and usrmerge's postinst fails on overlayfs (which BuildKit uses). +# Create a fake dpkg status entry so dpkg thinks it's installed. +# +# This only runs when usrmerge package is not installed in the base image, since only then the deb file will be downloaded. +for f in $(echo "${deb_files}" | tr ' ' '\n' | grep -E '/(usrmerge|usr-is-merged)_' || true); do + pkg=$(dpkg-deb -f "${f}" Package) + ver=$(dpkg-deb -f "${f}" Version) + arch=$(dpkg-deb -f "${f}" Architecture) + printf 'Package: %s\nStatus: install ok installed\nVersion: %s\nArchitecture: %s\nDescription: faked by dalec\n\n' "${pkg}" "${ver}" "${arch}" >> "${rootfs}/var/lib/dpkg/status" + + # Remove the deb file so it won't be re-installed. + rm "${f}" +done +` + + opts = append(opts, dalec.ProgressGroup("Fetch DEB Packages")) + + script := llb.Scratch().File(llb.Mkfile("install.sh", 0o755, []byte(installScript)), opts...) + + // Use worker to download all packages + deps and install into baseImg. 
+ baseImg = input.Worker.Run( + dalec.WithConstraints(opts...), + llb.AddMount("/tmp/install.sh", script, llb.SourcePath("install.sh")), + llb.AddMount("/base-packages", basePackages(ctx, input), llb.Readonly), + llb.AddMount("/spec-packages", input.SpecPackages, llb.Readonly), + extraRepos(input, opts...), + dalec.WithMountedAptCache(input.Config.AptCachePrefix, opts...), + llb.AddEnv("DEBIAN_FRONTEND", "noninteractive"), + dalec.ShArgs("/tmp/install.sh"), + frontend.IgnoreCache(input.Client, targets.IgnoreCacheKeyContainer), + ).AddMount("/tmp/rootfs", baseImageFromSpec(baseImg, input)) + + result := baseImg.With(installPackagesInContainer(input, []llb.RunOption{ + dalec.ProgressGroup("Install DEB Packages"), + llb.AddEnv("DEBIAN_FRONTEND", "noninteractive"), + llb.Args([]string{"/usr/bin/sh", "-c", "dpkg --install --force-depends /var/cache/apt/archives/*.deb && rm -rf /var/cache/apt/archives/*.deb"}), + })) + + // Squash all layers into one by copying the final filesystem into a fresh + // scratch state. Without this, files extracted in the bootstrap layer but + // removed during cleanup still occupy space in the earlier layer. + squashOpts := append(opts, dalec.ProgressGroup("Squash container layers")) + return llb.Scratch().File(llb.Copy(result, "/", "/", &llb.CopyInfo{ + CopyDirContentsOnly: true, + CreateDestPath: true, + AllowWildcard: true, + }), squashOpts...) 
+} diff --git a/targets/linux/deb/distro/container_test.go b/targets/linux/deb/distro/container_test.go index 27a593175..f241b134d 100644 --- a/targets/linux/deb/distro/container_test.go +++ b/targets/linux/deb/distro/container_test.go @@ -24,6 +24,7 @@ func Test_Building_container(t *testing.T) { t.Parallel() c := &Config{ + ImageRef: "foo", DefaultOutputImage: "foo", } @@ -49,24 +50,37 @@ func Test_Building_container(t *testing.T) { ctx := t.Context() - state := c.BuildContainer(ctx, client, dalec.SourceOpts{}, spec, "target", llb.State{}) + sopt := dalec.SourceOpts{ + GetContext: func(string, ...llb.LocalOption) (*llb.State, error) { + return nil, nil + }, + } + + state := c.BuildContainer(ctx, client, sopt, spec, "target", llb.State{}) ops, err := test.LLBOpsFromState(ctx, state) if err != nil { t.Fatalf("failed to get llb ops from state: %v", err) } - if len(ops) == 0 { - t.Fatalf("expected at least one llb op, got none") - } + specPackageImageSourceFound := false - s := ops[0].Op.GetSource() - if s == nil { - t.Fatalf("expected source op, got nil") + for _, op := range ops { + s := op.Op.GetSource() + + if s == nil || op.OpMetadata.ProgressGroup.Name != "Build Container Image" { + continue + } + + specPackageImageSourceFound = true + + if !strings.Contains(s.Identifier, expectedRef) { + t.Fatalf("expected source identifier to contain %q, got %q", expectedRef, s.Identifier) + } } - if !strings.Contains(s.Identifier, expectedRef) { - t.Fatalf("expected source identifier to contain %q, got %q", expectedRef, s.Identifier) + if !specPackageImageSourceFound { + t.Fatalf("Expected to find spec package source in llb ops") } }) @@ -78,6 +92,7 @@ func Test_Building_container(t *testing.T) { expectedRef := "foo" c := &Config{ + ImageRef: "foo", DefaultOutputImage: expectedRef, } @@ -89,7 +104,13 @@ func Test_Building_container(t *testing.T) { ctx := t.Context() - state := c.BuildContainer(ctx, client, dalec.SourceOpts{}, spec, "target", llb.State{}) + sopt := 
dalec.SourceOpts{ + GetContext: func(string, ...llb.LocalOption) (*llb.State, error) { + return nil, nil + }, + } + + state := c.BuildContainer(ctx, client, sopt, spec, "target", llb.State{}) ops, err := test.LLBOpsFromState(ctx, state) if err != nil { @@ -110,6 +131,189 @@ func Test_Building_container(t *testing.T) { } }) + t.Run("when_bootstrapping_an_image", func(t *testing.T) { + t.Parallel() + + // Bootstrap path is taken when DefaultOutputImage is not set. + // This is not user-configurable, hence it cannot be tested via integration tests. + t.Run("creates_base_directory_structure", func(t *testing.T) { + t.Parallel() + + c := &Config{ + ImageRef: "foo", + // DefaultOutputImage is intentionally empty to trigger bootstrap path. + } + + ctx := t.Context() + + sopt := dalec.SourceOpts{ + GetContext: func(string, ...llb.LocalOption) (*llb.State, error) { + return nil, nil + }, + } + + ops, err := test.LLBOpsFromState(ctx, c.BuildContainer(ctx, &testClient{}, sopt, &dalec.Spec{}, "target", llb.Scratch())) + if err != nil { + t.Fatalf("failed to get llb ops from state: %v", err) + } + + for _, op := range ops { + if op.OpMetadata.ProgressGroup != nil && op.OpMetadata.ProgressGroup.Name == "Bootstrap Base Image" { + return + } + } + + t.Fatalf("Expected bootstrap directory structure when DefaultOutputImage is not set") + }) + + t.Run("downloads_dependencies", func(t *testing.T) { + t.Parallel() + + t.Run("with_extra_distro_config_repos_mounted", func(t *testing.T) { + t.Parallel() + + extraInstallRepo := "extra-install-repo" + + c := &Config{ + ImageRef: "foo", + ExtraRepos: []dalec.PackageRepositoryConfig{ + { + Envs: []string{"install"}, + Config: map[string]dalec.Source{ + extraInstallRepo: { + Inline: &dalec.SourceInline{ + File: &dalec.SourceInlineFile{ + Contents: extraInstallRepo, + }, + }, + }, + }, + }, + { + Envs: []string{"build"}, + Config: map[string]dalec.Source{ + extraInstallRepo: { + Inline: &dalec.SourceInline{ + File: &dalec.SourceInlineFile{ + 
Contents: "unexpected-repo", + }, + }, + }, + }, + }, + }, + } + + ctx := t.Context() + + sopt := dalec.SourceOpts{ + GetContext: func(string, ...llb.LocalOption) (*llb.State, error) { + return nil, nil + }, + } + + ops, err := test.LLBOpsFromState(ctx, c.BuildContainer(ctx, &testClient{}, sopt, &dalec.Spec{}, "target", llb.Scratch())) + if err != nil { + t.Fatalf("failed to get llb ops from state: %v", err) + } + + expectedMountPath := "/etc/apt/sources.list.d/" + extraInstallRepo + ".list" + + for _, op := range ops { + e := op.Op.GetExec() + if e == nil { + continue + } + + // Find the bootstrap download exec by its unique install script mount. + isBootstrapExec := false + for _, mount := range e.Mounts { + if mount.Dest == "/tmp/install.sh" { + isBootstrapExec = true + break + } + } + + if !isBootstrapExec { + continue + } + + for _, mount := range e.Mounts { + if mount.Dest == expectedMountPath { + return + } + } + + t.Fatalf("Bootstrap download exec does not have extra repo mount at %q", expectedMountPath) + } + + t.Fatalf("No bootstrap download exec found") + }) + + t.Run("with_mounted_apt_cache", func(t *testing.T) { + t.Parallel() + + aptCachePrefix := "apt-cache-prefix" + + c := &Config{ + ImageRef: "foo", + AptCachePrefix: aptCachePrefix, + } + + ctx := t.Context() + + sopt := dalec.SourceOpts{ + GetContext: func(string, ...llb.LocalOption) (*llb.State, error) { + return nil, nil + }, + } + + ops, err := test.LLBOpsFromState(ctx, c.BuildContainer(ctx, &testClient{}, sopt, &dalec.Spec{}, "target", llb.Scratch())) + if err != nil { + t.Fatalf("failed to get llb ops from state: %v", err) + } + + for _, op := range ops { + e := op.Op.GetExec() + if e == nil { + continue + } + + // Find the bootstrap download exec by its unique install script mount. 
+ isBootstrapExec := false + for _, mount := range e.Mounts { + if mount.Dest == "/tmp/install.sh" { + isBootstrapExec = true + break + } + } + + if !isBootstrapExec { + continue + } + + for _, mount := range e.Mounts { + if mount.Dest == "/var/cache/apt" { + if mount.CacheOpt == nil { + t.Fatalf("Expected cache mount to have cache options, got none") + } + + if !strings.HasPrefix(mount.CacheOpt.ID, aptCachePrefix) { + t.Fatalf("Expected cache mount ID to have prefix %q, got %q", aptCachePrefix, mount.CacheOpt.ID) + } + + return + } + } + + t.Fatalf("Apt cache mount not found on bootstrap download exec") + } + + t.Fatalf("No bootstrap download exec found") + }) + }) + }) + t.Run("installs_spec_package", func(t *testing.T) { t.Parallel() @@ -119,6 +323,7 @@ func Test_Building_container(t *testing.T) { extraInstallRepo := "extra-install-repo" c := &Config{ + ImageRef: "foo", DefaultOutputImage: "foo", ExtraRepos: []dalec.PackageRepositoryConfig{ { @@ -150,7 +355,12 @@ func Test_Building_container(t *testing.T) { ctx := t.Context() - ops, err := test.LLBOpsFromState(ctx, c.BuildContainer(ctx, &testClient{}, dalec.SourceOpts{}, &dalec.Spec{}, "target", llb.State{})) + sopt := dalec.SourceOpts{ + GetContext: func(string, ...llb.LocalOption) (*llb.State, error) { + return nil, nil + }, + } + ops, err := test.LLBOpsFromState(ctx, c.BuildContainer(ctx, &testClient{}, sopt, &dalec.Spec{}, "target", llb.State{})) if err != nil { t.Fatalf("failed to get llb ops from state: %v", err) } @@ -190,6 +400,7 @@ func Test_Building_container(t *testing.T) { aptCachePrefix := "apt-cache-prefix" c := &Config{ + ImageRef: "foo", DefaultOutputImage: "foo", VersionID: "bar", ContextRef: "distro-context-ref", @@ -200,9 +411,7 @@ func Test_Building_container(t *testing.T) { sopt := dalec.SourceOpts{ GetContext: func(string, ...llb.LocalOption) (*llb.State, error) { - s := llb.Scratch() - - return &s, nil + return nil, nil }, } @@ -311,6 +520,7 @@ func Test_Building_container(t *testing.T) { 
t.Parallel() c := &Config{ + ImageRef: "foo", DefaultOutputImage: "foo", BasePackages: []string{"base-package-1"}, VersionID: "bar", @@ -321,9 +531,7 @@ func Test_Building_container(t *testing.T) { sopt := dalec.SourceOpts{ GetContext: func(string, ...llb.LocalOption) (*llb.State, error) { - s := llb.Scratch() - - return &s, nil + return nil, nil }, } diff --git a/targets/linux/deb/distro/distro.go b/targets/linux/deb/distro/distro.go index 33511d881..ac0927bcc 100644 --- a/targets/linux/deb/distro/distro.go +++ b/targets/linux/deb/distro/distro.go @@ -15,13 +15,11 @@ import ( "github.com/project-dalec/dalec/targets/linux" ) -var ( - defaultRepoConfig = &dalec.RepoPlatformConfig{ - ConfigRoot: "/etc/apt/sources.list.d", - GPGKeyRoot: "/usr/share/keyrings", - ConfigExt: ".list", - } -) +var defaultRepoConfig = &dalec.RepoPlatformConfig{ + ConfigRoot: "/etc/apt/sources.list.d", + GPGKeyRoot: "/usr/share/keyrings", + ConfigExt: ".list", +} type Config struct { ImageRef string @@ -112,6 +110,16 @@ func (cfg *Config) Handle(ctx context.Context, client gwclient.Client) (*gwclien Description: "Builds a container image for testing purposes only.", }) + mux.Add("container", func(ctx context.Context, client gwclient.Client) (*gwclient.Result, error) { + cfg := *cfg + cfg.DefaultOutputImage = "" + + return linux.HandleContainer(&cfg)(ctx, client) + }, &targets.Target{ + Name: "container", + Description: "Builds a container image.", + }) + mux.Add("dsc", cfg.HandleSourcePkg, &targets.Target{ Name: "dsc", Description: "Builds a Debian source package.", diff --git a/test/linux_target_test.go b/test/linux_target_test.go index 2c70792c9..f9b7b2779 100644 --- a/test/linux_target_test.go +++ b/test/linux_target_test.go @@ -12,6 +12,7 @@ import ( "io/fs" "os" "path/filepath" + "slices" "strings" "testing" "time" @@ -63,6 +64,8 @@ type targetConfig struct { Container string // DepsOnly is the target for creating a deps-only container (no package built, only runtime deps installed). 
DepsOnly string + // MinimalContainer is the target for creating a minimal container. + MinimalContainer string // Worker is the target for creating the worker image. Worker string // Sysext is the target for creating a systemd system extension. @@ -719,93 +722,28 @@ index 0000000..5260cb1 t.Run("container", func(t *testing.T) { t.Parallel() + testContainerTarget(ctx, t, testConfig, testConfig.Target.Container) - t.Run("depsonly", func(t *testing.T) { - if testConfig.Target.DepsOnly == "" { - t.Skip("depsonly target not defined") - } - - t.Parallel() - ctx := startTestSpan(ctx, t) - testDepsOnly(ctx, t, testConfig) - }) - - t.Run("creates_post_install_symlinks", func(t *testing.T) { + t.Run("allows_upgrades", func(t *testing.T) { t.Parallel() - ctx := startTestSpan(baseCtx, t) + ctx := startTestSpan(ctx, t) spec := testLinuxSpec(t, dalec.Spec{ - Sources: map[string]dalec.Source{ - "src1": { - Inline: &dalec.SourceInline{ - File: &dalec.SourceInlineFile{ - Contents: "#!/usr/bin/env bash\necho hello world", - Permissions: 0o700, - }, - }, - }, - "src3": { - Inline: &dalec.SourceInline{ - File: &dalec.SourceInlineFile{ - Contents: "#!/usr/bin/env bash\necho goodbye", - Permissions: 0o700, - }, - }, - }, - }, - Artifacts: dalec.Artifacts{ - Binaries: map[string]dalec.ArtifactConfig{ - "src1": {}, - "src3": {}, - }, - Users: []dalec.AddUserConfig{ - { - Name: "need", - }, - }, - Groups: []dalec.AddGroupConfig{ + Build: dalec.ArtifactBuild{ + Steps: []dalec.BuildStep{ { - Name: "coffee", - }, - }, - }, - Image: &dalec.ImageConfig{ - Post: &dalec.PostInstall{ - Symlinks: map[string]dalec.SymlinkTarget{ - "/usr/bin/src1": { - Path: "/src1", - User: "need", - }, - "/usr/bin/src3": { - Paths: []string{"/non/existing/dir/src3", "/non/existing/dir2/src3"}, - User: "need", - Group: "coffee", - }, - }, - }, - }, - Tests: []*dalec.TestSpec{ - { - Name: "Post-install symlinks should be created and have correct ownership", - Files: map[string]dalec.FileCheckOutput{ - "/src1": {}, - 
"/non/existing/dir/src3": {}, - }, - Steps: []dalec.TestStep{ - {Command: "/bin/bash -exc 'test -L /src1'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /src1)\" = \"/usr/bin/src1\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=0; LINK_OWNER=$(stat -c \"%u:%g\" /src1); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, - {Command: "/src1", Stdout: dalec.CheckOutput{Equals: "hello world\n"}, Stderr: dalec.CheckOutput{Empty: true}}, + Command: ` +# This is not a debian build, skip this. +[ ! -d debian ] && exit 0; - {Command: "/bin/bash -exc 'test -L /non/existing/dir/src3'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /non/existing/dir/src3)\" = \"/usr/bin/src3\"'"}, - {Command: "/bin/bash -exc 'test -L /non/existing/dir2/src3'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /non/existing/dir2/src3)\" = \"/usr/bin/src3\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir/src3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir2/src3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, - {Command: "/non/existing/dir/src3", Stdout: dalec.CheckOutput{Equals: "goodbye\n"}, Stderr: dalec.CheckOutput{Empty: true}}, - {Command: "/non/existing/dir2/src3", Stdout: dalec.CheckOutput{Equals: "goodbye\n"}, Stderr: dalec.CheckOutput{Empty: true}}, +# Inject a custom postinst script to inspect the install environment +[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -x debian/postinst ] || chmod +x debian/postinst +cat >> debian/postinst << 'EOF' +if [ "${DALEC_UPGRADE}" != "true" ]; then echo "Expected DALEC_UPGRADE to be \"true\", got \"${DALEC_UPGRADE}\""; exit 1; fi +EOF +`, 
}, }, }, @@ -816,505 +754,82 @@ index 0000000..5260cb1 withSpec(ctx, t, &spec), withBuildTarget(testConfig.Target.Container), ) - solveT(ctx, t, gwc, sr) - }) - }) - - t.Run("contains_etc_os_release_file", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - spec := testLinuxSpec(t, dalec.Spec{ - Tests: []*dalec.TestSpec{ - { - Name: "Check /etc/os-release", - Files: map[string]dalec.FileCheckOutput{ - "/etc/os-release": { - CheckOutput: dalec.CheckOutput{ - Matches: []string{ - // Some distros have quotes around the values - // Regex is to match the values with or without quotes - // "(?m)" enables multi-line mode so that ^ and $ match the start and end of lines rather than the full document. - // - // Due to these values getting processed for build args, quotes are stripped unless they are escaped. - `(?m)^ID=(\")?` + testConfig.Release.ID + `(\")?`, - `(?m)^VERSION_ID=(\")?` + testConfig.Release.VersionID + `(\")?`, - }, - }, - }, - }, - }, - }, - }) - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - ) solveT(ctx, t, gwc, sr) }) }) + }) - t.Run("runs_tests", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - // Make sure the test framework was actually executed by the build target. - // This appends a test case so that is expected to fail and as such cause the build to fail. 
- spec := testLinuxSpec(t, dalec.Spec{ - Tests: []*dalec.TestSpec{ - { - Name: "Test framework should be executed", - Steps: []dalec.TestStep{ - {Command: "/bin/sh -c 'echo this command should fail; exit 42'"}, - }, - }, - }, - }) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - ) - sr.Evaluate = true - - _, err := gwc.Solve(ctx, sr) - if err == nil { - t.Fatal("Expected test spec to run with error but got none") - } - }) - }) - - t.Run("has_image_config_available_with_build_time", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - spec := testLinuxSpec(t, dalec.Spec{}) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - ) - sr.Evaluate = true - - beforeBuild := time.Now() - res := solveT(ctx, t, gwc, sr) - - dt, ok := res.Metadata[exptypes.ExporterImageConfigKey] - assert.Assert(t, ok, "result metadata should contain an image config: available metadata: %s", strings.Join(maps.Keys(res.Metadata), ", ")) - - var cfg dalec.DockerImageSpec - assert.Assert(t, json.Unmarshal(dt, &cfg)) - assert.Check(t, cfg.Created.After(beforeBuild)) - assert.Check(t, cfg.Created.Before(time.Now())) - }) - }) - - t.Run("respects_container_cache_key", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - spec := testLinuxSpec(t, dalec.Spec{}) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - withIgnoreCache(targets.IgnoreCacheKeyContainer), - ) - - res := solveT(ctx, t, gwc, sr) - - ops, err := test.LLBOpsFromState(ctx, resultToState(t, res)) - if err != nil { - t.Fatalf("Unexpected error extracting LLB OPs from state: %v", err) - } - - cacheIgnored := 
[]test.LLBOp{} - execFound := false - - for _, op := range ops { - if op.OpMetadata.IgnoreCache { - cacheIgnored = append(cacheIgnored, op) - } - - e := op.Op.GetExec() - pg := op.OpMetadata.ProgressGroup.Name - if e == nil || (pg != "Install spec package" && pg != "Install RPMs") { - continue - } - - execFound = true - - if !op.OpMetadata.IgnoreCache { - t.Errorf("Expected install step to have cache ignore enabled") - } - } - - if !execFound { - t.Errorf("No exec ops found in the build") - } - - if len(cacheIgnored) > 1 { - ops, err := test.LLBOpsToJSON(cacheIgnored) - if err != nil { - t.Errorf("Error converting ops to JSON: %v", err) - } - - t.Errorf("Expected only one operation to have cache ignore enabled, found %d: \n%s", len(cacheIgnored), ops) - } - }) - }) - - t.Run("respects_ignoring_all_caches", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - spec := testLinuxSpec(t, dalec.Spec{}) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - withIgnoreCache(), - ) - - res := solveT(ctx, t, gwc, sr) - - ops, err := test.LLBOpsFromState(ctx, resultToState(t, res)) - if err != nil { - t.Fatalf("Unexpected error extracting LLB OPs from state: %v", err) - } + t.Run("minimal_container", func(t *testing.T) { + skip.If(t, testConfig.Target.MinimalContainer == "", "skipping test as it is not supported for this config") + t.Parallel() + testContainerTarget(ctx, t, testConfig, testConfig.Target.MinimalContainer) + }) - badOps := []test.LLBOp{} + t.Run("depsonly", func(t *testing.T) { + if testConfig.Target.DepsOnly == "" { + t.Skip("depsonly target not defined") + } - for _, op := range ops { - if op.OpMetadata.IgnoreCache { - continue - } + t.Parallel() + ctx := startTestSpan(ctx, t) + testDepsOnly(ctx, t, testConfig) + }) - badOps = append(badOps, op) - } + t.Run("sysext", func(t *testing.T) { + skip.If(t, 
testConfig.Target.Sysext == "", "skipping test as it is not supported for this config") - if len(badOps) != 0 { - opsJSON, err := test.LLBOpsToJSON(badOps) - if err != nil { - t.Fatalf("Unexpected error converting bad ops to JSON: %v", err) - } + t.Parallel() + ctx := startTestSpan(baseCtx, t) - t.Fatalf("Unexpected %d operations without cache ignore:\n%s", len(badOps), opsJSON) - } - }) - }) + const src2Patch3File = "patch3" + src2Patch3Content := []byte(` +diff --git a/file3 b/file3 +new file mode 100700 +index 0000000..5260cb1 +--- /dev/null ++++ b/file3 +@@ -0,0 +1,3 @@ ++#!/usr/bin/env bash ++ ++echo "Added another new file" +`) - t.Run("when_installing_spec_package", func(t *testing.T) { - t.Parallel() + src2Patch4Content := []byte(` +diff --git a/file4 b/file4 +new file mode 100700 +index 0000000..5260cb1 +--- /dev/null ++++ b/file4 +@@ -0,0 +1,3 @@ ++#!/usr/bin/env bash ++ ++echo "Added yet another new file" +`) - t.Run("makes_extra_repos_from_spec_available", func(t *testing.T) { - t.Parallel() + src2Patch5Content := []byte(` +diff --git a/file5 b/file5 +new file mode 100700 +index 0000000..5260cb1 +--- /dev/null ++++ b/file5 +@@ -0,0 +1,3 @@ ++#!/usr/bin/env bash ++ ++echo "Added yet again...another new file" +`) - ctx := startTestSpan(baseCtx, t) + const src2Patch4File = "patches/patch4" + const src2Patch5File = "patches/patch5" + const patchContextName = "patch-context" - // Create repository configurations for different phases - // This test verifies that repos configured for "install" are properly processed during container build - // and that repos configured for other phases (like "build") don't interfere - installRepoConfig := llb.Scratch().File( - llb.Mkfile("install-repo.list", 0o644, []byte("# Install phase repository config\n")), - dalec.ProgressGroup("Create install repo config"), - ) + opts := dalec.ProgressGroup("test-patch-sources") - buildRepoConfig := llb.Scratch().File( - llb.Mkfile("build-repo.list", 0o644, []byte("# Unexpected 
repo\n")), - dalec.ProgressGroup("Create build repo config"), - ) - - spec := testLinuxSpec(t, dalec.Spec{ - Dependencies: &dalec.PackageDependencies{ - ExtraRepos: []dalec.PackageRepositoryConfig{ - { - Config: map[string]dalec.Source{ - "install-repo.list": { - Context: &dalec.SourceContext{ - Name: "install-repo-config", - }, - Path: "install-repo.list", - }, - }, - Envs: []string{"install"}, - }, - { - Config: map[string]dalec.Source{ - "build-repo.list": { - Context: &dalec.SourceContext{ - Name: "build-repo-config", - }, - Path: "build-repo.list", - }, - }, - Envs: []string{"build"}, - }, - }, - }, - Build: dalec.ArtifactBuild{ - Steps: []dalec.BuildStep{ - { - Command: ` -# This is not a debian build, skip this. -[ ! -d debian ] && exit 0; - -# Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) -[ -x debian/postinst ] || chmod +x debian/postinst -cat >> debian/postinst << 'EOF' -cat /etc/apt/sources.list.d/* -grep 'Unexpected repo' /etc/apt/sources.list.d/* && exit 1 || exit 0 -EOF -`, - }, - }, - }, - }) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - withBuildContext(ctx, t, "install-repo-config", installRepoConfig), - withBuildContext(ctx, t, "build-repo-config", buildRepoConfig), - ) - solveT(ctx, t, gwc, sr) - }) - }) - - t.Run("enables_dpkg_debug", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - spec := testLinuxSpec(t, dalec.Spec{ - Build: dalec.ArtifactBuild{ - Steps: []dalec.BuildStep{ - { - Command: ` -# This is not a debian build, skip this. -[ ! 
-d debian ] && exit 0; - -# Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) -[ -x debian/postinst ] || chmod +x debian/postinst -cat >> debian/postinst << 'EOF' -grep debug=2 /etc/dpkg/dpkg.cfg.d/99-dalec-debug -EOF -`, - }, - }, - }, - }) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - ) - - solveT(ctx, t, gwc, sr) - }) - }) - - t.Run("allows_upgrades", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - spec := testLinuxSpec(t, dalec.Spec{ - Build: dalec.ArtifactBuild{ - Steps: []dalec.BuildStep{ - { - Command: ` -# This is not a debian build, skip this. -[ ! -d debian ] && exit 0; - -# Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) -[ -x debian/postinst ] || chmod +x debian/postinst -cat >> debian/postinst << 'EOF' -if [ "${DALEC_UPGRADE}" != "true" ]; then echo "Expected DALEC_UPGRADE to be \"true\", got \"${DALEC_UPGRADE}\""; exit 1; fi -EOF - `, - }, - }, - }, - }) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - ) - - solveT(ctx, t, gwc, sr) - }) - }) - - t.Run("handles_ubuntu_dpkg_excludes_config", func(t *testing.T) { - t.Parallel() - - t.Run("by_masking_when_target_has_docs", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - spec := testLinuxSpec(t, dalec.Spec{ - Sources: map[string]dalec.Source{ - "foo": { - Inline: &dalec.SourceInline{ - File: &dalec.SourceInlineFile{ - Contents: "hello world!", - }, - }, - }, - }, - Artifacts: dalec.Artifacts{ - Docs: map[string]dalec.ArtifactConfig{ - "foo": {}, - }, - }, - Build: 
dalec.ArtifactBuild{ - Steps: []dalec.BuildStep{ - { - Command: ` -# This is not a debian build, skip this. -[ ! -d debian ] && exit 0; - -# Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) -[ -x debian/postinst ] || chmod +x debian/postinst -cat >> debian/postinst << 'EOF' -[ -s /etc/dpkg/dpkg.cfg.d/excludes ] && exit 1 -exit 0 -EOF - `, - }, - }, - }, - }) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - ) - - solveT(ctx, t, gwc, sr) - }) - }) - - t.Run("by_not_masking_when_target_has_no_docs", func(t *testing.T) { - t.Parallel() - - ctx := startTestSpan(baseCtx, t) - - spec := testLinuxSpec(t, dalec.Spec{ - Build: dalec.ArtifactBuild{ - Steps: []dalec.BuildStep{ - { - Command: ` -# This is not a debian build, skip this. -[ ! -d debian ] && exit 0; - -# Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) -[ -x debian/postinst ] || chmod +x debian/postinst -cat >> debian/postinst << 'EOF' -set -x - -# If file does not exist, all good. -[ ! -f /etc/dpkg/dpkg.cfg.d/excludes ] && exit 0 - -# if file exists, ensure it is not masked. -if [ ! 
-s /etc/dpkg/dpkg.cfg.d/excludes ]; then echo "Unexpected masking found"; exit 1; fi -EOF - `, - }, - }, - }, - }) - - testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { - sr := newSolveRequest( - withSpec(ctx, t, &spec), - withBuildTarget(testConfig.Target.Container), - ) - - solveT(ctx, t, gwc, sr) - }) - }) - }) - }) - }) - - t.Run("sysext", func(t *testing.T) { - skip.If(t, testConfig.Target.Sysext == "", "skipping test as it is not supported for this config") - - t.Parallel() - ctx := startTestSpan(baseCtx, t) - - const src2Patch3File = "patch3" - src2Patch3Content := []byte(` -diff --git a/file3 b/file3 -new file mode 100700 -index 0000000..5260cb1 ---- /dev/null -+++ b/file3 -@@ -0,0 +1,3 @@ -+#!/usr/bin/env bash -+ -+echo "Added another new file" -`) - - src2Patch4Content := []byte(` -diff --git a/file4 b/file4 -new file mode 100700 -index 0000000..5260cb1 ---- /dev/null -+++ b/file4 -@@ -0,0 +1,3 @@ -+#!/usr/bin/env bash -+ -+echo "Added yet another new file" -`) - - src2Patch5Content := []byte(` -diff --git a/file5 b/file5 -new file mode 100700 -index 0000000..5260cb1 ---- /dev/null -+++ b/file5 -@@ -0,0 +1,3 @@ -+#!/usr/bin/env bash -+ -+echo "Added yet again...another new file" -`) - - const src2Patch4File = "patches/patch4" - const src2Patch5File = "patches/patch5" - const patchContextName = "patch-context" - - opts := dalec.ProgressGroup("test-patch-sources") - - patchContext := llb.Scratch(). - File(llb.Mkfile(src2Patch3File, 0o600, src2Patch3Content), opts). - File(llb.Mkdir("patches", 0o755), opts). - File(llb.Mkfile(src2Patch4File, 0o600, src2Patch4Content), opts). - File(llb.Mkfile(src2Patch5File, 0o600, src2Patch5Content), opts) + patchContext := llb.Scratch(). + File(llb.Mkfile(src2Patch3File, 0o600, src2Patch3Content), opts). + File(llb.Mkdir("patches", 0o755), opts). + File(llb.Mkfile(src2Patch4File, 0o600, src2Patch4Content), opts). 
+ File(llb.Mkfile(src2Patch5File, 0o600, src2Patch5Content), opts) spec := dalec.Spec{ Name: "test-sysext-build", @@ -3621,6 +3136,7 @@ func main() { t.Fatal(err) } + // We could assert package deps probably instead, but asserting a file is distro agnostic. _, err = ref.StatFile(ctx, gwclient.StatRequest{ Path: "/usr/bin/curl", }) @@ -3658,11 +3174,6 @@ func main() { ctx := startTestSpan(baseCtx, t) testLinuxSymlinkArtifacts(ctx, t, testConfig) }) - t.Run("test image configs", func(t *testing.T) { - t.Parallel() - ctx := startTestSpan(baseCtx, t) - testImageConfig(ctx, t, testConfig.Target.Container) - }) t.Run("test package tests cause build to fail", func(t *testing.T) { t.Parallel() @@ -4631,7 +4142,7 @@ func testLinuxPackageTestsFail(ctx context.Context, t *testing.T, cfg testLinuxC _, err := res.SingleRef() assert.NilError(t, err) - sr = newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(cfg.Target.Container)) + sr = newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(cfg.Target.Package)) res = solveT(ctx, t, client, sr) _, err = res.SingleRef() assert.NilError(t, err) @@ -4670,7 +4181,7 @@ func testUserAndGroupCreation(ctx context.Context, t *testing.T, testCfg targetC } testEnv.RunTest(ctx, t, func(ctx context.Context, client gwclient.Client) { - sr := newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(testCfg.Container)) + sr := newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(testCfg.Package)) res := solveT(ctx, t, client, sr) _, err := res.SingleRef() assert.NilError(t, err) @@ -4703,7 +4214,7 @@ func testDalecTargetArg(ctx context.Context, t *testing.T, testCfg targetConfig) func testMixGlobalTargetDependencies(ctx context.Context, t *testing.T, cfg testLinuxConfig) { t.Run("global target dependencies", func(t *testing.T) { - distro := strings.Split(cfg.Target.Container, "/")[0] + distro := strings.Split(cfg.Target.Package, "/")[0] spec := newSimpleSpec() spec.Dependencies = &dalec.PackageDependencies{ Runtime: 
map[string]dalec.PackageConstraints{ @@ -4806,7 +4317,7 @@ func testDisableStrip(ctx context.Context, t *testing.T, cfg testLinuxConfig) { }, }) - req := newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(cfg.Target.Container)) + req := newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(cfg.Target.Package)) solveT(ctx, t, client, req) }) }) @@ -4828,7 +4339,7 @@ func testDisableStrip(ctx context.Context, t *testing.T, cfg testLinuxConfig) { }, }) - req := newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(cfg.Target.Container)) + req := newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(cfg.Target.Package)) solveT(ctx, t, client, req) }) }) @@ -4886,7 +4397,7 @@ func testTargetPlatform(ctx context.Context, t *testing.T, cfg testLinuxConfig) }) assert.NilError(t, err) - sr = newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(cfg.Target.Container), withPlatform(tp)) + sr = newSolveRequest(withSpec(ctx, t, spec), withBuildTarget(cfg.Target.Package), withPlatform(tp)) res = solveT(ctx, t, client, sr) dt, ok := res.Metadata[exptypes.ExporterImageConfigKey] assert.Assert(t, ok, "missing image config in result metadata") @@ -5559,7 +5070,7 @@ echo "This is a third test binary" } testEnv.RunTest(ctx, t, func(ctx context.Context, client gwclient.Client) { - req := newSolveRequest(withBuildTarget(testConfig.Target.Container), withSpec(ctx, t, spec)) + req := newSolveRequest(withBuildTarget(testConfig.Target.Package), withSpec(ctx, t, spec)) res := solveT(ctx, t, client, req) _, err := res.SingleRef() @@ -5648,6 +5159,525 @@ func testDepsOnly(ctx context.Context, t *testing.T, testConfig testLinuxConfig) }) } +func testContainerTarget(ctx context.Context, t *testing.T, testConfig testLinuxConfig, target string) { + t.Helper() + + t.Run("image_configs", func(t *testing.T) { + t.Parallel() + ctx := startTestSpan(ctx, t) + testImageConfig(ctx, t, target) + }) + + t.Run("creates_post_install_symlinks", func(t *testing.T) { + t.Parallel() + + ctx := 
startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Sources: map[string]dalec.Source{ + "src1": { + Inline: &dalec.SourceInline{ + File: &dalec.SourceInlineFile{ + Contents: "#!/usr/bin/env bash\necho hello world", + Permissions: 0o700, + }, + }, + }, + "src3": { + Inline: &dalec.SourceInline{ + File: &dalec.SourceInlineFile{ + Contents: "#!/usr/bin/env bash\necho goodbye", + Permissions: 0o700, + }, + }, + }, + }, + Artifacts: dalec.Artifacts{ + Binaries: map[string]dalec.ArtifactConfig{ + "src1": {}, + "src3": {}, + }, + Users: []dalec.AddUserConfig{ + { + Name: "need", + }, + }, + Groups: []dalec.AddGroupConfig{ + { + Name: "coffee", + }, + }, + }, + Image: &dalec.ImageConfig{ + Post: &dalec.PostInstall{ + Symlinks: map[string]dalec.SymlinkTarget{ + "/usr/bin/src1": { + Path: "/src1", + User: "need", + }, + "/usr/bin/src3": { + Paths: []string{"/non/existing/dir/src3", "/non/existing/dir2/src3"}, + User: "need", + Group: "coffee", + }, + }, + }, + }, + Tests: []*dalec.TestSpec{ + { + Name: "Post-install symlinks should be created and have correct ownership", + Files: map[string]dalec.FileCheckOutput{ + "/src1": {}, + "/non/existing/dir/src3": {}, + }, + Steps: []dalec.TestStep{ + {Command: "/bin/bash -exc 'test -L /src1'"}, + {Command: "/bin/bash -exc 'test \"$(readlink /src1)\" = \"/usr/bin/src1\"'"}, + {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=0; LINK_OWNER=$(stat -c \"%u:%g\" /src1); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/src1", Stdout: dalec.CheckOutput{Equals: "hello world\n"}, Stderr: dalec.CheckOutput{Empty: true}}, + + {Command: "/bin/bash -exc 'test -L /non/existing/dir/src3'"}, + {Command: "/bin/bash -exc 'test \"$(readlink /non/existing/dir/src3)\" = \"/usr/bin/src3\"'"}, + {Command: "/bin/bash -exc 'test -L /non/existing/dir2/src3'"}, + {Command: "/bin/bash -exc 'test \"$(readlink /non/existing/dir2/src3)\" = \"/usr/bin/src3\"'"}, + {Command: "/bin/bash -exc 
'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir/src3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir2/src3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/non/existing/dir/src3", Stdout: dalec.CheckOutput{Equals: "goodbye\n"}, Stderr: dalec.CheckOutput{Empty: true}}, + {Command: "/non/existing/dir2/src3", Stdout: dalec.CheckOutput{Equals: "goodbye\n"}, Stderr: dalec.CheckOutput{Empty: true}}, + }, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + ) + solveT(ctx, t, gwc, sr) + }) + }) + + t.Run("contains_etc_os_release_file", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Tests: []*dalec.TestSpec{ + { + Name: "Check /etc/os-release", + Files: map[string]dalec.FileCheckOutput{ + "/etc/os-release": { + CheckOutput: dalec.CheckOutput{ + Matches: []string{ + // Some distros have quotes around the values + // Regex is to match the values with or without quotes + // "(?m)" enables multi-line mode so that ^ and $ match the start and end of lines rather than the full document. + // + // Due to these values getting processed for build args, quotes are stripped unless they are escaped. 
+ `(?m)^ID=(\")?` + testConfig.Release.ID + `(\")?`, + `(?m)^VERSION_ID=(\")?` + testConfig.Release.VersionID + `(\")?`, + }, + }, + }, + }, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + ) + solveT(ctx, t, gwc, sr) + }) + }) + + t.Run("runs_tests", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + // Make sure the test framework was actually executed by the build target. + // This appends a test case so that is expected to fail and as such cause the build to fail. + spec := testLinuxSpec(t, dalec.Spec{ + Tests: []*dalec.TestSpec{ + { + Name: "Test framework should be executed", + Steps: []dalec.TestStep{ + {Command: "/bin/sh -c 'echo this command should fail; exit 42'"}, + }, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + ) + sr.Evaluate = true + + _, err := gwc.Solve(ctx, sr) + if err == nil { + t.Fatal("Expected test spec to run with error but got none") + } + }) + }) + + t.Run("has_image_config_available_with_build_time", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + ) + sr.Evaluate = true + + beforeBuild := time.Now() + res := solveT(ctx, t, gwc, sr) + + dt, ok := res.Metadata[exptypes.ExporterImageConfigKey] + assert.Assert(t, ok, "result metadata should contain an image config: available metadata: %s", strings.Join(maps.Keys(res.Metadata), ", ")) + + var cfg dalec.DockerImageSpec + assert.Assert(t, json.Unmarshal(dt, &cfg)) + assert.Check(t, cfg.Created.After(beforeBuild)) + assert.Check(t, cfg.Created.Before(time.Now())) + }) + }) + + 
t.Run("respects_container_cache_key", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + withIgnoreCache(targets.IgnoreCacheKeyContainer), + ) + + res := solveT(ctx, t, gwc, sr) + + ops, err := test.LLBOpsFromState(ctx, resultToState(t, res)) + if err != nil { + t.Fatalf("Unexpected error extracting LLB OPs from state: %v", err) + } + + cacheIgnored := 0 + execFound := false + + pgNames := []string{} + + expectedNames := []string{ + "Fetch DEB Packages", + "Install DEB Packages", + "Install RPMs", + } + + for _, op := range ops { + if op.OpMetadata.IgnoreCache { + cacheIgnored++ + } + + e := op.Op.GetExec() + pg := op.OpMetadata.ProgressGroup.Name + if e == nil { + continue + } + + if !slices.Contains(expectedNames, pg) { + pgNames = append(pgNames, pg) + + continue + } + + execFound = true + + if !op.OpMetadata.IgnoreCache { + s, err := test.LLBOpsToJSON([]test.LLBOp{op}) + if err != nil { + t.Fatalf("Unexpected error converting LLB OP to JSON: %v", err) + } + + t.Errorf("Expected install step to have cache ignore enabled:\n%s", s) + } + } + + if !execFound { + t.Errorf("No exec ops found in the build with progress group names: %v, got: %v", expectedNames, pgNames) + } + + if cacheIgnored != 2 && cacheIgnored != 1 { + t.Fatalf("Expected only one or two operations to have cache ignore enabled, found %d", cacheIgnored) + } + }) + }) + + t.Run("respects_ignoring_all_caches", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + withIgnoreCache(), + ) + + res := solveT(ctx, t, gwc, sr) + + ops, err := test.LLBOpsFromState(ctx, 
resultToState(t, res)) + if err != nil { + t.Fatalf("Unexpected error extracting LLB OPs from state: %v", err) + } + + badOps := []test.LLBOp{} + + for _, op := range ops { + if op.OpMetadata.IgnoreCache { + continue + } + + badOps = append(badOps, op) + } + + if len(badOps) != 0 { + opsJSON, err := test.LLBOpsToJSON(badOps) + if err != nil { + t.Fatalf("Unexpected error converting bad ops to JSON: %v", err) + } + + t.Fatalf("Unexpected %d operations without cache ignore:\n%s", len(badOps), opsJSON) + } + }) + }) + + t.Run("when_installing_spec_package", func(t *testing.T) { + t.Parallel() + + t.Run("makes_extra_repos_from_spec_available", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + // Create repository configurations for different phases + // This test verifies that repos configured for "install" are properly processed during container build + // and that repos configured for other phases (like "build") don't interfere + installRepoConfig := llb.Scratch().File( + llb.Mkfile("install-repo.list", 0o644, []byte("# Install phase repository config\n")), + dalec.ProgressGroup("Create install repo config"), + ) + + buildRepoConfig := llb.Scratch().File( + llb.Mkfile("build-repo.list", 0o644, []byte("# Unexpected repo\n")), + dalec.ProgressGroup("Create build repo config"), + ) + + spec := testLinuxSpec(t, dalec.Spec{ + Dependencies: &dalec.PackageDependencies{ + ExtraRepos: []dalec.PackageRepositoryConfig{ + { + Config: map[string]dalec.Source{ + "install-repo.list": { + Context: &dalec.SourceContext{ + Name: "install-repo-config", + }, + Path: "install-repo.list", + }, + }, + Envs: []string{"install"}, + }, + { + Config: map[string]dalec.Source{ + "build-repo.list": { + Context: &dalec.SourceContext{ + Name: "build-repo-config", + }, + Path: "build-repo.list", + }, + }, + Envs: []string{"build"}, + }, + }, + }, + Build: dalec.ArtifactBuild{ + Steps: []dalec.BuildStep{ + { + Command: ` +# This is not a debian build, skip this. +[ ! 
-d debian ] && exit 0; + +# Inject a custom postinst script to inspect the install environment +[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -x debian/postinst ] || chmod +x debian/postinst +cat >> debian/postinst << 'EOF' +cat /etc/apt/sources.list.d/* +grep 'Unexpected repo' /etc/apt/sources.list.d/* && exit 1 || exit 0 +EOF +`, + }, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + withBuildContext(ctx, t, "install-repo-config", installRepoConfig), + withBuildContext(ctx, t, "build-repo-config", buildRepoConfig), + ) + solveT(ctx, t, gwc, sr) + }) + }) + + t.Run("enables_dpkg_debug", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Build: dalec.ArtifactBuild{ + Steps: []dalec.BuildStep{ + { + Command: ` +# This is not a debian build, skip this. +[ ! 
-d debian ] && exit 0; + +# Inject a custom postinst script to inspect the install environment +[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -x debian/postinst ] || chmod +x debian/postinst +cat >> debian/postinst << 'EOF' +grep debug=2 /etc/dpkg/dpkg.cfg.d/99-dalec-debug +EOF +`, + }, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + ) + + solveT(ctx, t, gwc, sr) + }) + }) + + t.Run("handles_ubuntu_dpkg_excludes_config", func(t *testing.T) { + t.Parallel() + + t.Run("by_masking_when_target_has_docs", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Sources: map[string]dalec.Source{ + "foo": { + Inline: &dalec.SourceInline{ + File: &dalec.SourceInlineFile{ + Contents: "hello world!", + }, + }, + }, + }, + Artifacts: dalec.Artifacts{ + Docs: map[string]dalec.ArtifactConfig{ + "foo": {}, + }, + }, + Build: dalec.ArtifactBuild{ + Steps: []dalec.BuildStep{ + { + Command: ` +# This is not a debian build, skip this. +[ ! 
-d debian ] && exit 0; + +# Inject a custom postinst script to inspect the install environment +[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -x debian/postinst ] || chmod +x debian/postinst +cat >> debian/postinst << 'EOF' +[ -s /etc/dpkg/dpkg.cfg.d/excludes ] && exit 1 +exit 0 +EOF + `, + }, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + ) + + solveT(ctx, t, gwc, sr) + }) + }) + + t.Run("by_not_masking_when_target_has_no_docs", func(t *testing.T) { + t.Parallel() + + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Build: dalec.ArtifactBuild{ + Steps: []dalec.BuildStep{ + { + Command: ` +# This is not a debian build, skip this. +[ ! -d debian ] && exit 0; + +# Inject a custom postinst script to inspect the install environment +[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -x debian/postinst ] || chmod +x debian/postinst +cat >> debian/postinst << 'EOF' +set -x + +# If file does not exist, all good. +[ ! -f /etc/dpkg/dpkg.cfg.d/excludes ] && exit 0 + +# if file exists, ensure it is not masked. +if [ ! 
-s /etc/dpkg/dpkg.cfg.d/excludes ]; then echo "Unexpected masking found"; exit 1; fi +EOF + `, + }, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest( + withSpec(ctx, t, &spec), + withBuildTarget(target), + ) + + solveT(ctx, t, gwc, sr) + }) + }) + }) + }) +} + func testLinuxSpec(t *testing.T, userSpec dalec.Spec) dalec.Spec { t.Helper() @@ -5671,7 +5701,7 @@ func testLinuxSpec(t *testing.T, userSpec dalec.Spec) dalec.Spec { userSpecRaw, err := json.Marshal(userSpec) assert.NilError(t, err, "marshaling user spec to json") - assert.NilError(t, json.Unmarshal(userSpecRaw, &result), "unmarshalling user spec into result spec") + assert.NilError(t, json.Unmarshal(userSpecRaw, &result), "unmarshaling user spec into result spec") return result } diff --git a/test/target_ubuntu_test.go b/test/target_ubuntu_test.go index 62730040e..68a6daa61 100644 --- a/test/target_ubuntu_test.go +++ b/test/target_ubuntu_test.go @@ -37,11 +37,12 @@ func debLinuxTestConfigFor(targetKey string, cfg *distro.Config, opts ...func(*t tlc := testLinuxConfig{ Target: targetConfig{ - Key: targetKey, - Container: targetKey + "/testing/container", - Package: targetKey + "/deb", - Worker: targetKey + "/worker", - Sysext: sysextTarget, + Key: targetKey, + Container: targetKey + "/testing/container", + MinimalContainer: targetKey + "/container", + Package: targetKey + "/deb", + Worker: targetKey + "/worker", + Sysext: sysextTarget, FormatDepEqual: func(ver, rev string) string { return ver + "-" + cfg.VersionID + "u" + rev }, diff --git a/website/docs/examples/targets.md b/website/docs/examples/targets.md index 8423c6072..d1b4eaac5 100644 --- a/website/docs/examples/targets.md +++ b/website/docs/examples/targets.md @@ -15,14 +15,17 @@ azlinux3/rpm Builds an rpm and src.rpm. azlinux3/rpm/debug Debug options for rpm builds. azlinux3/testing/sysext Builds a systemd system extension image. 
azlinux3/worker Builds the base worker image responsible for building the rpm +bionic/container Builds a container image. bionic/deb (default) Builds a deb package. bionic/dsc Builds a Debian source package. bionic/testing/container Builds a container image for testing purposes only. bionic/worker Builds the worker image. +bookworm/container Builds a container image. bookworm/deb (default) Builds a deb package. bookworm/dsc Builds a Debian source package. bookworm/testing/container Builds a container image for testing purposes only. bookworm/worker Builds the worker image. +bullseye/container Builds a container image. bullseye/deb (default) Builds a deb package. bullseye/dsc Builds a Debian source package. bullseye/testing/container Builds a container image for testing purposes only. @@ -33,14 +36,17 @@ debug/patched-sources Outputs all patched sources from a dalec spec f debug/pip Outputs all the pip dependencies for the spec debug/resolve Outputs the resolved dalec spec file with build args applied. debug/sources Outputs all sources from a dalec spec file. +focal/container Builds a container image. focal/deb (default) Builds a deb package. focal/dsc Builds a Debian source package. focal/testing/container Builds a container image for testing purposes only. focal/worker Builds the worker image. +jammy/container Builds a container image. jammy/deb (default) Builds a deb package. jammy/dsc Builds a Debian source package. jammy/testing/container Builds a container image for testing purposes only. jammy/worker Builds the worker image. +noble/container Builds a container image. noble/deb (default) Builds a deb package. noble/dsc Builds a Debian source package. noble/testing/container Builds a container image for testing purposes only. @@ -56,6 +62,7 @@ rockylinux9/container/depsonly Builds a container image with only the runtime rockylinux9/rpm Builds an rpm and src.rpm. rockylinux9/rpm/debug Debug options for rpm builds. 
rockylinux9/worker Builds the base worker image responsible for building the rpm +trixie/container Builds a container image. trixie/deb (default) Builds a deb package. trixie/dsc Builds a Debian source package. trixie/testing/container Builds a container image for testing purposes only. From 8c4ec09c49d3dc270788b466ab56b03c3d5015eb Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Tue, 31 Mar 2026 14:31:37 +0200 Subject: [PATCH 2/6] targets/linux/deb/distro: cleanup minimal container images So resulting images are smaller and have fewer packages. Signed-off-by: Mateusz Gozdek --- targets/linux/deb/distro/container.go | 228 +++++++++++++++++++- test/linux_target_test.go | 289 +++++++++++++++++++++----- 2 files changed, 461 insertions(+), 56 deletions(-) diff --git a/targets/linux/deb/distro/container.go b/targets/linux/deb/distro/container.go index 399f62462..f2b77fcdd 100644 --- a/targets/linux/deb/distro/container.go +++ b/targets/linux/deb/distro/container.go @@ -2,6 +2,7 @@ package distro import ( "context" + "strconv" "github.com/moby/buildkit/client/llb" gwclient "github.com/moby/buildkit/frontend/gateway/client" @@ -182,7 +183,17 @@ apt update # We can't use ?essential since some distros we support have too old apt which does not support patterns. essential_packages=$(dpkg-query -Wf '${Package} ${Essential}\n' | awk '$2 == "yes" {print $1}') -local_package_files=$(ls /base-packages/*.deb /spec-packages/*.deb) +# Extra packages required to run user package maintainer scripts (postinst etc.) +# during dpkg --install. These are not Essential but commonly assumed to exist +# (e.g. useradd/groupadd from passwd). Cleanup will purge them later unless a +# user package depends on them. + +# Extra packages, which would normally be in base packages list for each distro release. However, since +# we want to be able to clean them up after installation and after e.g. creation of users and groups in +# the container, we define them here. 
+bootstrap_extra_packages="passwd" + +local_package_files=$(ls /spec-packages/*.deb) # Get names of local packages so we can exclude them from apt-get install. local_package_names=$(for f in ${local_package_files}; do dpkg-deb -f "${f}" Package 2>/dev/null; done | sort -u) @@ -191,7 +202,7 @@ local_package_names=$(for f in ${local_package_files}; do dpkg-deb -f "${f}" Pac # # Spec packages may depend on base packages, so we need to filter to only download remaining packages, since downloading local packages # would fail. -dependencies_to_download=$(for f in ${local_package_files}; do dpkg-deb -f "${f}" Depends 2>/dev/null; done | tr ',' '\n' | sed 's/([^)]*)//g; s/|.*//; s/ //g' | grep -v '^$' | sort -u | grep -vxF "${local_package_names}") +dependencies_to_download=$(for f in ${local_package_files}; do dpkg-deb -f "${f}" Depends 2>/dev/null; done | tr ',' '\n' | sed 's/([^)]*)//g; s/|.*//; s/ //g' | grep -v '^$' | sort -u | grep -vxF "${local_package_names}" || true) # Get the exact filenames apt needs by using --print-uris with an empty cache dir. # This forces apt to report ALL needed packages (not just uncached ones), giving @@ -200,7 +211,7 @@ dependencies_to_download=$(for f in ${local_package_files}; do dpkg-deb -f "${f} # We extract the second field (the filename). needed_filenames=$(apt-get -o Dir::State::status="${rootfs}/var/lib/dpkg/status" \ -o Dir::Cache::Archives=/tmp \ - --yes --print-uris install ${essential_packages} ${dependencies_to_download} \ + --yes --print-uris install ${essential_packages} ${bootstrap_extra_packages} ${dependencies_to_download} \ | grep '\.deb ' | awk '{print $2}') mkdir -p "${rootfs}${apt_archives}"/partial @@ -219,7 +230,7 @@ done # apt skips packages already present, so only missing ones are fetched. 
apt-get -o Dir::State::status="${rootfs}/var/lib/dpkg/status" \ -o Dir::Cache::Archives="${rootfs}${apt_archives}" \ - --yes --download-only install ${essential_packages} ${dependencies_to_download} + --yes --download-only install ${essential_packages} ${bootstrap_extra_packages} ${dependencies_to_download} deb_files=$(ls "${rootfs}${apt_archives}"/*.deb) @@ -275,7 +286,6 @@ done baseImg = input.Worker.Run( dalec.WithConstraints(opts...), llb.AddMount("/tmp/install.sh", script, llb.SourcePath("install.sh")), - llb.AddMount("/base-packages", basePackages(ctx, input), llb.Readonly), llb.AddMount("/spec-packages", input.SpecPackages, llb.Readonly), extraRepos(input, opts...), dalec.WithMountedAptCache(input.Config.AptCachePrefix, opts...), @@ -290,6 +300,8 @@ done llb.Args([]string{"/usr/bin/sh", "-c", "dpkg --install --force-depends /var/cache/apt/archives/*.deb && rm -rf /var/cache/apt/archives/*.deb"}), })) + result = cleanupBootstrapContainer(result, input, opts...) + // Squash all layers into one by copying the final filesystem into a fresh // scratch state. Without this, files extracted in the bootstrap layer but // removed during cleanup still occupy space in the earlier layer. @@ -300,3 +312,209 @@ done AllowWildcard: true, }), squashOpts...) } + +// cleanupBootstrapContainer removes package manager infrastructure, unnecessary +// packages, and caches from the container image. +func cleanupBootstrapContainer(st llb.State, input buildContainerInput, opts ...llb.ConstraintsOpt) llb.State { + cleanupOpts := append(opts, dalec.ProgressGroup("Cleanup Bootstrap Container")) + + script := `#!/bin/sh + +set -x + +# Remove problematic maintainer scripts that cause infinite loops during purge. +rm -f /var/lib/dpkg/info/libpam-runtime.prerm 2>/dev/null || true + +# Recursive dependency resolver: prints the transitive closure of installed +# Depends/Pre-Depends starting from the given space-separated package list. 
+resolve_deps() { + queue="$1" + resolved="" + while [ -n "${queue}" ]; do + pkg=$(echo "${queue}" | head -n1) + queue=$(echo "${queue}" | tail -n +2) + + if [ -z "${pkg}" ] || echo "${resolved}" | grep -qw "${pkg}"; then continue; fi + + resolved="${resolved} ${pkg}" + + deps=$(dpkg-query -W -f='${Depends}\n${Pre-Depends}\n' "${pkg}" 2>/dev/null \ + | tr ',' '\n' | sed 's/([^)]*)//g; s/|.*//; s/ //g; s/:.*//g' | grep -v '^$' | sort -u) + + for dep in ${deps}; do + if ! dpkg -s "${dep}" 2>/dev/null | grep -q '^Status: install ok installed'; then + continue + fi + if echo "${resolved}" | grep -qw "${dep}"; then + continue + fi + queue=$(printf '%s\n%s' "${queue}" "${dep}") + done + done + echo "${resolved}" +} + +# Packages from the user's spec — the starting point of the keep set. +keep_set="" +for f in $(ls /tmp/dalec-spec-packages/*.deb 2>/dev/null); do + keep_set="${keep_set} $(dpkg-deb -f "${f}" Package)" +done + +# Full transitive closure of spec packages. Cleanup tools end up here only +# if a spec package actually depends on them (directly or transitively), +# in which case we keep them and their deps. +keep_set=$(resolve_deps "$(echo ${keep_set} | tr ' ' '\n')") + +# purge_last: cleanup tools (+ their deps) not in the keep set. These +# survive the main purge so they remain available for it, then get purged +# at the very end. +purge_last="" + +# Tools needed by the cleanup process itself (purging packages, running +# maintainer scripts, etc.) but not necessarily wanted in the final image. +# If a spec package transitively depends on any of these, it (and its full +# dependency tree) stays in the keep set; otherwise it gets purged at the end. +for pkg in dpkg dash coreutils base-files libc-bin grep; do + if echo "${keep_set}" | grep -qw "${pkg}"; then continue; fi + + # dpkg can't purge itself from inside the container; signal the worker + # step to do it from outside instead. 
+ if [ "${pkg}" = "dpkg" ]; then + echo > /var/lib/dpkg/.dalec-remove-dpkg + continue + fi + + purge_last="${purge_last} ${pkg}" +done +for pkg in $(resolve_deps "$(echo ${purge_last} | tr ' ' '\n')"); do + if [ "${pkg}" = "dpkg" ]; then continue; fi + if echo " ${keep_set} " | grep -q " ${pkg} "; then continue; fi + if echo " ${purge_last} " | grep -q " ${pkg} "; then continue; fi + purge_last="${purge_last} ${pkg}" +done + +# purge_first: everything not in the keep set, purge_last, or dpkg. +# dpkg is kept around for the purge passes and removed by the worker step. +purge_first="" +# Strip :arch suffixes (e.g. libc6:amd64 -> libc6) so names match. +for pkg in $(dpkg-query -W -f='${Package}\n' | sed 's/:.*//g'); do + if [ "${pkg}" = "dpkg" ]; then continue; fi + if echo "${keep_set}" | grep -qw "${pkg}"; then continue; fi + if echo "${purge_last}" | grep -qw "${pkg}"; then continue; fi + purge_first="${purge_first} ${pkg}" +done + +if [ -n "${purge_first}" ]; then + dpkg --purge --force-depends --force-remove-essential ${purge_first} || true +fi + +# Remove leftover directories (after dpkg purge so maintainer scripts still work). +cleanup_dirs=" +/etc/apt +/etc/systemd +/usr/lib/apt +/usr/share/bash-completion +/usr/share/bug +/usr/share/debconf +/usr/share/lintian +/usr/share/locale +/var/cache/apt +/var/cache/debconf +/var/lib/apt +/var/lib/pam +/var/lib/systemd +/var/log +" + +if [ "${DALEC_HAS_DOCS}" != "true" ]; then + cleanup_dirs="${cleanup_dirs} +/usr/share/doc +/usr/share/man +/usr/share/info +" +fi + +for d in ${cleanup_dirs}; do + rm -rf "${d}" +done + +# Final purge: strip all maintainer scripts first (prevents triggers from +# firing after /bin/sh is gone), then purge the cleanup tools we kept around +# for the main purge. dpkg itself is purged from outside via the worker. 
+rm -f /var/lib/dpkg/info/*.prerm \ + /var/lib/dpkg/info/*.postrm \ + /var/lib/dpkg/info/*.preinst \ + /var/lib/dpkg/info/*.postinst 2>/dev/null || true + +# --force-remove-protected was added in dpkg 1.20.6; older releases (e.g. +# Debian buster, Ubuntu 18.04) don't recognize it and will error out. +force_remove_protected="" +if dpkg --force-help 2>/dev/null | grep -qw remove-protected; then + force_remove_protected="--force-remove-protected" +fi + +if [ -n "${purge_last}" ]; then + PATH="/tmp:${PATH}" dpkg --purge --force-depends --force-remove-essential ${force_remove_protected} ${purge_last} || true +fi +` + + // Script that runs on the worker to remove dpkg from the target rootfs. + // Using --root= lets the worker's own dpkg binary operate on the mounted rootfs + // without needing dpkg to exist inside the target. + dpkgRemoveScript := `#!/bin/sh +set -x + +# Only proceed if the cleanup script signalled that dpkg should be removed. +if [ ! -f /target/var/lib/dpkg/.dalec-remove-dpkg ]; then + echo "dpkg is a runtime dependency, skipping removal" + exit 0 +fi +rm -f /target/var/lib/dpkg/.dalec-remove-dpkg + +# --force-remove-protected was added in dpkg 1.20.6; older releases don't +# recognize it. The worker's dpkg may differ from the target's, so probe it. +force_remove_protected="" +if dpkg --force-help 2>/dev/null | grep -qw remove-protected; then + force_remove_protected="--force-remove-protected" +fi + +# Remove dpkg and any leftover packages from the target rootfs using the +# worker's dpkg binary. Use --purge to clean config-files entries too. +# /var/lib/dpkg/status is preserved because dpkg only removes files it owns, +# not the status database itself. 
+for pkg in $(dpkg --root=/target -l 2>/dev/null | awk '/^[irpu]/ && !/^ii/ {print $2}' || true); do + dpkg --root=/target --purge --force-depends --force-remove-essential ${force_remove_protected} "${pkg}" 2>/dev/null || true +done +if dpkg --root=/target -s dpkg 2>/dev/null | grep -q '^Status:.*installed'; then + dpkg --root=/target --purge --force-depends --force-remove-essential dpkg || true +fi +` + + scriptSt := llb.Scratch().File(llb.Mkfile("cleanup.sh", 0o755, []byte(script)), cleanupOpts...) + dpkgRemoveScriptSt := llb.Scratch().File(llb.Mkfile("dpkg-remove.sh", 0o755, []byte(dpkgRemoveScript)), cleanupOpts...) + + // No-op stub mounted at /tmp/diff and /tmp/tar so dpkg's maintainer scripts + // find the binaries they expect (diff, tar) without writing to the rootfs. + stubSt := llb.Scratch().File(llb.Mkfile("stub", 0o755, []byte("#!/bin/sh\nexit 1\n")), cleanupOpts...) + + // Run the main cleanup inside the container (purges everything except dpkg). + st = st.Run( + dalec.WithConstraints(cleanupOpts...), + llb.AddMount("/tmp/dalec-cleanup.sh", scriptSt, llb.SourcePath("cleanup.sh"), llb.Readonly), + llb.AddMount("/tmp/dalec-spec-packages", input.SpecPackages, llb.Readonly), + llb.AddMount("/tmp/diff", stubSt, llb.SourcePath("stub"), llb.Readonly), + llb.AddMount("/tmp/tar", stubSt, llb.SourcePath("stub"), llb.Readonly), + llb.AddEnv("DALEC_HAS_DOCS", strconv.FormatBool(input.Spec.GetArtifacts(input.Target).HasDocs())), + llb.Args([]string{"/usr/bin/sh", "/tmp/dalec-cleanup.sh"}), + ).Root() + + // Use the worker's dpkg to remove dpkg from the target rootfs via --root=. + // This avoids the chicken-and-egg problem of dpkg removing itself. 
+ st = input.Worker.Run( + dalec.WithConstraints(cleanupOpts...), + llb.AddMount("/tmp/dpkg-remove.sh", dpkgRemoveScriptSt, llb.SourcePath("dpkg-remove.sh"), llb.Readonly), + llb.Args([]string{"/bin/sh", "/tmp/dpkg-remove.sh"}), + ).AddMount("/target", st) + + return st +} diff --git a/test/linux_target_test.go b/test/linux_target_test.go index f9b7b2779..e269afa04 100644 --- a/test/linux_target_test.go +++ b/test/linux_target_test.go @@ -475,17 +475,17 @@ index 0000000..5260cb1 { Name: "Post-install symlinks should be created and have correct ownership", Steps: []dalec.TestStep{ - {Command: "/bin/bash -exc 'test -L /non/existing/dir/src2'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /non/existing/dir/src2)\" = \"/usr/bin/src2\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=0; COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir/src2); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /non/existing/dir/src2'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /non/existing/dir/src2)\" = \"/usr/bin/src2\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=0; COFFEE_GID=$(grep ^coffee /etc/group | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir/src2); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, }, }, { Name: "Artifact symlinks should have correct ownership", Steps: []dalec.TestStep{ - {Command: "/bin/bash -exc 'test -L /bin/owned-link2'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /bin/owned-link2)\" = \"/usr/bin/src2/file2\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=0; LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link2); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /bin/owned-link2'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /bin/owned-link2)\" = \"/usr/bin/src2/file2\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=$(grep ^need 
/etc/passwd | cut -d: -f3); COFFEE_GID=0; LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link2); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, }, }, }, @@ -607,12 +607,12 @@ index 0000000..5260cb1 { Name: "Artifact symlinks should have correct ownership", Steps: []dalec.TestStep{ - {Command: "/bin/bash -exc 'test -L /bin/owned-link3'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /bin/owned-link3)\" = \"/usr/bin/src1\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=0; COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, - {Command: "/bin/bash -exc 'test -L /bin/owned-link4'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /bin/owned-link4)\" = \"/usr/bin/src1\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd nobody | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link4); [ \"$LINK_OWNER\" = \"$NEED_UID:0\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /bin/owned-link3'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /bin/owned-link3)\" = \"/usr/bin/src1\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=0; COFFEE_GID=$(grep ^coffee /etc/group | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /bin/owned-link4'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /bin/owned-link4)\" = \"/usr/bin/src1\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=$(grep ^nobody /etc/passwd | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link4); [ \"$LINK_OWNER\" = \"$NEED_UID:0\" ]'"}, }, }, }, @@ -686,22 +686,22 @@ index 0000000..5260cb1 }, Steps: []dalec.TestStep{ { - Command: "/bin/sh -c 'cat /foo'", + Command: "/usr/bin/env bash -c 'cat /foo'", Stdout: dalec.CheckOutput{Equals: "hello world"}, Stderr: dalec.CheckOutput{Empty: true}, }, { - Command: "/bin/sh -c 'cat /nested/foo'", + Command: "/usr/bin/env bash -c 'cat 
/nested/foo'", Stdout: dalec.CheckOutput{Equals: "hello world nested"}, Stderr: dalec.CheckOutput{Empty: true}, }, { - Command: "/bin/sh -c 'cat /dir/foo'", + Command: "/usr/bin/env bash -c 'cat /dir/foo'", Stdout: dalec.CheckOutput{Equals: "hello from dir"}, Stderr: dalec.CheckOutput{Empty: true}, }, { - Command: "/bin/sh -c 'cat /nested/dir/foo'", + Command: "/usr/bin/env bash -c 'cat /nested/dir/foo'", Stdout: dalec.CheckOutput{Equals: "hello from nested dir"}, Stderr: dalec.CheckOutput{Empty: true}, }, @@ -738,7 +738,7 @@ index 0000000..5260cb1 [ ! -d debian ] && exit 0; # Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -f debian/postinst ] || (echo '#!/usr/bin/env bash' > debian/postinst; echo 'set -e' >> debian/postinst) [ -x debian/postinst ] || chmod +x debian/postinst cat >> debian/postinst << 'EOF' if [ "${DALEC_UPGRADE}" != "true" ]; then echo "Expected DALEC_UPGRADE to be \"true\", got \"${DALEC_UPGRADE}\""; exit 1; fi @@ -764,6 +764,191 @@ EOF skip.If(t, testConfig.Target.MinimalContainer == "", "skipping test as it is not supported for this config") t.Parallel() testContainerTarget(ctx, t, testConfig, testConfig.Target.MinimalContainer) + + t.Run("cleanup", func(t *testing.T) { + t.Parallel() + target := testConfig.Target.MinimalContainer + + testLinuxSpec := func(t *testing.T, spec dalec.Spec) dalec.Spec { + spec = testLinuxSpec(t, spec) + spec.Dependencies.Runtime = map[string]dalec.PackageConstraints{} + + return spec + } + + t.Run("removes_package_manager_binaries_when_unused", func(t *testing.T) { + t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + res := solveT(ctx, t, gwc, sr) + ref, err := res.SingleRef() + assert.NilError(t, err) + + for 
_, bin := range []string{"/usr/bin/apt", "/usr/bin/apt-get", "/usr/bin/apt-cache", "/usr/bin/dpkg", "/usr/bin/tar"} { + _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: bin}) + assert.ErrorContains(t, err, "no such file", "expected %q to be removed", bin) + } + }) + }) + + t.Run("removes_orphan_directories", func(t *testing.T) { + t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + res := solveT(ctx, t, gwc, sr) + ref, err := res.SingleRef() + assert.NilError(t, err) + + // All directories that the cleanup script removes. + for _, dir := range []string{ + "/etc/apt", + "/etc/systemd", + "/usr/lib/apt", + "/usr/share/bash-completion", + "/usr/share/bug", + "/usr/share/debconf", + "/usr/share/lintian", + "/usr/share/locale", + "/var/cache/apt", + "/var/cache/debconf", + "/var/lib/apt", + "/var/lib/pam", + "/var/lib/systemd", + "/var/log", + } { + _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: dir}) + assert.ErrorContains(t, err, "no such file", "expected %s to be removed", dir) + } + }) + }) + + t.Run("preserves_dpkg_status", func(t *testing.T) { + t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + res := solveT(ctx, t, gwc, sr) + ref, err := res.SingleRef() + assert.NilError(t, err) + + _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: "/var/lib/dpkg/status"}) + assert.NilError(t, err, "/var/lib/dpkg/status should be preserved for security scanners") + }) + }) + + t.Run("removes_docs_without_doc_artifacts", func(t *testing.T) { + t.Parallel() + ctx := startTestSpan(ctx, t) + + // No doc artifacts → docs should be cleaned up. 
+ spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + res := solveT(ctx, t, gwc, sr) + ref, err := res.SingleRef() + assert.NilError(t, err) + + for _, dir := range []string{"/usr/share/doc", "/usr/share/man", "/usr/share/info"} { + _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: dir}) + assert.ErrorContains(t, err, "no such file", "expected %s to be removed when no doc artifacts", dir) + } + }) + }) + + t.Run("preserves_docs_with_doc_artifacts", func(t *testing.T) { + t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Sources: map[string]dalec.Source{ + "README": { + Inline: &dalec.SourceInline{ + File: &dalec.SourceInlineFile{ + Contents: "hello docs", + }, + }, + }, + }, + Artifacts: dalec.Artifacts{ + Docs: map[string]dalec.ArtifactConfig{ + "README": {}, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + res := solveT(ctx, t, gwc, sr) + ref, err := res.SingleRef() + assert.NilError(t, err) + + _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: "/usr/share/doc"}) + assert.NilError(t, err, "/usr/share/doc should be preserved when spec has doc artifacts") + }) + }) + + t.Run("keeps_runtime_dependencies_functional", func(t *testing.T) { + t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Tests: []*dalec.TestSpec{ + { + Name: "runtime dep binary works after cleanup", + Files: map[string]dalec.FileCheckOutput{ + "/usr/bin/curl": {}, + "/usr/bin/dpkg": {}, + }, + }, + }, + }) + + spec.Dependencies = &dalec.PackageDependencies{ + Runtime: map[string]dalec.PackageConstraints{ + "curl": {}, + "dpkg": {}, + }, + } + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), 
withBuildTarget(target)) + solveT(ctx, t, gwc, sr) + }) + }) + }) + + t.Run("squash_produces_single_layer", func(t *testing.T) { + t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(testConfig.Target.MinimalContainer)) + res := solveT(ctx, t, gwc, sr) + + dt, ok := res.Metadata[exptypes.ExporterImageConfigKey] + assert.Assert(t, ok, "missing image config in result metadata") + + var img dalec.DockerImageSpec + assert.NilError(t, json.Unmarshal(dt, &img)) + + assert.Check(t, len(img.RootFS.DiffIDs) <= 1, + "expected squashed image to have at most 1 layer, got %d", len(img.RootFS.DiffIDs)) + }) + }) }) t.Run("depsonly", func(t *testing.T) { @@ -1096,22 +1281,22 @@ echo "$BAR" > bar.txt }, Steps: []dalec.TestStep{ { - Command: "/bin/sh -c 'cat /foo'", + Command: "/usr/bin/env bash -c 'cat /foo'", Stdout: dalec.CheckOutput{Equals: "hello world"}, Stderr: dalec.CheckOutput{Empty: true}, }, { - Command: "/bin/sh -c 'cat /nested/foo'", + Command: "/usr/bin/env bash -c 'cat /nested/foo'", Stdout: dalec.CheckOutput{Equals: "hello world nested"}, Stderr: dalec.CheckOutput{Empty: true}, }, { - Command: "/bin/sh -c 'cat /dir/foo'", + Command: "/usr/bin/env bash -c 'cat /dir/foo'", Stdout: dalec.CheckOutput{Equals: "hello from dir"}, Stderr: dalec.CheckOutput{Empty: true}, }, { - Command: "/bin/sh -c 'cat /nested/dir/foo'", + Command: "/usr/bin/env bash -c 'cat /nested/dir/foo'", Stdout: dalec.CheckOutput{Equals: "hello from nested dir"}, Stderr: dalec.CheckOutput{Empty: true}, }, @@ -1161,18 +1346,18 @@ echo "$BAR" > bar.txt { Name: "Artifact symlinks should have correct ownership", Steps: []dalec.TestStep{ - {Command: "/bin/bash -exc 'test -L /bin/owned-link'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /bin/owned-link)\" = \"/usr/bin/src3\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent 
passwd need | cut -d: -f3); COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, - {Command: "/bin/bash -exc 'test -L /bin/owned-link2'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /bin/owned-link2)\" = \"/usr/bin/src2/file2\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=0; LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link2); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, - {Command: "/bin/bash -exc 'test -L /bin/owned-link3'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /bin/owned-link3)\" = \"/usr/bin/src1\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=0; COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, - {Command: "/bin/bash -exc 'test -L /bin/owned-link4'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /bin/owned-link4)\" = \"/usr/bin/src1\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd nobody | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link4); [ \"$LINK_OWNER\" = \"$NEED_UID:0\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /bin/owned-link'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /bin/owned-link)\" = \"/usr/bin/src3\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=$(grep ^need /etc/passwd | cut -d: -f3); COFFEE_GID=$(grep ^coffee /etc/group | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /bin/owned-link2'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /bin/owned-link2)\" = \"/usr/bin/src2/file2\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=$(grep ^need /etc/passwd | cut -d: -f3); COFFEE_GID=0; LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link2); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 
'test -L /bin/owned-link3'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /bin/owned-link3)\" = \"/usr/bin/src1\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=0; COFFEE_GID=$(grep ^coffee /etc/group | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /bin/owned-link4'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /bin/owned-link4)\" = \"/usr/bin/src1\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=$(grep ^nobody /etc/passwd | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /bin/owned-link4); [ \"$LINK_OWNER\" = \"$NEED_UID:0\" ]'"}, }, }, }, @@ -1202,7 +1387,7 @@ echo "$BAR" > bar.txt spec.Tests = append(spec.Tests, &dalec.TestSpec{ Name: "Test framework should be executed", Steps: []dalec.TestStep{ - {Command: "/bin/sh -c 'echo this command should fail; exit 42'"}, + {Command: "/usr/bin/env bash -c 'echo this command should fail; exit 42'"}, }, }) @@ -2750,8 +2935,8 @@ func main() { { Name: "Check data directory ownership in post-install", Steps: []dalec.TestStep{ - {Command: "/bin/bash -exc 'ls -ld /usr/share/another_data_dir2 | grep -E \" myuser[[:space:]]+mygroup[[:space:]]\"'"}, - {Command: "/bin/bash -exc 'ls -l /usr/share/another_data_dir2/another_nested_data_file2 | grep -E \" myuser[[:space:]]+mygroup[[:space:]]\"'"}, + {Command: "/usr/bin/env bash -exc 'ls -ld /usr/share/another_data_dir2 | grep -E \" myuser[[:space:]]+mygroup[[:space:]]\"'"}, + {Command: "/usr/bin/env bash -exc 'ls -l /usr/share/another_data_dir2/another_nested_data_file2 | grep -E \" myuser[[:space:]]+mygroup[[:space:]]\"'"}, }, }, }, @@ -4816,7 +5001,7 @@ func testPrebuiltPackages(ctx context.Context, t *testing.T, testConfig testLinu "hello": { Inline: &dalec.SourceInline{ File: &dalec.SourceInlineFile{ - Contents: "#!/bin/sh\necho 'Hello from pre-built package'", + Contents: "#!/usr/bin/env bash\necho 'Hello from pre-built package'", Permissions: 
0o755, }, }, @@ -5231,17 +5416,17 @@ func testContainerTarget(ctx context.Context, t *testing.T, testConfig testLinux "/non/existing/dir/src3": {}, }, Steps: []dalec.TestStep{ - {Command: "/bin/bash -exc 'test -L /src1'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /src1)\" = \"/usr/bin/src1\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=0; LINK_OWNER=$(stat -c \"%u:%g\" /src1); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /src1'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /src1)\" = \"/usr/bin/src1\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=$(grep ^need /etc/passwd | cut -d: -f3); COFFEE_GID=0; LINK_OWNER=$(stat -c \"%u:%g\" /src1); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, {Command: "/src1", Stdout: dalec.CheckOutput{Equals: "hello world\n"}, Stderr: dalec.CheckOutput{Empty: true}}, - {Command: "/bin/bash -exc 'test -L /non/existing/dir/src3'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /non/existing/dir/src3)\" = \"/usr/bin/src3\"'"}, - {Command: "/bin/bash -exc 'test -L /non/existing/dir2/src3'"}, - {Command: "/bin/bash -exc 'test \"$(readlink /non/existing/dir2/src3)\" = \"/usr/bin/src3\"'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir/src3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, - {Command: "/bin/bash -exc 'NEED_UID=$(getent passwd need | cut -d: -f3); COFFEE_GID=$(getent group coffee | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir2/src3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 'test -L /non/existing/dir/src3'"}, + {Command: "/usr/bin/env bash -exc 'test \"$(readlink /non/existing/dir/src3)\" = \"/usr/bin/src3\"'"}, + {Command: "/usr/bin/env bash -exc 'test -L /non/existing/dir2/src3'"}, + {Command: "/usr/bin/env bash 
-exc 'test \"$(readlink /non/existing/dir2/src3)\" = \"/usr/bin/src3\"'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=$(grep ^need /etc/passwd | cut -d: -f3); COFFEE_GID=$(grep ^coffee /etc/group | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir/src3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, + {Command: "/usr/bin/env bash -exc 'NEED_UID=$(grep ^need /etc/passwd | cut -d: -f3); COFFEE_GID=$(grep ^coffee /etc/group | cut -d: -f3); LINK_OWNER=$(stat -c \"%u:%g\" /non/existing/dir2/src3); [ \"$LINK_OWNER\" = \"$NEED_UID:$COFFEE_GID\" ]'"}, {Command: "/non/existing/dir/src3", Stdout: dalec.CheckOutput{Equals: "goodbye\n"}, Stderr: dalec.CheckOutput{Empty: true}}, {Command: "/non/existing/dir2/src3", Stdout: dalec.CheckOutput{Equals: "goodbye\n"}, Stderr: dalec.CheckOutput{Empty: true}}, }, @@ -5307,7 +5492,7 @@ func testContainerTarget(ctx context.Context, t *testing.T, testConfig testLinux { Name: "Test framework should be executed", Steps: []dalec.TestStep{ - {Command: "/bin/sh -c 'echo this command should fail; exit 42'"}, + {Command: "/usr/bin/env bash -c 'echo this command should fail; exit 42'"}, }, }, }, @@ -5523,7 +5708,7 @@ func testContainerTarget(ctx context.Context, t *testing.T, testConfig testLinux [ ! -d debian ] && exit 0; # Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -f debian/postinst ] || (echo '#!/usr/bin/env bash' > debian/postinst; echo 'set -e' >> debian/postinst) [ -x debian/postinst ] || chmod +x debian/postinst cat >> debian/postinst << 'EOF' cat /etc/apt/sources.list.d/* @@ -5560,7 +5745,7 @@ EOF [ ! 
-d debian ] && exit 0; # Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -f debian/postinst ] || (echo '#!/usr/bin/env bash' > debian/postinst; echo 'set -e' >> debian/postinst) [ -x debian/postinst ] || chmod +x debian/postinst cat >> debian/postinst << 'EOF' grep debug=2 /etc/dpkg/dpkg.cfg.d/99-dalec-debug @@ -5612,7 +5797,7 @@ EOF [ ! -d debian ] && exit 0; # Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -f debian/postinst ] || (echo '#!/usr/bin/env bash' > debian/postinst; echo 'set -e' >> debian/postinst) [ -x debian/postinst ] || chmod +x debian/postinst cat >> debian/postinst << 'EOF' [ -s /etc/dpkg/dpkg.cfg.d/excludes ] && exit 1 @@ -5648,7 +5833,7 @@ EOF [ ! -d debian ] && exit 0; # Inject a custom postinst script to inspect the install environment -[ -f debian/postinst ] || (echo '#!/bin/sh' > debian/postinst; echo 'set -e' >> debian/postinst) +[ -f debian/postinst ] || (echo '#!/usr/bin/env bash' > debian/postinst; echo 'set -e' >> debian/postinst) [ -x debian/postinst ] || chmod +x debian/postinst cat >> debian/postinst << 'EOF' set -x @@ -5694,6 +5879,8 @@ func testLinuxSpec(t *testing.T, userSpec dalec.Spec) dalec.Spec { Dependencies: &dalec.PackageDependencies{ Runtime: map[string]dalec.PackageConstraints{ "coreutils": {}, + "bash": {}, + "grep": {}, }, }, } From 5554cc1e5eb663ef413df309f78a60cea9f50ba3 Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Tue, 12 May 2026 13:34:27 +0200 Subject: [PATCH 3/6] wip: improve dependencies extraction Signed-off-by: Mateusz Gozdek --- targets/linux/deb/distro/container.go | 85 +++++++++---- test/linux_target_test.go | 173 ++++++++++++++++++++++++++ 2 files changed, 237 insertions(+), 21 deletions(-) diff --git a/targets/linux/deb/distro/container.go 
b/targets/linux/deb/distro/container.go index f2b77fcdd..af7d2e3fd 100644 --- a/targets/linux/deb/distro/container.go +++ b/targets/linux/deb/distro/container.go @@ -193,25 +193,27 @@ essential_packages=$(dpkg-query -Wf '${Package} ${Essential}\n' | awk '$2 == "ye # the container, we define them here. bootstrap_extra_packages="passwd" +# Local spec-built .deb files. Passing these by path to apt-get (apt 1.1+ +# syntax — available on every distro dalec supports) lets apt parse the +# control files itself and resolve dependencies natively, including: +# - Pre-Depends (in addition to Depends) +# - alternatives ("pkg-a | pkg-b") — apt picks an installable option +# - virtual packages (Provides) — apt picks a real provider +# - version constraints — unsatisfiable ones cause the build to fail +# - architecture restrictions local_package_files=$(ls /spec-packages/*.deb) -# Get names of local packages so we can exclude them from apt-get install. -local_package_names=$(for f in ${local_package_files}; do dpkg-deb -f "${f}" Package 2>/dev/null; done | sort -u) - -# Extract dependencies of local packages, since we need to download those as well. -# -# Spec packages may depend on base packages, so we need to filter to only download remaining packages, since downloading local packages -# would fail. -dependencies_to_download=$(for f in ${local_package_files}; do dpkg-deb -f "${f}" Depends 2>/dev/null; done | tr ',' '\n' | sed 's/([^)]*)//g; s/|.*//; s/ //g' | grep -v '^$' | sort -u | grep -vxF "${local_package_names}" || true) - # Get the exact filenames apt needs by using --print-uris with an empty cache dir. # This forces apt to report ALL needed packages (not just uncached ones), giving # us exact filenames including correct version and architecture suffixes. # --print-uris output format: 'URL' filename size hash # We extract the second field (the filename). 
+# +# Local .deb paths are recognized as already-available and don't appear in +# --print-uris output, so the filenames here are only remote deps. needed_filenames=$(apt-get -o Dir::State::status="${rootfs}/var/lib/dpkg/status" \ -o Dir::Cache::Archives=/tmp \ - --yes --print-uris install ${essential_packages} ${bootstrap_extra_packages} ${dependencies_to_download} \ + --yes --print-uris install ${essential_packages} ${bootstrap_extra_packages} ${local_package_files} \ | grep '\.deb ' | awk '{print $2}') mkdir -p "${rootfs}${apt_archives}"/partial @@ -228,9 +230,11 @@ done # Download remaining needed packages directly into the rootfs cache. # apt skips packages already present, so only missing ones are fetched. +# Passing the local .deb paths anchors the install plan to the spec packages +# without re-fetching them. apt-get -o Dir::State::status="${rootfs}/var/lib/dpkg/status" \ -o Dir::Cache::Archives="${rootfs}${apt_archives}" \ - --yes --download-only install ${essential_packages} ${bootstrap_extra_packages} ${dependencies_to_download} + --yes --download-only install ${essential_packages} ${bootstrap_extra_packages} ${local_package_files} deb_files=$(ls "${rootfs}${apt_archives}"/*.deb) @@ -327,6 +331,15 @@ rm -f /var/lib/dpkg/info/libpam-runtime.prerm 2>/dev/null || true # Recursive dependency resolver: prints the transitive closure of installed # Depends/Pre-Depends starting from the given space-separated package list. +# +# Handles three tricky cases that a naive dpkg-query+sed pipeline gets wrong: +# - Alternatives ("pkg-a | pkg-b"): walk every option, keep whichever is +# installed (rather than blindly picking the first). +# - Virtual packages (Provides): if a dep name is not an installed package +# itself, look up installed packages whose Provides field lists it and +# keep those instead. Without this, virtual deps like "awk" silently +# drop their real provider (mawk/gawk) from the keep set. 
+# - Multi-arch qualifiers (":amd64"): stripped so the name matches. resolve_deps() { queue="$1" resolved="" @@ -338,17 +351,47 @@ resolve_deps() { resolved="${resolved} ${pkg}" + # Strip version constraints "(>= 1.0)", arch restrictions "[amd64]", + # whitespace, and multi-arch qualifiers ":amd64". Keep alternatives + # ("|") as-is; they are split below. deps=$(dpkg-query -W -f='${Depends}\n${Pre-Depends}\n' "${pkg}" 2>/dev/null \ - | tr ',' '\n' | sed 's/([^)]*)//g; s/|.*//; s/ //g; s/:.*//g' | grep -v '^$' | sort -u) - - for dep in ${deps}; do - if ! dpkg -s "${dep}" 2>/dev/null | grep -q '^Status: install ok installed'; then - continue - fi - if echo "${resolved}" | grep -qw "${dep}"; then - continue - fi - queue=$(printf '%s\n%s' "${queue}" "${dep}") + | tr ',' '\n' | sed 's/([^)]*)//g; s/\[[^]]*\]//g; s/[[:space:]]//g; s/:[a-z0-9-]*//g' | grep -v '^$' | sort -u) + + for dep_alt in ${deps}; do + # Walk all alternatives ("pkg-a|pkg-b") so we don't lose track + # of whichever option is actually installed. + for dep in $(echo "${dep_alt}" | tr '|' ' '); do + if [ -z "${dep}" ] || echo "${resolved}" | grep -qw "${dep}"; then + continue + fi + + # Real, directly-installed package. + if dpkg -s "${dep}" 2>/dev/null | grep -q '^Status: install ok installed'; then + queue=$(printf '%s\n%s' "${queue}" "${dep}") + continue + fi + + # Virtual package: find installed packages whose Provides + # field lists this name (with optional version constraint + # stripped) and keep them. 
+ providers=$(dpkg-query -W -f='${Package}|${Provides}\n' 2>/dev/null \ + | awk -F'|' -v want="${dep}" ' + $2 == "" { next } + { + n = split($2, list, ",") + for (i = 1; i <= n; i++) { + name = list[i] + sub(/\(.*$/, "", name) + gsub(/^[ \t]+|[ \t]+$/, "", name) + if (name == want) { print $1; next } + } + } + ') + for prov in ${providers}; do + if echo "${resolved}" | grep -qw "${prov}"; then continue; fi + queue=$(printf '%s\n%s' "${queue}" "${prov}") + done + done done done echo "${resolved}" diff --git a/test/linux_target_test.go b/test/linux_target_test.go index e269afa04..cbb49fdcd 100644 --- a/test/linux_target_test.go +++ b/test/linux_target_test.go @@ -949,6 +949,179 @@ EOF "expected squashed image to have at most 1 layer, got %d", len(img.RootFS.DiffIDs)) }) }) + + // These tests exercise the bootstrap install script's lossy + // extraction of the spec-built .deb's `Depends:` field. The + // extraction pipeline strips version constraints, picks only the + // first option of any `pkg-a | pkg-b` alternative, does not parse + // arch restrictions, and never reads `Pre-Depends:`. The cases + // below pin down whether each of those simplifications actually + // causes user-visible failures end-to-end. + t.Run("bootstrap_dependency_extraction", func(t *testing.T) { + t.Parallel() + target := testConfig.Target.MinimalContainer + + t.Run("loose_version_constraint_resolves", func(t *testing.T) { + // The spec's Depends becomes `curl (>= 0.0.1)`. The + // bootstrap strips it to `curl` before invoking apt. + // apt installs the latest curl, which trivially satisfies + // the original constraint, so the end-to-end result is + // correct — the lossy extraction is benign here. 
+ t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Tests: []*dalec.TestSpec{ + { + Name: "curl is installed and runnable", + Files: map[string]dalec.FileCheckOutput{ + "/usr/bin/curl": {}, + }, + }, + }, + }) + spec.Dependencies = &dalec.PackageDependencies{ + Runtime: map[string]dalec.PackageConstraints{ + "curl": {Version: []string{">= 0.0.1"}}, + }, + } + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + solveT(ctx, t, gwc, sr) + }) + }) + + t.Run("multiple_version_constraints_dedupe", func(t *testing.T) { + // `Runtime: curl: { version: [">= 7", "<< 99"] }` makes + // dalec emit two comma-separated entries in Depends: + // `curl (>= 7), curl (<< 99)`. The bootstrap pipeline + // must dedupe both back to a single `curl` so the apt + // invocation does not repeat the name. + t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Tests: []*dalec.TestSpec{ + { + Name: "curl is installed exactly once", + Files: map[string]dalec.FileCheckOutput{ + "/usr/bin/curl": {}, + }, + }, + }, + }) + spec.Dependencies = &dalec.PackageDependencies{ + Runtime: map[string]dalec.PackageConstraints{ + "curl": {Version: []string{">= 7", "<< 99"}}, + }, + } + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + solveT(ctx, t, gwc, sr) + }) + }) + + t.Run("unsatisfiable_version_constraint_fails_build", func(t *testing.T) { + // `curl (>= 99.0.0)` is impossible to satisfy from any + // real Debian/Ubuntu archive. The bootstrap passes the + // spec .deb path directly to apt-get install, which + // reads the constraint from the .deb's control file and + // refuses to find an installation plan — failing the + // build at the bootstrap step. 
+ t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + spec.Dependencies = &dalec.PackageDependencies{ + Runtime: map[string]dalec.PackageConstraints{ + "curl": {Version: []string{">= 99.0.0"}}, + }, + } + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + sr.Evaluate = true + _, err := gwc.Solve(ctx, sr) + assert.Assert(t, err != nil, "expected the bootstrap to fail because curl >= 99.0.0 is unsatisfiable") + }) + }) + + t.Run("runtime_dep_with_pre_depends_resolves_transitively", func(t *testing.T) { + // `apt` itself declares Pre-Depends on a handful of libs + // (libgcc-s1, libstdc++6 etc.). dalec never writes a + // Pre-Depends field into its own .deb — only Depends — + // so the bootstrap script never sees Pre-Depends in the + // spec package. The reviewer asked whether this is a + // real-world problem. + // + // In practice it is not, because the bootstrap hands the + // extracted package names to `apt-get install`, and apt + // is the one that recursively resolves Pre-Depends of + // any package it pulls in. This test verifies that path: + // a spec runtime-depending on `apt` builds cleanly and + // the resulting container has apt installed (which means + // its Pre-Depends were resolved correctly). 
+ t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Tests: []*dalec.TestSpec{ + { + Name: "apt is installed (pre-depends resolved)", + Files: map[string]dalec.FileCheckOutput{ + "/usr/bin/apt": {}, + "/usr/bin/apt-get": {}, + }, + }, + }, + }) + spec.Dependencies = &dalec.PackageDependencies{ + Runtime: map[string]dalec.PackageConstraints{ + "apt": {}, + }, + } + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + solveT(ctx, t, gwc, sr) + }) + }) + + t.Run("virtual_package_runtime_dep_resolves", func(t *testing.T) { + // `awk` is a virtual package on Debian/Ubuntu, provided + // by `mawk`, `gawk`, and `original-awk`. apt resolves + // it via Provides when the bootstrap hands it the spec + // .deb path (rather than the bare extracted name). + t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Tests: []*dalec.TestSpec{ + { + Name: "awk is installed via a provider", + Steps: []dalec.TestStep{ + { + Command: "/usr/bin/awk 'BEGIN{print \"ok\"}'", + Stdout: dalec.CheckOutput{Contains: []string{"ok"}}, + }, + }, + }, + }, + }) + spec.Dependencies = &dalec.PackageDependencies{ + Runtime: map[string]dalec.PackageConstraints{ + "awk": {}, + }, + } + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + solveT(ctx, t, gwc, sr) + }) + }) + }) }) t.Run("depsonly", func(t *testing.T) { From 630952a52c102e801f58e66169e5b017269bd5fd Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Thu, 14 May 2026 09:24:51 +0200 Subject: [PATCH 4/6] wip: keep systemd directories Signed-off-by: Mateusz Gozdek --- targets/linux/deb/distro/container.go | 114 +++++++++++++++++++++----- test/linux_target_test.go | 30 +++++++ 2 files changed, 124 insertions(+), 20 deletions(-) diff --git a/targets/linux/deb/distro/container.go 
b/targets/linux/deb/distro/container.go index af7d2e3fd..4f53823df 100644 --- a/targets/linux/deb/distro/container.go +++ b/targets/linux/deb/distro/container.go @@ -408,6 +408,16 @@ done # in which case we keep them and their deps. keep_set=$(resolve_deps "$(echo ${keep_set} | tr ' ' '\n')") +# Persist the keep set so the worker dpkg-remove step (which runs against +# /target using --root=) can validate that every keep-set package is still +# installed after both purge passes. We can't reliably validate this from +# inside the container: purge_last may remove dpkg's own shared libraries +# (libmd, libc, libpam...) before we get to run dpkg-query, producing +# false "missing" verdicts. The worker has its own working dpkg + libs and +# can safely query the target rootfs via --root=/target. +mkdir -p /var/lib/dpkg +printf '%s\n' ${keep_set} > /var/lib/dpkg/.dalec-keep-set + # purge_last: cleanup tools (+ their deps) not in the keep set. These # survive the main purge so they remain available for it, then get purged # at the very end. @@ -454,7 +464,6 @@ fi # Remove leftover directories (after dpkg purge so maintainer scripts still work). cleanup_dirs=" /etc/apt -/etc/systemd /usr/lib/apt /usr/share/bash-completion /usr/share/bug @@ -465,10 +474,36 @@ cleanup_dirs=" /var/cache/debconf /var/lib/apt /var/lib/pam -/var/lib/systemd /var/log " +# Preserve /etc/systemd and /var/lib/systemd if the final image actually +# uses systemd. Otherwise, these are dead config / state directories that +# can be pruned. +# +# We check the dpkg status here (after purge_first has run) rather than +# only looking at keep_set, because keep_set tracks names from the spec +# .deb but systemd may also have arrived via a base-image dependency, +# Provides, or by being pulled in as a Recommends/Suggests. +# +# systemctl on PATH is also accepted as a pragmatic proxy: if the user +# has arranged for systemctl to be available (e.g. 
via a custom base +# image where systemd is set up differently), we treat the image as +# systemd-using. +keep_systemd=0 +if dpkg -s systemd 2>/dev/null | grep -q '^Status: install ok installed'; then + keep_systemd=1 +elif command -v systemctl >/dev/null 2>&1; then + keep_systemd=1 +fi + +if [ "${keep_systemd}" != "1" ]; then + cleanup_dirs="${cleanup_dirs} +/etc/systemd +/var/lib/systemd +" +fi + if [ "${DALEC_HAS_DOCS}" != "true" ]; then cleanup_dirs="${cleanup_dirs} /usr/share/doc @@ -501,18 +536,15 @@ if [ -n "${purge_last}" ]; then fi ` - // Script that runs on the worker to remove dpkg from the target rootfs. - // Using --root= lets the worker's own dpkg binary operate on the mounted rootfs - // without needing dpkg to exist inside the target. + // Script that runs on the worker to (a) optionally remove dpkg from the + // target rootfs and (b) validate the target's dpkg database after the + // in-container cleanup. Using --root= lets the worker's own dpkg binary + // operate on the mounted rootfs without depending on the target's own + // (possibly half-removed) dpkg/libraries. dpkgRemoveScript := `#!/bin/sh set -x -# Only proceed if the cleanup script signalled that dpkg should be removed. -if [ ! -f /target/var/lib/dpkg/.dalec-remove-dpkg ]; then - echo "dpkg is a runtime dependency, skipping removal" - exit 0 -fi -rm -f /target/var/lib/dpkg/.dalec-remove-dpkg +keep_set_file=/target/var/lib/dpkg/.dalec-keep-set # --force-remove-protected was added in dpkg 1.20.6; older releases don't # recognize it. The worker's dpkg may differ from the target's, so probe it. @@ -521,15 +553,57 @@ if dpkg --force-help 2>/dev/null | grep -qw remove-protected; then force_remove_protected="--force-remove-protected" fi -# Remove dpkg and any leftover packages from the target rootfs using the -# worker's dpkg binary. Use --purge to clean config-files entries too. -# /var/lib/dpkg/status is preserved because dpkg only removes files it owns, -# not the status database itself. 
-for pkg in $(dpkg --root=/target -l 2>/dev/null | awk '/^[irpu]/ && !/^ii/ {print $2}' || true); do - dpkg --root=/target --purge --force-depends --force-remove-essential ${force_remove_protected} "${pkg}" 2>/dev/null || true -done -if dpkg --root=/target -s dpkg 2>/dev/null | grep -q '^Status:.*installed'; then - dpkg --root=/target --purge --force-depends --force-remove-essential dpkg || true +# If the in-container cleanup signalled that dpkg should be removed, do so +# from the worker side using --root=/target. dpkg cannot purge itself from +# inside the container, hence this external step. +if [ -f /target/var/lib/dpkg/.dalec-remove-dpkg ]; then + rm -f /target/var/lib/dpkg/.dalec-remove-dpkg + + # Remove dpkg and any leftover packages from the target rootfs using + # the worker's dpkg binary. Use --purge to clean config-files entries + # too. /var/lib/dpkg/status is preserved because dpkg only removes + # files it owns, not the status database itself. + for pkg in $(dpkg --root=/target -l 2>/dev/null | awk '/^[irpu]/ && !/^ii/ {print $2}' || true); do + dpkg --root=/target --purge --force-depends --force-remove-essential ${force_remove_protected} "${pkg}" 2>/dev/null || true + done + if dpkg --root=/target -s dpkg 2>/dev/null | grep -q '^Status:.*installed'; then + dpkg --root=/target --purge --force-depends --force-remove-essential dpkg || true + fi +else + echo "dpkg is a runtime dependency, skipping removal" +fi + +# Validate the target's dpkg database now that all purges (both +# in-container and worker-side) are done. The in-container cleanup uses +# --force-depends and tolerates per-package purge failures because +# individual maintainer scripts often can't run in a stripped container. +# What we cannot tolerate is the build succeeding while: +# - dpkg --audit reports any package in an inconsistent state, or +# - any keep-set package ended up half-removed / config-failed / missing. 
+# Running these checks from the worker via --root=/target is reliable +# even when the target's own dpkg/libraries have just been removed. +audit_output=$(dpkg --root=/target --audit 2>/dev/null || true) +if [ -n "${audit_output}" ]; then + echo "ERROR: dpkg --audit reported inconsistent state in target rootfs:" >&2 + echo "${audit_output}" >&2 + exit 1 +fi + +if [ -f "${keep_set_file}" ]; then + while read -r pkg; do + [ -z "${pkg}" ] && continue + status=$(dpkg-query --root=/target -W -f='${db:Status-Status}\n' "${pkg}" 2>/dev/null || echo "missing") + case "${status}" in + installed) ;; + *) + echo "ERROR: keep-set package '${pkg}' is in state '${status}' after cleanup (expected 'installed')" >&2 + exit 1 + ;; + esac + done < "${keep_set_file}" + + # Remove the marker file so it does not leak into the final image. + rm -f "${keep_set_file}" fi ` diff --git a/test/linux_target_test.go b/test/linux_target_test.go index cbb49fdcd..eac7a7aa6 100644 --- a/test/linux_target_test.go +++ b/test/linux_target_test.go @@ -847,6 +847,36 @@ EOF }) }) + t.Run("preserves_systemd_dirs_when_systemd_is_installed", func(t *testing.T) { + // When the final image actually has the systemd package + // installed (whether requested directly or pulled in + // transitively), the cleanup script must not prune + // /etc/systemd or /var/lib/systemd — those directories + // hold unit files and runtime state required for + // systemd to function. 
+ t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + spec.Dependencies = &dalec.PackageDependencies{ + Runtime: map[string]dalec.PackageConstraints{ + "systemd": {}, + }, + } + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + res := solveT(ctx, t, gwc, sr) + ref, err := res.SingleRef() + assert.NilError(t, err) + + for _, dir := range []string{"/etc/systemd", "/var/lib/systemd"} { + _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: dir}) + assert.NilError(t, err, "%s must be preserved when systemd is installed", dir) + } + }) + }) + t.Run("removes_docs_without_doc_artifacts", func(t *testing.T) { t.Parallel() ctx := startTestSpan(ctx, t) From 77065e1879b0b0fea402ce4fb5144d7c307d0a58 Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Thu, 14 May 2026 11:58:41 +0200 Subject: [PATCH 5/6] wip: keep licenses Signed-off-by: Mateusz Gozdek --- targets/linux/deb/distro/container.go | 62 +++++++++++++++++++++-- test/linux_target_test.go | 71 ++++++++++++++++++++++++++- website/docs/container-only-builds.md | 23 +++++++++ 3 files changed, 151 insertions(+), 5 deletions(-) diff --git a/targets/linux/deb/distro/container.go b/targets/linux/deb/distro/container.go index 4f53823df..518e93b22 100644 --- a/targets/linux/deb/distro/container.go +++ b/targets/linux/deb/distro/container.go @@ -128,7 +128,12 @@ func installPackagesInContainer(input buildContainerInput, ro []llb.RunOption) l // installation of a lot of things, including doc files. // This is mounting over that file with an empty file so that our test suite // passes (as it is looking at these files). - if !input.Spec.GetArtifacts(input.Target).HasDocs() { + // + // Licenses also install under /usr/share/doc on deb targets, so the + // excludes workaround is equally required when the spec has license + // artifacts (even if it has no docs or manpages). 
+ artifacts := input.Spec.GetArtifacts(input.Target) + if !artifacts.HasDocs() && len(artifacts.Licenses) == 0 { return } @@ -474,7 +479,6 @@ cleanup_dirs=" /var/cache/debconf /var/lib/apt /var/lib/pam -/var/log " # Preserve /etc/systemd and /var/lib/systemd if the final image actually @@ -504,7 +508,7 @@ if [ "${keep_systemd}" != "1" ]; then " fi -if [ "${DALEC_HAS_DOCS}" != "true" ]; then +if [ "${DALEC_KEEP_USR_SHARE_DOC}" != "true" ]; then cleanup_dirs="${cleanup_dirs} /usr/share/doc /usr/share/man @@ -512,6 +516,29 @@ if [ "${DALEC_HAS_DOCS}" != "true" ]; then " fi +# Policy note: the conditional above is "all-or-nothing" for the entire +# /usr/share/doc, /usr/share/man, /usr/share/info trees. When the spec +# author opts in by declaring docs/manpages/licenses, dalec preserves +# these directories wholesale — which also retains docs and manpages +# shipped by the spec package's RUNTIME DEPENDENCIES. When the spec +# author opts out (no docs/manpages/licenses), the trees are pruned +# wholesale, taking dependency-owned docs and manpages with them. +# +# This is intentional for the minimal-container use case (the primary +# consumer of this code path): users who care about image size want all +# /usr/share/doc and /usr/share/man content gone, and users who declare +# their own docs/licenses are explicitly expressing "I want these paths +# to exist in the final image". A more granular policy (e.g. preserve +# only spec-owned files under those paths) would require either dpkg +# diversions per file or a post-install scan against a known manifest, +# both of which add significant complexity for marginal benefit in the +# minimal-image scenario. +# +# Spec authors who want to retain dependency-owned docs/manpages can +# declare any docs/manpages/licenses of their own to flip the toggle, or +# use the non-minimal container target where this cleanup does not run +# at all. 
+ for d in ${cleanup_dirs}; do rm -rf "${d}" done @@ -534,6 +561,11 @@ fi if [ -n "${purge_last}" ]; then PATH="/tmp:${PATH}" dpkg --purge --force-depends --force-remove-essential ${force_remove_protected} ${purge_last} || true fi + +# Note: /var/log is intentionally NOT cleaned here. dpkg purges above +# write to /var/log/dpkg.log, and the subsequent worker dpkg-remove step +# may add more entries. The worker performs the final /var/log emptying +# at the very end so all log writes are captured. ` // Script that runs on the worker to (a) optionally remove dpkg from the @@ -605,6 +637,20 @@ if [ -f "${keep_set_file}" ]; then # Remove the marker file so it does not leak into the final image. rm -f "${keep_set_file}" fi + +# Empty /target/var/log but keep the directory itself. Many packages and +# runtime processes (logrotate, journald, syslog, libc's openlog(), +# application log files, etc.) expect /var/log to exist and will fail or +# crash if it is missing entirely. Removing only the contents keeps the +# disk savings while preserving the well-known mount/log point. +# +# This runs LAST — after both the in-container cleanup script's purges +# and the worker's optional dpkg-removal purges above. Both write to +# /target/var/log/dpkg.log, /target/var/log/apt/, etc., so emptying +# earlier would just see them repopulated. +if [ -d /target/var/log ]; then + find /target/var/log -mindepth 1 -delete 2>/dev/null || true +fi ` scriptSt := llb.Scratch().File(llb.Mkfile("cleanup.sh", 0o755, []byte(script)), cleanupOpts...) @@ -615,13 +661,21 @@ fi stubSt := llb.Scratch().File(llb.Mkfile("stub", 0o755, []byte("#!/bin/sh\nexit 1\n")), cleanupOpts...) // Run the main cleanup inside the container (purges everything except dpkg). + // + // DALEC_KEEP_USR_SHARE_DOC drives whether /usr/share/doc, /usr/share/man, + // and /usr/share/info are preserved. 
On deb targets, dalec installs + // license artifacts under /usr/share/doc/<package>/, so we must preserve + // those paths whenever the spec has docs, manpages, OR licenses. + artifacts := input.Spec.GetArtifacts(input.Target) + keepUsrShareDoc := artifacts.HasDocs() || len(artifacts.Licenses) > 0 + st = st.Run( dalec.WithConstraints(cleanupOpts...), llb.AddMount("/tmp/dalec-cleanup.sh", scriptSt, llb.SourcePath("cleanup.sh"), llb.Readonly), llb.AddMount("/tmp/dalec-spec-packages", input.SpecPackages, llb.Readonly), llb.AddMount("/tmp/diff", stubSt, llb.SourcePath("stub"), llb.Readonly), llb.AddMount("/tmp/tar", stubSt, llb.SourcePath("stub"), llb.Readonly), - llb.AddEnv("DALEC_HAS_DOCS", strconv.FormatBool(input.Spec.GetArtifacts(input.Target).HasDocs())), + llb.AddEnv("DALEC_KEEP_USR_SHARE_DOC", strconv.FormatBool(keepUsrShareDoc)), llb.Args([]string{"/usr/bin/sh", "/tmp/dalec-cleanup.sh"}), ).Root() diff --git a/test/linux_target_test.go b/test/linux_target_test.go index eac7a7aa6..4ac7a09ff 100644 --- a/test/linux_target_test.go +++ b/test/linux_target_test.go @@ -822,7 +822,6 @@ EOF "/var/lib/apt", "/var/lib/pam", "/var/lib/systemd", - "/var/log", } { _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: dir}) assert.ErrorContains(t, err, "no such file", "expected %s to be removed", dir) @@ -830,6 +829,34 @@ EOF }) }) + t.Run("empties_var_log_but_keeps_directory", func(t *testing.T) { + // /var/log itself must remain as a directory: packages + // and runtime processes (logrotate, journald, syslog, + // libc's openlog(), various applications) assume it + // exists and may fail or crash if it doesn't. Cleanup + // should empty its contents but never remove the + // directory entry. 
t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{}) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + res := solveT(ctx, t, gwc, sr) + ref, err := res.SingleRef() + assert.NilError(t, err) + + stat, err := ref.StatFile(ctx, gwclient.StatRequest{Path: "/var/log"}) + assert.NilError(t, err, "/var/log directory must be preserved") + assert.Assert(t, stat.IsDir(), "/var/log must remain a directory, got mode %o", stat.Mode) + + entries, err := ref.ReadDir(ctx, gwclient.ReadDirRequest{Path: "/var/log"}) + assert.NilError(t, err) + assert.Equal(t, len(entries), 0, "/var/log should be empty after cleanup, got %d entries", len(entries)) + }) + }) + t.Run("preserves_dpkg_status", func(t *testing.T) { t.Parallel() ctx := startTestSpan(ctx, t) @@ -929,6 +956,48 @@ }) }) + t.Run("preserves_usr_share_doc_with_only_license_artifacts", func(t *testing.T) { + // On deb targets, dalec installs license artifacts under + // /usr/share/doc/<package>/. A spec that declares licenses + // but no docs or manpages must still keep /usr/share/doc + // — otherwise the license files get swept away by the + // cleanup pass and the resulting image ships without + // the legally required attribution. 
+ t.Parallel() + ctx := startTestSpan(ctx, t) + + spec := testLinuxSpec(t, dalec.Spec{ + Sources: map[string]dalec.Source{ + "LICENSE": { + Inline: &dalec.SourceInline{ + File: &dalec.SourceInlineFile{ + Contents: "MIT-licensed\n", + }, + }, + }, + }, + Artifacts: dalec.Artifacts{ + Licenses: map[string]dalec.ArtifactConfig{ + "LICENSE": {}, + }, + }, + }) + + testEnv.RunTest(ctx, t, func(ctx context.Context, gwc gwclient.Client) { + sr := newSolveRequest(withSpec(ctx, t, &spec), withBuildTarget(target)) + res := solveT(ctx, t, gwc, sr) + ref, err := res.SingleRef() + assert.NilError(t, err) + + _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: "/usr/share/doc"}) + assert.NilError(t, err, "/usr/share/doc must be preserved when spec has license artifacts (deb installs licenses under /usr/share/doc//)") + + // Spot-check that the license file itself made it into the final image. + _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: "/usr/share/doc/" + spec.Name + "/LICENSE"}) + assert.NilError(t, err, "license artifact must survive cleanup") + }) + }) + t.Run("keeps_runtime_dependencies_functional", func(t *testing.T) { t.Parallel() ctx := startTestSpan(ctx, t) diff --git a/website/docs/container-only-builds.md b/website/docs/container-only-builds.md index ca8797ac8..b934a78d3 100644 --- a/website/docs/container-only-builds.md +++ b/website/docs/container-only-builds.md @@ -50,3 +50,26 @@ docker build -t my-minimal-image:0.1.0 --build-arg BUILDKIT_SYNTAX=ghcr.io/proje ``` ::: + +## Cleanup policy for Debian/Ubuntu minimal images + +Debian- and Ubuntu-based minimal container targets (for example `trixie/container`, `bookworm/container`, `noble/container`) run a post-install cleanup pass that strips the image down to just what the spec needs. 
The policy is intentionally aggressive and **all-or-nothing per directory tree**: + +- **`/usr/share/doc`, `/usr/share/man`, `/usr/share/info`** — preserved *only* when the spec declares at least one [`docs`](artifacts.md#docs), [`manpages`](artifacts.md#manpages), or [`licenses`](artifacts.md#licenses) artifact. If preserved, **all** dependency-owned content under those paths is also retained (the cleanup does not attempt to filter dependency files). If pruned, **all** content under those paths is removed, including any manpages or `copyright` files shipped by runtime dependencies. +- **`/etc/systemd`, `/var/lib/systemd`** — preserved only when the final image actually has the `systemd` package installed (or `systemctl` on `PATH`). +- **`/var/log`** — the directory itself is always preserved, but its contents are emptied. Many runtime processes assume `/var/log` exists. +- **Package manager state and caches** (`/etc/apt`, `/var/cache/apt`, `/var/lib/apt`, `/usr/lib/apt`, `/var/lib/pam`, `/var/cache/debconf`, `/usr/share/{bash-completion,bug,debconf,lintian,locale}`) — always removed. + +### Implication for spec authors + +If you want the final image to ship with manpages, the changelog, or copyright files for **any** package — your own or a dependency's — declare at least one `docs`, `manpages`, or `licenses` artifact in your spec. A single license file is enough to flip the toggle: + +```yaml +artifacts: + licenses: + LICENSE: +``` + +This preserves `/usr/share/doc` wholesale, so dependency-shipped `copyright` files (and any other dependency-owned content under the doc/man/info trees) will also remain in the image. + +If you specifically want a leaner image with no dependency-owned docs but you need your own license files present, that is still the result of declaring `licenses` — the cleanup script does not currently support a more granular policy. 
If you need full control over which files are kept, use the non-minimal container target instead (for example `trixie/testing/container`), where this cleanup pass does not run. From 654fcb056fe965fde30ea464d83ec6c988d884e1 Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Fri, 15 May 2026 14:32:12 +0200 Subject: [PATCH 6/6] wip; remaining fixes Signed-off-by: Mateusz Gozdek --- targets/linux/deb/distro/container.go | 154 +++++++++++++++++++++++--- test/linux_target_test.go | 42 +++++-- 2 files changed, 173 insertions(+), 23 deletions(-) diff --git a/targets/linux/deb/distro/container.go b/targets/linux/deb/distro/container.go index 518e93b22..40015c4af 100644 --- a/targets/linux/deb/distro/container.go +++ b/targets/linux/deb/distro/container.go @@ -328,9 +328,26 @@ func cleanupBootstrapContainer(st llb.State, input buildContainerInput, opts ... cleanupOpts := append(opts, dalec.ProgressGroup("Cleanup Bootstrap Container")) script := `#!/bin/sh - set -x +# Append /tmp to PATH so the no-op diff/tar stubs mounted there by the +# Go caller (see stubSt below) are picked up by any maintainer script +# that calls 'diff' or 'tar' without an absolute path AFTER the real +# tools have been purged. These stubs need to be reachable for BOTH +# purge passes (purge_first and purge_last) — the first pass purges +# most of the system and frequently triggers prerm/postrm scripts +# that exec diff or tar; if those tools have already been removed and +# the stubs are not on PATH, the maintainer scripts crash and leave +# packages in an inconsistent state. +# +# IMPORTANT: /tmp is APPENDED, not prepended. dpkg-deb internally +# execs 'tar' to read .deb control archives, so prepending /tmp would +# make every dpkg-deb invocation in this script (including the +# keep_set seeding right below) fail with 'tar subprocess returned +# error exit status 1' and leave us with an empty keep_set. 
+PATH="${PATH}:/tmp" +export PATH + # Remove problematic maintainer scripts that cause infinite loops during purge. rm -f /var/lib/dpkg/info/libpam-runtime.prerm 2>/dev/null || true @@ -403,16 +420,59 @@ resolve_deps() { } # Packages from the user's spec — the starting point of the keep set. +# +# Seed with TWO sources, then take the transitive closure of both: +# +# 1. The spec package names themselves (read from each .deb's Package +# field). resolve_deps will walk their dpkg-query Depends and find +# everything they require post-install. +# +# 2. The Depends + Pre-Depends fields read directly from each spec +# .deb's control data. This is the critical safety net: if +# anything goes wrong with the spec package's installed state in +# dpkg's database (e.g. half-configured, missing entirely from +# the status DB, or dpkg-query returning empty Depends for any +# reason), resolve_deps walking only from the package name would +# drop the user's runtime deps from the keep set and they'd be +# purged. Reading Depends straight from the .deb sidesteps any +# installed-state pathologies. keep_set="" for f in $(ls /tmp/dalec-spec-packages/*.deb 2>/dev/null); do keep_set="${keep_set} $(dpkg-deb -f "${f}" Package)" + + # Pull the runtime Depends + Pre-Depends from the .deb control + # itself and normalize them the same way resolve_deps normalizes + # dpkg-query output (strip version constraints, arch restrictions, + # whitespace, multi-arch qualifiers; keep '|' alternatives so they + # are split into individual names below). + raw_deps=$(dpkg-deb -f "${f}" Depends Pre-Depends 2>/dev/null \ + | sed 's/^[A-Za-z-]*: *//' \ + | tr ',' '\n' \ + | sed 's/([^)]*)//g; s/\[[^]]*\]//g; s/[[:space:]]//g; s/:[a-z0-9-]*//g' \ + | grep -v '^$' | sort -u) + for dep_alt in ${raw_deps}; do + for dep in $(echo "${dep_alt}" | tr '|' ' '); do + [ -z "${dep}" ] && continue + keep_set="${keep_set} ${dep}" + done + done done -# Full transitive closure of spec packages. 
Cleanup tools end up here only -# if a spec package actually depends on them (directly or transitively), -# in which case we keep them and their deps. +# Full transitive closure of the seed set. Cleanup tools end up here +# only if a spec package actually depends on them (directly or +# transitively), in which case we keep them and their deps. keep_set=$(resolve_deps "$(echo ${keep_set} | tr ' ' '\n')") +# Surface the resolved keep set in build logs for diagnostic purposes. +# A small keep_set (just the spec package itself, no transitive deps) +# is a legitimate outcome for specs that declare no runtime deps and +# whose generated .deb's Depends field is empty — those builds intend +# the cleanup to purge everything except the spec package. We log the +# resolved set so unexpected smallness is at least visible to anyone +# triaging "the binary I expected is missing" issues, but we do NOT +# fail the build here. +echo "DALEC keep_set (resolved): ${keep_set}" + # Persist the keep set so the worker dpkg-remove step (which runs against # /target using --root=) can validate that every keep-set package is still # installed after both purge passes. We can't reliably validate this from @@ -420,8 +480,21 @@ keep_set=$(resolve_deps "$(echo ${keep_set} | tr ' ' '\n')") # (libmd, libc, libpam...) before we get to run dpkg-query, producing # false "missing" verdicts. The worker has its own working dpkg + libs and # can safely query the target rootfs via --root=/target. +# +# Only the names that correspond to actually-installed packages are +# persisted. The seed includes the raw Depends names (e.g. 'awk') which +# resolve_deps then follows via Provides to a real installed package +# (e.g. 'mawk'); both end up in ${keep_set}, but the worker can only +# audit the latter — dpkg-query against the virtual name 'awk' would +# return 'not-installed' and produce a false-positive validation +# failure even though the build is correct. 
mkdir -p /var/lib/dpkg -printf '%s\n' ${keep_set} > /var/lib/dpkg/.dalec-keep-set +: > /var/lib/dpkg/.dalec-keep-set +for pkg in ${keep_set}; do + if dpkg-query -W -f='${db:Status-Status}\n' "${pkg}" 2>/dev/null | grep -qx installed; then + printf '%s\n' "${pkg}" >> /var/lib/dpkg/.dalec-keep-set + fi +done # purge_last: cleanup tools (+ their deps) not in the keep set. These # survive the main purge so they remain available for it, then get purged @@ -432,7 +505,13 @@ purge_last="" # maintainer scripts, etc.) but not necessarily wanted in the final image. # If a spec package transitively depends on any of these, it (and its full # dependency tree) stays in the keep set; otherwise it gets purged at the end. -for pkg in dpkg dash coreutils base-files libc-bin grep; do +# +# findutils provides /usr/bin/find and /usr/bin/xargs, which many +# packages' prerm/postrm scripts shell out to during the first purge +# pass (e.g. libstdc++6's prerm uses 'find … | xargs' to clear ld.so +# cache entries). Purging findutils early causes those scripts to +# exit 127 and leaves their owning packages in a half-removed state. +for pkg in dpkg dash coreutils base-files libc-bin grep findutils; do if echo "${keep_set}" | grep -qw "${pkg}"; then continue; fi # dpkg can't purge itself from inside the container; signal the worker @@ -463,6 +542,33 @@ for pkg in $(dpkg-query -W -f='${Package}\n' | sed 's/:.*//g'); do done if [ -n "${purge_first}" ]; then + # Strip prerm/postrm scripts of packages we're about to purge. + # Many of them (libpam-modules, libpam0g, anything debconf-aware) + # unconditionally exec helpers like /usr/share/debconf/frontend + # that may already have been purged earlier in this same pass — + # dpkg purges in alphabetical order, not dependency order, so + # debconf often goes away before its consumers. 
When that happens, + # the script returns exit 127, dpkg flags the package as failed, + # and the worker-side dpkg --audit reports the resulting + # 'config-files' state as inconsistent and fails the build. + # + # Skipping the *rm scripts is safe in this context: + # - cleanup_dirs and the final layer squash wipe whatever state + # a postrm would have unwound. + # - The packages being purged are explicitly not in the final + # image, so nothing depends on the unwind side-effects. + # + # Multi-arch packages have files named ${pkg}:${arch}.{pre,post}rm, + # which is why we use a shell glob in addition to the bare name. + for pkg in ${purge_first}; do + rm -f "/var/lib/dpkg/info/${pkg}.prerm" \ + "/var/lib/dpkg/info/${pkg}.postrm" 2>/dev/null || true + for f in /var/lib/dpkg/info/${pkg}:*.prerm \ + /var/lib/dpkg/info/${pkg}:*.postrm; do + [ -e "${f}" ] && rm -f "${f}" + done + done + dpkg --purge --force-depends --force-remove-essential ${purge_first} || true fi @@ -490,14 +596,36 @@ cleanup_dirs=" # .deb but systemd may also have arrived via a base-image dependency, # Provides, or by being pulled in as a Recommends/Suggests. # -# systemctl on PATH is also accepted as a pragmatic proxy: if the user -# has arranged for systemctl to be available (e.g. via a custom base -# image where systemd is set up differently), we treat the image as -# systemd-using. +# Detection covers three signals, in order: +# 1. The systemd PID-1 binary itself is on disk. This is the most +# reliable indicator because dpkg-deb --extract unpacks files +# regardless of whether postinst later succeeds. In stripped / +# minimal containers systemd's postinst frequently fails (no init, +# no D-Bus, can't enable units), leaving the package in +# 'half-configured' state — but the binary is present and +# /etc/systemd still matters. +# +# We deliberately do NOT check for /usr/lib/systemd/system as a +# proxy: many non-systemd packages (e2fsprogs, init-system-helpers, +# dbus, etc.) 
ship unit files there, so its presence does not imply +# that systemd itself is installed. +# 2. dpkg-query reports any installed-ish state for the systemd +# package (covers the half-configured case explicitly, in case the +# binary lives in a non-standard location). +# 3. systemctl on PATH — pragmatic fallback for custom base images +# where systemd is set up by other means. keep_systemd=0 -if dpkg -s systemd 2>/dev/null | grep -q '^Status: install ok installed'; then +if [ -x /usr/lib/systemd/systemd ] || [ -x /lib/systemd/systemd ]; then keep_systemd=1 -elif command -v systemctl >/dev/null 2>&1; then +fi +if [ "${keep_systemd}" != "1" ]; then + case "$(dpkg-query -W -f='${db:Status-Status}\n' systemd 2>/dev/null)" in + installed|half-configured|triggers-awaited|triggers-pending) + keep_systemd=1 + ;; + esac +fi +if [ "${keep_systemd}" != "1" ] && command -v systemctl >/dev/null 2>&1; then keep_systemd=1 fi @@ -559,7 +687,7 @@ if dpkg --force-help 2>/dev/null | grep -qw remove-protected; then fi if [ -n "${purge_last}" ]; then - PATH="/tmp:${PATH}" dpkg --purge --force-depends --force-remove-essential ${force_remove_protected} ${purge_last} || true + dpkg --purge --force-depends --force-remove-essential ${force_remove_protected} ${purge_last} || true fi # Note: /var/log is intentionally NOT cleaned here. 
dpkg purges above diff --git a/test/linux_target_test.go b/test/linux_target_test.go index 4ac7a09ff..dbd970734 100644 --- a/test/linux_target_test.go +++ b/test/linux_target_test.go @@ -928,12 +928,13 @@ EOF t.Parallel() ctx := startTestSpan(ctx, t) + const readmeContents = "hello docs" spec := testLinuxSpec(t, dalec.Spec{ Sources: map[string]dalec.Source{ "README": { Inline: &dalec.SourceInline{ File: &dalec.SourceInlineFile{ - Contents: "hello docs", + Contents: readmeContents, }, }, }, @@ -951,8 +952,15 @@ EOF ref, err := res.SingleRef() assert.NilError(t, err) - _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: "/usr/share/doc"}) - assert.NilError(t, err, "/usr/share/doc should be preserved when spec has doc artifacts") + // Verify the spec-declared doc artifact survives cleanup + // in its expected on-disk location with intact contents. + // A bare StatFile on /usr/share/doc would pass even if + // cleanup accidentally emptied the directory but left + // the mountpoint behind. 
+ docPath := "/usr/share/doc/" + spec.Name + "/README" + got, err := ref.ReadFile(ctx, gwclient.ReadRequest{Filename: docPath}) + assert.NilError(t, err, "spec doc artifact must be present at %s after cleanup", docPath) + assert.Equal(t, string(got), readmeContents, "spec doc artifact contents must be intact at %s", docPath) }) }) @@ -966,12 +974,13 @@ EOF t.Parallel() ctx := startTestSpan(ctx, t) + const licenseContents = "MIT-licensed\n" spec := testLinuxSpec(t, dalec.Spec{ Sources: map[string]dalec.Source{ "LICENSE": { Inline: &dalec.SourceInline{ File: &dalec.SourceInlineFile{ - Contents: "MIT-licensed\n", + Contents: licenseContents, }, }, }, @@ -989,12 +998,14 @@ EOF ref, err := res.SingleRef() assert.NilError(t, err) - _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: "/usr/share/doc"}) - assert.NilError(t, err, "/usr/share/doc must be preserved when spec has license artifacts (deb installs licenses under /usr/share/doc//)") - - // Spot-check that the license file itself made it into the final image. - _, err = ref.StatFile(ctx, gwclient.StatRequest{Path: "/usr/share/doc/" + spec.Name + "/LICENSE"}) - assert.NilError(t, err, "license artifact must survive cleanup") + // Verify the actual license artifact survives cleanup + // with intact contents. Checking only that + // /usr/share/doc exists would pass even if cleanup + // emptied the directory but kept the mountpoint. + licensePath := "/usr/share/doc/" + spec.Name + "/LICENSE" + got, err := ref.ReadFile(ctx, gwclient.ReadRequest{Filename: licensePath}) + assert.NilError(t, err, "license artifact must survive cleanup at %s", licensePath) + assert.Equal(t, string(got), licenseContents, "license artifact contents must be intact at %s", licensePath) }) }) @@ -1002,6 +1013,17 @@ EOF t.Parallel() ctx := startTestSpan(ctx, t) + // Asserts that spec-declared runtime deps survive the + // cleanup pass as files on disk. 
+ // + // End-to-end exec validation of a runtime dep is + // covered by the sibling + // virtual_package_runtime_dep_resolves test, which + // actually runs /usr/bin/awk after cleanup and checks + // its stdout. That test catches missing dynamic + // linkers, broken shared library closures, and other + // runtime-only regressions that a bare StatFile check + // would silently miss. spec := testLinuxSpec(t, dalec.Spec{ Tests: []*dalec.TestSpec{ {