Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions internal/cli/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ func buildCmd() *cobra.Command {
var includePaths []string
var ignoreSignatures bool
var sizeLimits options.SizeLimits
var extraPythonPackages []string
var extraPythonIndexes []string

cmd := &cobra.Command{
Use: "build",
Expand Down Expand Up @@ -119,6 +121,8 @@ Along the image, apko will generate SBOMs (software bill of materials) describin
build.WithIncludePaths(includePaths),
build.WithIgnoreSignatures(ignoreSignatures),
build.WithSizeLimits(sizeLimits),
build.WithExtraEcosystemPackages("python", extraPythonPackages),
build.WithExtraEcosystemIndexes("python", extraPythonIndexes),
)
},
}
Expand All @@ -139,6 +143,8 @@ Along the image, apko will generate SBOMs (software bill of materials) describin
cmd.Flags().StringVar(&lockfile, "lockfile", "", "a path to .lock.json file (e.g. produced by apko lock) that constraints versions of packages to the listed ones (default '' means no additional constraints)")
cmd.Flags().StringSliceVar(&includePaths, "include-paths", []string{}, "Additional include paths where to look for input files (config, base image, etc.). By default apko will search for paths only in workdir. Include paths may be absolute, or relative. Relative paths are interpreted relative to workdir. For adding extra paths for packages, use --repository-append.")
cmd.Flags().BoolVar(&ignoreSignatures, "ignore-signatures", false, "ignore repository signature verification")
cmd.Flags().StringSliceVar(&extraPythonPackages, "ecosystem-python-package-append", []string{}, "extra Python packages to include (e.g., flask==3.0.0)")
cmd.Flags().StringSliceVar(&extraPythonIndexes, "ecosystem-python-index-append", []string{}, "extra Python package index URLs to use")
addClientLimitFlags(cmd, &sizeLimits)
return cmd
}
Expand Down
26 changes: 26 additions & 0 deletions internal/cli/lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ import (
apkfs "chainguard.dev/apko/pkg/apk/fs"
"chainguard.dev/apko/pkg/build"
"chainguard.dev/apko/pkg/build/types"
"chainguard.dev/apko/pkg/ecosystem"
_ "chainguard.dev/apko/pkg/ecosystem/python"
pkglock "chainguard.dev/apko/pkg/lock"
)

Expand Down Expand Up @@ -245,6 +247,30 @@ func LockCmd(ctx context.Context, output string, archs []types.Architecture, opt
}
}

// Resolve ecosystem packages
for name, ecoConfig := range ic.Contents.Ecosystems {
installer, ok := ecosystem.Get(name)
if !ok {
return fmt.Errorf("unknown ecosystem: %s", name)
}
for _, arch := range archs {
resolved, err := installer.Resolve(ctx, ecoConfig, arch, "glibc", auth.DefaultAuthenticators)
if err != nil {
return fmt.Errorf("resolving %s packages for %s: %w", name, arch, err)
}
for _, pkg := range resolved {
lock.Contents.EcosystemPackages = append(lock.Contents.EcosystemPackages, pkglock.LockEcosystemPkg{
Ecosystem: pkg.Ecosystem,
Name: pkg.Name,
Version: pkg.Version,
URL: pkg.URL,
Checksum: pkg.Checksum,
Architecture: arch.ToAPK(),
})
}
}
}

// Sort keyrings by name for reproducible lock files
sort.Slice(lock.Contents.Keyrings, func(i, j int) bool {
return lock.Contents.Keyrings[i].Name < lock.Contents.Keyrings[j].Name
Expand Down
6 changes: 6 additions & 0 deletions pkg/build/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
apkfs "chainguard.dev/apko/pkg/apk/fs"
"chainguard.dev/apko/pkg/baseimg"
"chainguard.dev/apko/pkg/build/types"
"chainguard.dev/apko/pkg/ecosystem"
"chainguard.dev/apko/pkg/options"
"chainguard.dev/apko/pkg/paths"
"chainguard.dev/apko/pkg/s6"
Expand All @@ -64,6 +65,11 @@ type Context struct {
fs apkfs.FullFS
apk *apk.APK
baseimg *baseimg.BaseImage

// ecosystemPkgs holds resolved ecosystem packages with InstalledSize
// populated after installation. Used by buildLayers to create
// separate layers for ecosystem packages.
ecosystemPkgs []ecosystem.ResolvedPackage
}

func (bc *Context) Summarize(ctx context.Context) {
Expand Down
19 changes: 19 additions & 0 deletions pkg/build/build_implementation.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"encoding/json"
"fmt"
"io"
"maps"
"os"
"path/filepath"
"runtime"
Expand All @@ -36,6 +37,8 @@ import (
ldsocache "chainguard.dev/apko/internal/ldso-cache"
"chainguard.dev/apko/pkg/apk/apk"
apkfs "chainguard.dev/apko/pkg/apk/fs"
"chainguard.dev/apko/pkg/ecosystem"
_ "chainguard.dev/apko/pkg/ecosystem/python" // Register python ecosystem installer.
"chainguard.dev/apko/pkg/lock"
"chainguard.dev/apko/pkg/options"
)
Expand Down Expand Up @@ -177,6 +180,22 @@ func (bc *Context) buildImage(ctx context.Context) ([]apk.InstalledDiff, error)
}
}

// Install ecosystem packages (python, etc.) after APK packages so that
// the language runtime is available for version detection.
if len(bc.ic.Contents.Ecosystems) > 0 {
env, ecoPkgs, err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch, bc.o.Auth)
if err != nil {
return nil, fmt.Errorf("installing ecosystem packages: %w", err)
}
bc.ecosystemPkgs = ecoPkgs
if len(env) > 0 {
if bc.ic.Environment == nil {
bc.ic.Environment = make(map[string]string)
}
maps.Copy(bc.ic.Environment, env)
}
}

// For now adding additional accounts is banned when using base image. On the other hand, we don't want to
// wipe out the users set in base.
// If one wants to add a support for adding additional users they would need to look into this piece of code.
Expand Down
95 changes: 74 additions & 21 deletions pkg/build/layers.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"os"
"path"
"slices"
"strings"

"chainguard.dev/apko/pkg/apk/apk"
apkfs "chainguard.dev/apko/pkg/apk/fs"
Expand All @@ -50,10 +51,10 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) {
return nil, fmt.Errorf("building filesystem: %w", err)
}

pkgs := make([]*apk.Package, 0, len(diffs))
apkPkgs := make([]*apk.Package, 0, len(diffs))
pkgToDiff := map[*apk.Package][]byte{}
for _, pkgDiff := range diffs {
pkgs = append(pkgs, pkgDiff.Package)
apkPkgs = append(apkPkgs, pkgDiff.Package)
pkgToDiff[pkgDiff.Package] = pkgDiff.Diff
}

Expand All @@ -69,11 +70,28 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) {
return nil, err
}

// Use our layering strategy to partition packages into a set of Budget groups.
groups, err := groupByOriginAndSize(pkgs, bc.ic.Layering.Budget)
// Group APK packages by origin/replaces.
apkGroups, err := groupAPKByOrigin(apkPkgs)
if err != nil {
return nil, fmt.Errorf("grouping packages: %w", err)
return nil, fmt.Errorf("grouping apk packages: %w", err)
}

// Create a separate group for each ecosystem package.
// Each gets its own group since ecosystem packages are independently versioned
// and don't have APK concepts like origin or replaces.
ecoGroups := make([]*group, 0, len(bc.ecosystemPkgs))
for _, ep := range bc.ecosystemPkgs {
owner := ep.OwnerName()
ecoGroups = append(ecoGroups, &group{
owners: []string{owner},
size: ep.InstalledSize,
tiebreaker: owner,
})
}

// Combine all groups and apply the shared budget.
apkGroups = append(apkGroups, ecoGroups...)
groups := applyBudget(apkGroups, bc.ic.Layering.Budget)
log.Infof("Building %d layers with budget %d", len(groups), bc.ic.Layering.Budget)

for i, g := range groups {
Expand All @@ -82,6 +100,13 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) {
for _, pkg := range g.pkgs {
log.Infof(" - %s=%s", pkg.Name, pkg.Version)
}
for _, owner := range g.owners {
// Ecosystem owners are namespaced with a colon (e.g. "python:flask"),
// APK owners are bare package names logged above via g.pkgs.
if strings.Contains(owner, ":") {
log.Infof(" - %s", owner)
}
}
}

// Then partition that single fs.FS into multiple layers based on our layering strategy.
Expand Down Expand Up @@ -117,6 +142,16 @@ func replacesGroup(rep string, g *group) (bool, error) {
}

// groupByOriginAndSize partitions APK packages into layer groups in two steps:
// it first delegates to groupAPKByOrigin to build the per-origin groups, then
// hands those groups to applyBudget together with the requested budget.
//
// Any error from groupAPKByOrigin is returned unchanged with a nil slice;
// applyBudget is only invoked when grouping succeeded.
func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) {
	byOrigin, err := groupAPKByOrigin(pkgs)
	if err != nil {
		// Propagate the grouping failure as-is; callers add their own context.
		return nil, err
	}

	budgeted := applyBudget(byOrigin, budget)
	return budgeted, nil
}

// groupAPKByOrigin groups APK packages by origin and merges replaces relationships.
// It populates both pkgs and owners on each group. Does not apply budget.
func groupAPKByOrigin(pkgs []*apk.Package) ([]*group, error) {
// First, we're going to group packages by their origin.
byOrigin := map[string]*group{}
for _, pkg := range pkgs {
Expand All @@ -131,6 +166,7 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) {
}

g.pkgs = append(g.pkgs, pkg)
g.owners = append(g.owners, pkg.Name)
}

// Then we need to merge any packages that replace each other.
Expand Down Expand Up @@ -189,9 +225,8 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) {
}
}

// Now we need to pick the best groups to keep.
// First pass we'll set the size of each group to the sum of the installed size of all its packages.
groups := make([]*group, 0, budget)
// Compute sizes and deduplicate groups.
groups := make([]*group, 0)
seen := map[*group]struct{}{}
for v := range maps.Values(byOrigin) {
if _, ok := seen[v]; ok {
Expand All @@ -207,7 +242,14 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) {
}
}

// Then we'll sort by the size and take the top $budget, merging the remainders.
return groups, nil
}

// applyBudget sorts groups by size descending and merges anything beyond
// the budget into a single overflow group. It also sorts owners/packages
// within each group for consistency.
func applyBudget(groups []*group, budget int) []*group {
// Sort by the size and take the top $budget, merging the remainders.
slices.SortFunc(groups, func(a, b *group) int {
return cmp.Or(
cmp.Compare(b.size, a.size), // Descending size.
Expand All @@ -223,19 +265,27 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) {
groups = append(groups, merge(remainder...))
}

// Sort packages too just so they're in a consistent order.
// Sort packages and owners for consistent order.
for _, g := range groups {
slices.SortFunc(g.pkgs, func(a, b *apk.Package) int {
return cmp.Compare(a.Name, b.Name)
})
slices.Sort(g.owners)
}

return groups, nil
return groups
}

type group struct {
// pkgs holds APK packages in this group (used for installed DB splitting).
pkgs []*apk.Package

// owners holds all owner names in this group.
// For APK packages this is the package name, for ecosystem packages
// this is the owner string (e.g. "python:flask").
// Used by splitLayers to route files to the correct layer writer.
owners []string

size uint64

// This is silly but in the event that two groups have identical size,
Expand All @@ -247,6 +297,7 @@ func merge(groups ...*group) *group {
merged := &group{}
for _, g := range groups {
merged.pkgs = slices.Concat(merged.pkgs, g.pkgs)
merged.owners = slices.Concat(merged.owners, g.owners)
merged.size += g.size
merged.tiebreaker = max(merged.tiebreaker, g.tiebreaker)
}
Expand All @@ -256,8 +307,8 @@ func merge(groups ...*group) *group {
func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToDiff map[*apk.Package][]byte, tmpdir string) ([]v1.Layer, error) {
buf := make([]byte, 1<<20)

// We'll create a writer for each layer and a map to quickly access the writer given a package or group.
packageToWriter := map[string]*layerWriter{}
// We'll create a writer for each layer and a map to quickly access the writer given an owner name or group.
ownerToWriter := map[string]*layerWriter{}
groupToWriter := map[*group]*layerWriter{}

for _, g := range groups {
Expand All @@ -270,8 +321,8 @@ func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToD
w := newLayerWriter(f)
groupToWriter[g] = w

for _, pkg := range g.pkgs {
packageToWriter[pkg.Name] = w
for _, owner := range g.owners {
ownerToWriter[owner] = w
}
}

Expand Down Expand Up @@ -314,15 +365,17 @@ func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToD
// By default, all files go into the top layer.
w := top

// However, if a file implements an extension interface that tells us what package owns it,
// However, if a file implements an extension interface that tells us who owns it,
// we can use that to determine which layer it belongs to (if any).
if pkger, ok := f.info.(interface {
Package() *apk.Package
// Owner() returns the APK package name for APK-installed files, or the
// ecosystem owner string (e.g. "python:flask") for ecosystem files.
if ownr, ok := f.info.(interface {
Owner() string
}); ok {
if pkg := pkger.Package(); pkg != nil {
w, ok = packageToWriter[pkg.Name]
if name := ownr.Owner(); name != "" {
w, ok = ownerToWriter[name]
if !ok {
panic(fmt.Errorf("packageToWriter[%q] missing", pkg.Name))
panic(fmt.Errorf("ownerToWriter[%q] missing", name))
}
}
}
Expand Down
Loading
Loading