Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 127 additions & 3 deletions loader/include.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@ package loader

import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"sync"

"github.com/compose-spec/compose-go/v2/dotenv"
interp "github.com/compose-spec/compose-go/v2/interpolation"
Expand All @@ -30,6 +34,114 @@ import (
"github.com/compose-spec/compose-go/v2/types"
)

// includeCache memoizes loaded include models for the duration of a single
// project load. A file reached through more than one include path (a "diamond"
// in the include graph) was previously parsed and recursively expanded once per
// path, which is quadratic-to-exponential on deep graphs. The cache parses and
// expands each distinct include only once.
//
// The key captures everything that determines the loaded model — the resolved
// file paths, the project directory, and the effective environment — so a cache
// hit is equivalent to a fresh load even when the same file is included with a
// different env_file or project_directory. Each consumer gets a fresh deep copy,
// so importResources (and later normalization) never mutates a cached entry or a
// sibling branch that shares it.
//
// Cycle-safe: an include cycle is intrinsic to a node's subtree (the back-edge
// is in the fixed set of files the node includes), so it is detected on the
// node's first load, which fails before the node can be cached.
type includeCache struct {
mu sync.Mutex
entries map[string]map[string]any
}

type includeCacheKey struct{}

// getOrCreateIncludeCache returns the include cache carried by ctx, creating one
// (and a derived context) on first use so that all sibling and descendant
// includes of a single load share it.
func getOrCreateIncludeCache(ctx context.Context) (*includeCache, context.Context) {
if c, ok := ctx.Value(includeCacheKey{}).(*includeCache); ok {
return c, ctx
}
c := &includeCache{entries: map[string]map[string]any{}}
return c, context.WithValue(ctx, includeCacheKey{}, c)
}

func (c *includeCache) get(key string) (map[string]any, bool) {
c.mu.Lock()
defer c.mu.Unlock()
if m, ok := c.entries[key]; ok {
return deepCopyMapping(m), true
}
return nil, false
}

func (c *includeCache) put(key string, model map[string]any) {
c.mu.Lock()
defer c.mu.Unlock()
c.entries[key] = deepCopyMapping(model)
}

// includeKey hashes the inputs that determine an included model. Two include
// entries with the same key load identical content — including identical
// relative paths, so a cached model is reuse-safe in the caller's context.
//
// workingDir (the relative base the included model's paths are resolved against)
// is part of the key: the same file reached through two include parents can have
// a different relative base (e.g. "a/b" vs "b"), which yields models with
// different relative paths. Keying on it avoids reusing a model whose paths the
// caller would then rebase incorrectly.
func includeKey(paths []string, workingDir, projectDir string, env types.Mapping) string {
h := sha256.New()
for _, p := range paths {
_, _ = h.Write([]byte(p))
_, _ = h.Write([]byte{0})
}
_, _ = h.Write([]byte{1})
_, _ = h.Write([]byte(workingDir))
_, _ = h.Write([]byte{1})
_, _ = h.Write([]byte(projectDir))
_, _ = h.Write([]byte{1})
keys := make([]string, 0, len(env))
for k := range env {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
_, _ = h.Write([]byte(k))
_, _ = h.Write([]byte{0})
_, _ = h.Write([]byte(env[k]))
_, _ = h.Write([]byte{0})
}
Comment on lines +97 to +116

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The single-byte separators written here ([]byte{0} on lines 99, 113, 115 and []byte{1} on lines 101, 103, 105) aren't unambiguous against env values that contain those bytes. types.Mapping is map[string]string populated from .env files /
process env, where embedded NULs aren't forbidden. Two distinct (paths, env) tuples can then produce the same byte stream and hash to the same key, wrong cached model served, no error surfaced.

Toy example: env = {"A\x00B": "X"} and env = {"A": "B\x00X"} both serialize to …A\x00B\x00X\x00 through the loop on lines 111-116.

Length-prefixing each field removes the ambiguity:

write := func(s string) {
    fmt.Fprintf(h, "%d:", len(s))
    _, _ = h.Write([]byte(s))
}
for _, p := range paths { write(p) }
write(workingDir)
write(projectDir)
for _, k := range keys { write(k); write(env[k]) }

While you're here, a short comment explaining why Substitute / TypeCastMapping are intentionally excluded from the key (invariant across includes within a single load) would help a future contributor who adds a per-subtree option not silently introduce collisions, like

// Note: Substitute and TypeCastMapping are intentionally excluded from the key.
// They are invariant across includes within a single Load (cloned unchanged from
// the top-level options at the call site above). If a future option allows them
// to vary per include, they must be folded into the key.

return hex.EncodeToString(h.Sum(nil))
}

// deepCopyMapping returns a deep copy of a generic YAML mapping (the shape of a
// not-yet-typed compose model: nested map[string]any / []any / scalars).
func deepCopyMapping(m map[string]any) map[string]any {
out := make(map[string]any, len(m))
for k, v := range m {
out[k] = deepCopyValue(v)
}
return out
}

func deepCopyValue(v any) any {
switch t := v.(type) {
case map[string]any:
return deepCopyMapping(t)
case []any:
out := make([]any, len(t))
for i, e := range t {
out[i] = deepCopyValue(e)
}
return out
default:
return v
}
}

// loadIncludeConfig parse the required config from raw yaml
func loadIncludeConfig(source any) ([]types.IncludeConfig, error) {
if source == nil {
Expand Down Expand Up @@ -57,6 +169,8 @@ func ApplyInclude(ctx context.Context, workingDir string, environment types.Mapp
return err
}

cache, ctx := getOrCreateIncludeCache(ctx)

for _, r := range includeConfig {
for _, listener := range options.Listeners {
listener("include", map[string]any{
Expand Down Expand Up @@ -151,9 +265,19 @@ func ApplyInclude(ctx context.Context, workingDir string, environment types.Mapp
LookupValue: config.LookupEnv,
TypeCastMapping: options.Interpolate.TypeCastMapping,
}
imported, err := loadYamlModel(ctx, config, loadOptions, &cycleTracker{}, included)
if err != nil {
return err
// Memoize by the inputs that determine the loaded model so a file
// reached through several include paths is parsed and expanded once.
// The merge into `model` still runs for every occurrence (a copy is
// handed out), so any same-file `extends` in the including file still
// resolves and the result is identical to loading it each time.
key := includeKey(r.Path, config.WorkingDir, r.ProjectDirectory, config.Environment)
imported, ok := cache.get(key)
if !ok {
imported, err = loadYamlModel(ctx, config, loadOptions, &cycleTracker{}, included)
if err != nil {
return err
}
cache.put(key, imported)
Comment on lines +274 to +280

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a silent change to the public Listener contract here.

When cache.get(key) returns ok at line 274, the if !ok block (lines 275-281) is skipped, so loadYamlModel(ctx, config, loadOptions, &cycleTracker{}, included) on line 276 does not run on cache hits. The extends events emitted from inside
that call path, see opts.ProcessEvent("extends", v) at loader/extends.go:76 and opts.ProcessEvent("extends", map[string]any{"service": ref}) at extends.go:79, are therefore not re-emitted for any subsequent diamond-traversal of the same included file. Compare with the include listener at include.go:175, which fires per occurrence (it's emitted in the outer loop, before the cache lookup): two events documented in the same pipeline now behave asymmetrically.

That isn't a cosmetic observability change:

  • Listener is part of the public API (loader/loader.go:109), and downstream tools wire into it via cli/options.go:83.
  • The existing test TestLoadExtendsListener at loader/extends_test.go:440 asserts assert.Equal(t, extendsCount, 3), which establishes the contract: one event per resolved extends. Diamond include topologies break that invariant after this PR,
    and silently, because no test in this repo combines diamond includes with extends listeners.
  • Any consumer counting extends events for telemetry, dependency tracking, audit, or progress reporting will see different counts depending on the include topology , exactly the kind of regression that surfaces months later as a confused user report.

I'd suggest preserving the contract by recording events on first load and replaying them on cache hit.

}
err = importResources(imported, model, processor)
if err != nil {
Expand Down
45 changes: 45 additions & 0 deletions loader/include_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package loader

import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
Expand Down Expand Up @@ -243,3 +244,47 @@ func createFileSubDir(t *testing.T, rootDir, subDir, content, fileName string) {
path := filepath.Join(subDirPath, fileName)
assert.NilError(t, os.WriteFile(path, []byte(content), 0o600))
}

// TestIncludeDiamondDedup builds a deep "diamond" include graph where every
// level includes the next level twice. Without include memoization the leaf is
// loaded 2^depth times (exponential); the cache loads each distinct file once.
// A depth that is trivial when deduplicated (and astronomically large when not)
// makes this both a correctness and a non-flaky performance regression test.
func TestIncludeDiamondDedup(t *testing.T) {
dir := t.TempDir()
const depth = 24 // 2^24 ~= 16.7M leaf loads without dedup
for i := 0; i < depth; i++ {
content := fmt.Sprintf("include:\n - path: ./level%d.yaml\n - path: ./level%d.yaml\n", i+1, i+1)
assert.NilError(t, os.WriteFile(filepath.Join(dir, fmt.Sprintf("level%d.yaml", i)), []byte(content), 0o600))
}
leaf := "services:\n leaf:\n image: busybox\n"
assert.NilError(t, os.WriteFile(filepath.Join(dir, fmt.Sprintf("level%d.yaml", depth)), []byte(leaf), 0o600))

p, err := LoadWithContext(context.TODO(), types.ConfigDetails{
WorkingDir: dir,
ConfigFiles: []types.ConfigFile{{Filename: filepath.Join(dir, "level0.yaml")}},
}, withProjectName("diamond", true))
assert.NilError(t, err)
_, err = p.GetService("leaf")
assert.NilError(t, err)
}
Comment on lines +253 to +270

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice regression test, but the only assertions are assert.NilError(t, err) on line 267 and the GetService("leaf") check on lines 268-269. If the cache regresses, the LoadWithContext call on line 263 will spin on 2^24 leaf loads and the test goroutine stays parked on that line — loader/ doesn't check ctx.Done() anywhere, so a context.WithTimeout wouldn't help. The test then hangs until the default go test timeout (10 minutes), and fails with a generic timeout message, slow CI feedback, unclear failure mode.

A fast, descriptive fail requires running the load in a goroutine and selecting on a timeout. The leaked goroutine on timeout is fine, t.Fatal ends the test and the process exits soon after.

type result struct {
    p   *types.Project
    err error
}

// LoaderWithContext don't properly support context today, so the timeout below is consumed by the
// select, not by LoadWithContext. Two separate contexts keep that intent clear.                                                                                                                                                                           
timeout, cancel := context.WithTimeout(t.Context(), 5*time.Second)                                                                                                                                                                                         
defer cancel()                                                                                                                                                                                                                                             
                                                                                                                                                                                                                                                           
done := make(chan result, 1)
go func() {                                                                                                                                                                                                                                                
    p, err := LoadWithContext(t.Context(), types.ConfigDetails{
        WorkingDir:  dir,                                                                                                                                                                                                                                  
        ConfigFiles: []types.ConfigFile{{Filename: filepath.Join(dir, "level0.yaml")}},
    }, withProjectName("diamond", true))                                                                                                                                                                                                                   
    done <- result{p, err}
}()                                                                                                                                                                                                                                                        
                                                                                                                                                                                                                                                           
select {
case r := <-done:                                                                                                                                                                                                                                          
    assert.NilError(t, r.err)
    _, err := r.p.GetService("leaf")
    assert.NilError(t, err)
case <-timeout.Done():
    t.Fatal("diamond include did not complete within 5s — cache likely not working")                                                                                                                                                                       
}

5s is generous; the current run completes in ~0.18s on my machine.


func BenchmarkIncludeDiamond(b *testing.B) {
dir := b.TempDir()
const depth = 16
for i := 0; i < depth; i++ {
content := fmt.Sprintf("include:\n - path: ./level%d.yaml\n - path: ./level%d.yaml\n", i+1, i+1)
_ = os.WriteFile(filepath.Join(dir, fmt.Sprintf("level%d.yaml", i)), []byte(content), 0o600)
}
_ = os.WriteFile(filepath.Join(dir, fmt.Sprintf("level%d.yaml", depth)), []byte("services:\n leaf:\n image: busybox\n"), 0o600)
b.ResetTimer()
for n := 0; n < b.N; n++ {
_, err := LoadWithContext(context.TODO(), types.ConfigDetails{
WorkingDir: dir,
ConfigFiles: []types.ConfigFile{{Filename: filepath.Join(dir, "level0.yaml")}},
}, withProjectName("diamond", true))
if err != nil {
b.Fatal(err)
}
}
}