Skip to content

Commit e8f62dd

Browse files
committed
loader: memoize includes to avoid re-expanding diamond include graphs
ApplyInclude re-parses and recursively re-expands an included file once per include path that reaches it. When the same file is reached through more than one path (a "diamond" in the include graph) this is quadratic-to-exponential: a 24-level doubling graph loads the leaf 2^24 times. Monorepos that aggregate per-target / per-project compose fragments hit this in practice (an ~80-service federation took ~55s in `docker compose config`). Memoize each loaded include model for the duration of a single load, keyed on every input that determines it — resolved paths, working dir, project dir, and effective environment — and hand out a deep copy on each hit. The merge into the parent (importResources) still runs for every occurrence, so a same-file `extends` in the including file still resolves and the result is identical to loading each time; only the parse + recursive expansion is shared. Keying on the working dir matters: the same file reached through two parents can have a different relative base, yielding models with different relative paths; reusing across bases would let the caller rebase an already-resolved path. Cycle-safe: an include cycle is intrinsic to a node's subtree, so it is detected on the node's first load, before it can be cached. Adds a deep-diamond regression test (times out without the cache) and a benchmark. Signed-off-by: Davi de Castro Reis <davi@davi.eng.br>
1 parent d48021c commit e8f62dd

2 files changed

Lines changed: 172 additions & 3 deletions

File tree

loader/include.go

Lines changed: 127 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,14 @@ package loader
1818

1919
import (
2020
"context"
21+
"crypto/sha256"
22+
"encoding/hex"
2123
"fmt"
2224
"os"
2325
"path/filepath"
26+
"sort"
2427
"strings"
28+
"sync"
2529

2630
"github.com/compose-spec/compose-go/v2/dotenv"
2731
interp "github.com/compose-spec/compose-go/v2/interpolation"
@@ -30,6 +34,114 @@ import (
3034
"github.com/compose-spec/compose-go/v2/types"
3135
)
3236

37+
// includeCache is a per-load memo of include models that have already been
// parsed and expanded.
//
// A file that is reachable through several include paths (a "diamond" in the
// include graph) used to be parsed and recursively expanded once per path,
// which is quadratic-to-exponential on deep graphs. With the cache, each
// distinct include is parsed and expanded only once per project load.
//
// Entries are indexed by the digest built by includeKey, which covers the
// inputs that determine the loaded model: the resolved file paths, the working
// directory, the project directory and the effective environment. A hit is
// therefore equivalent to a fresh load, even when the same file is included
// with a different env_file or project_directory.
//
// Models are deep-copied on the way in and on the way out, so importResources
// (and later normalization) never mutates a cached entry or a sibling branch
// that shares it.
//
// Cycle-safe: an include cycle is intrinsic to a node's subtree (the back-edge
// is in the fixed set of files the node includes), so it is detected on the
// node's first load, which fails before the node can be cached.
type includeCache struct {
	// mu guards entries.
	mu sync.Mutex
	// entries maps an include key to the model that was loaded for it.
	entries map[string]map[string]any
}
57+
58+
type includeCacheKey struct{}
59+
60+
// getOrCreateIncludeCache returns the include cache carried by ctx, creating one
61+
// (and a derived context) on first use so that all sibling and descendant
62+
// includes of a single load share it.
63+
func getOrCreateIncludeCache(ctx context.Context) (*includeCache, context.Context) {
64+
if c, ok := ctx.Value(includeCacheKey{}).(*includeCache); ok {
65+
return c, ctx
66+
}
67+
c := &includeCache{entries: map[string]map[string]any{}}
68+
return c, context.WithValue(ctx, includeCacheKey{}, c)
69+
}
70+
71+
func (c *includeCache) get(key string) (map[string]any, bool) {
72+
c.mu.Lock()
73+
defer c.mu.Unlock()
74+
if m, ok := c.entries[key]; ok {
75+
return deepCopyMapping(m), true
76+
}
77+
return nil, false
78+
}
79+
80+
func (c *includeCache) put(key string, model map[string]any) {
81+
c.mu.Lock()
82+
defer c.mu.Unlock()
83+
c.entries[key] = deepCopyMapping(model)
84+
}
85+
86+
// includeKey hashes the inputs that determine an included model. Two include
87+
// entries with the same key load identical content — including identical
88+
// relative paths, so a cached model is reuse-safe in the caller's context.
89+
//
90+
// workingDir (the relative base the included model's paths are resolved against)
91+
// is part of the key: the same file reached through two include parents can have
92+
// a different relative base (e.g. "a/b" vs "b"), which yields models with
93+
// different relative paths. Keying on it avoids reusing a model whose paths the
94+
// caller would then rebase incorrectly.
95+
func includeKey(paths []string, workingDir, projectDir string, env types.Mapping) string {
96+
h := sha256.New()
97+
for _, p := range paths {
98+
_, _ = h.Write([]byte(p))
99+
_, _ = h.Write([]byte{0})
100+
}
101+
_, _ = h.Write([]byte{1})
102+
_, _ = h.Write([]byte(workingDir))
103+
_, _ = h.Write([]byte{1})
104+
_, _ = h.Write([]byte(projectDir))
105+
_, _ = h.Write([]byte{1})
106+
keys := make([]string, 0, len(env))
107+
for k := range env {
108+
keys = append(keys, k)
109+
}
110+
sort.Strings(keys)
111+
for _, k := range keys {
112+
_, _ = h.Write([]byte(k))
113+
_, _ = h.Write([]byte{0})
114+
_, _ = h.Write([]byte(env[k]))
115+
_, _ = h.Write([]byte{0})
116+
}
117+
return hex.EncodeToString(h.Sum(nil))
118+
}
119+
120+
// deepCopyMapping clones a generic YAML mapping — the shape of a compose model
// that has not been converted to typed structs yet: nested map[string]any and
// []any containers holding scalars. The result is always a non-nil map that
// shares no map[string]any or []any container with m.
func deepCopyMapping(m map[string]any) map[string]any {
	clone := make(map[string]any, len(m))
	for key, val := range m {
		clone[key] = deepCopyValue(val)
	}
	return clone
}

// deepCopyValue clones v when it is a map[string]any or a []any, recursing
// into its elements. Any other value is returned unchanged.
func deepCopyValue(v any) any {
	if mapping, isMap := v.(map[string]any); isMap {
		return deepCopyMapping(mapping)
	}

	seq, isSeq := v.([]any)
	if !isSeq {
		return v
	}

	clone := make([]any, 0, len(seq))
	for _, item := range seq {
		clone = append(clone, deepCopyValue(item))
	}
	return clone
}
144+
33145
// loadIncludeConfig parses the required config from raw YAML
34146
func loadIncludeConfig(source any) ([]types.IncludeConfig, error) {
35147
if source == nil {
@@ -57,6 +169,8 @@ func ApplyInclude(ctx context.Context, workingDir string, environment types.Mapp
57169
return err
58170
}
59171

172+
cache, ctx := getOrCreateIncludeCache(ctx)
173+
60174
for _, r := range includeConfig {
61175
for _, listener := range options.Listeners {
62176
listener("include", map[string]any{
@@ -151,9 +265,19 @@ func ApplyInclude(ctx context.Context, workingDir string, environment types.Mapp
151265
LookupValue: config.LookupEnv,
152266
TypeCastMapping: options.Interpolate.TypeCastMapping,
153267
}
154-
imported, err := loadYamlModel(ctx, config, loadOptions, &cycleTracker{}, included)
155-
if err != nil {
156-
return err
268+
// Memoize by the inputs that determine the loaded model so a file
269+
// reached through several include paths is parsed and expanded once.
270+
// The merge into `model` still runs for every occurrence (a copy is
271+
// handed out), so any same-file `extends` in the including file still
272+
// resolves and the result is identical to loading it each time.
273+
key := includeKey(r.Path, config.WorkingDir, r.ProjectDirectory, config.Environment)
274+
imported, ok := cache.get(key)
275+
if !ok {
276+
imported, err = loadYamlModel(ctx, config, loadOptions, &cycleTracker{}, included)
277+
if err != nil {
278+
return err
279+
}
280+
cache.put(key, imported)
157281
}
158282
err = importResources(imported, model, processor)
159283
if err != nil {

loader/include_test.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package loader
1818

1919
import (
2020
"context"
21+
"fmt"
2122
"os"
2223
"path/filepath"
2324
"runtime"
@@ -243,3 +244,47 @@ func createFileSubDir(t *testing.T, rootDir, subDir, content, fileName string) {
243244
path := filepath.Join(subDirPath, fileName)
244245
assert.NilError(t, os.WriteFile(path, []byte(content), 0o600))
245246
}
247+
248+
// TestIncludeDiamondDedup builds a deep "diamond" include graph where every
249+
// level includes the next level twice. Without include memoization the leaf is
250+
// loaded 2^depth times (exponential); the cache loads each distinct file once.
251+
// A depth that is trivial when deduplicated (and astronomically large when not)
252+
// makes this both a correctness and a non-flaky performance regression test.
253+
func TestIncludeDiamondDedup(t *testing.T) {
254+
dir := t.TempDir()
255+
const depth = 24 // 2^24 ~= 16.7M leaf loads without dedup
256+
for i := 0; i < depth; i++ {
257+
content := fmt.Sprintf("include:\n - path: ./level%d.yaml\n - path: ./level%d.yaml\n", i+1, i+1)
258+
assert.NilError(t, os.WriteFile(filepath.Join(dir, fmt.Sprintf("level%d.yaml", i)), []byte(content), 0o600))
259+
}
260+
leaf := "services:\n leaf:\n image: busybox\n"
261+
assert.NilError(t, os.WriteFile(filepath.Join(dir, fmt.Sprintf("level%d.yaml", depth)), []byte(leaf), 0o600))
262+
263+
p, err := LoadWithContext(context.TODO(), types.ConfigDetails{
264+
WorkingDir: dir,
265+
ConfigFiles: []types.ConfigFile{{Filename: filepath.Join(dir, "level0.yaml")}},
266+
}, withProjectName("diamond", true))
267+
assert.NilError(t, err)
268+
_, err = p.GetService("leaf")
269+
assert.NilError(t, err)
270+
}
271+
272+
func BenchmarkIncludeDiamond(b *testing.B) {
273+
dir := b.TempDir()
274+
const depth = 16
275+
for i := 0; i < depth; i++ {
276+
content := fmt.Sprintf("include:\n - path: ./level%d.yaml\n - path: ./level%d.yaml\n", i+1, i+1)
277+
_ = os.WriteFile(filepath.Join(dir, fmt.Sprintf("level%d.yaml", i)), []byte(content), 0o600)
278+
}
279+
_ = os.WriteFile(filepath.Join(dir, fmt.Sprintf("level%d.yaml", depth)), []byte("services:\n leaf:\n image: busybox\n"), 0o600)
280+
b.ResetTimer()
281+
for n := 0; n < b.N; n++ {
282+
_, err := LoadWithContext(context.TODO(), types.ConfigDetails{
283+
WorkingDir: dir,
284+
ConfigFiles: []types.ConfigFile{{Filename: filepath.Join(dir, "level0.yaml")}},
285+
}, withProjectName("diamond", true))
286+
if err != nil {
287+
b.Fatal(err)
288+
}
289+
}
290+
}

0 commit comments

Comments
 (0)