Skip to content

Commit ed70eb9

Browse files
authored
Clean up Python mutator temp dir after run (#5522)
## Why Found during a full-repo review of the CLI. When `DATABRICKS_BUNDLE_TMP` is not set, the Python mutator creates a temp directory per run for the files it exchanges with the Python subprocess and never removes it. Every `bundle validate`/`deploy` on a bundle that uses Python leaves a `-python*` directory in the system temp dir, and its `input.json` contains the full serialized bundle configuration. ## Changes Before, the temp directory leaked on every run; now it is removed once the subprocess output has been consumed. `createCacheDir` returns a cleanup function alongside the directory, and the mutator defers it. When the user sets `DATABRICKS_BUNDLE_TMP`, the directory is a user-chosen location and is kept for inspection, same as before. ## Test plan - [x] New unit test `TestCreateCacheDir` in `bundle/config/mutator/python` covering both branches: temp dir removed by cleanup, `DATABRICKS_BUNDLE_TMP` dir kept - [x] `go test ./bundle/config/mutator/python/` passes - [x] Acceptance test `TestAccept/bundle/python/resolve-variable` passes for all four variants (both engines, both `databricks-bundles` versions), exercising the real subprocess flow with cleanup in place - [x] `./task fmt-q`, `./task lint-q`, `./task checks` pass This pull request and its description were written by Isaac.
1 parent caa3d62 commit ed70eb9

2 files changed

Lines changed: 44 additions & 6 deletions

File tree

bundle/config/mutator/python/python_mutator.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -238,10 +238,11 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
238238
return dyn.InvalidValue, fmt.Errorf("failed to get Python interpreter path: %w", err)
239239
}
240240

241-
cacheDir, err := createCacheDir(ctx)
241+
cacheDir, cleanup, err := createCacheDir(ctx)
242242
if err != nil {
243243
return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err)
244244
}
245+
defer cleanup()
245246

246247
rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{
247248
cacheDir: cacheDir,
@@ -315,7 +316,8 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno
315316
return mutateDiags
316317
}
317318

318-
func createCacheDir(ctx context.Context) (string, error) {
319+
// createCacheDir returns the directory for input/output files of the Python subprocess, and a cleanup function.
320+
func createCacheDir(ctx context.Context) (string, func(), error) {
319321
// b.LocalStateDir doesn't work because target isn't yet selected
320322

321323
// support the same env variable as in b.LocalStateDir
@@ -325,13 +327,20 @@ func createCacheDir(ctx context.Context) (string, error) {
325327

326328
err := os.MkdirAll(cacheDir, 0o700)
327329
if err != nil {
328-
return "", err
330+
return "", nil, err
329331
}
330332

331-
return cacheDir, nil
333+
// the user picked this location explicitly, keep the files for inspection
334+
return cacheDir, func() {}, nil
335+
}
336+
337+
cacheDir, err := os.MkdirTemp("", "-python")
338+
if err != nil {
339+
return "", nil, err
332340
}
333341

334-
return os.MkdirTemp("", "-python")
342+
// input.json contains the full serialized bundle configuration; don't leave it behind in the system temp dir
343+
return cacheDir, func() { _ = os.RemoveAll(cacheDir) }, nil
335344
}
336345

337346
func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) {

bundle/config/mutator/python/python_mutator_test.go

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020

2121
"github.com/databricks/cli/bundle"
2222
"github.com/databricks/cli/bundle/config"
23+
"github.com/databricks/cli/internal/testutil"
2324
"github.com/databricks/cli/libs/cmdio"
2425
"github.com/databricks/cli/libs/process"
2526
"github.com/stretchr/testify/assert"
@@ -471,6 +472,34 @@ func TestStrictNormalize(t *testing.T) {
471472
assert.True(t, strictDiags.HasError())
472473
}
473474

475+
func TestCreateCacheDir(t *testing.T) {
476+
testutil.CleanupEnvironment(t)
477+
478+
t.Run("DATABRICKS_BUNDLE_TMP is set", func(t *testing.T) {
479+
tempDir := t.TempDir()
480+
t.Setenv(env.TempDirVariable, tempDir)
481+
482+
cacheDir, cleanup, err := createCacheDir(t.Context())
483+
require.NoError(t, err)
484+
require.Equal(t, filepath.Join(tempDir, "default", "python"), cacheDir)
485+
486+
cleanup()
487+
488+
// user-specified directories are kept for inspection
489+
assert.DirExists(t, cacheDir)
490+
})
491+
492+
t.Run("DATABRICKS_BUNDLE_TMP is not set", func(t *testing.T) {
493+
cacheDir, cleanup, err := createCacheDir(t.Context())
494+
require.NoError(t, err)
495+
require.DirExists(t, cacheDir)
496+
497+
cleanup()
498+
499+
assert.NoDirExists(t, cacheDir)
500+
})
501+
}
502+
474503
func TestExplainProcessErr(t *testing.T) {
475504
stderr := "/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks')\n"
476505
expected := `/home/test/.venv/bin/python3: Error while finding module specification for 'databricks.bundles.build' (ModuleNotFoundError: No module named 'databricks')
@@ -501,7 +530,7 @@ func withProcessStub(t *testing.T, args []string, output, diagnostics, locations
501530
t.Setenv(env.TempDirVariable, t.TempDir())
502531

503532
// after we override env variable, we always get the same cache dir as mutator
504-
cacheDir, err := createCacheDir(ctx)
533+
cacheDir, _, err := createCacheDir(ctx)
505534
require.NoError(t, err)
506535

507536
inputPath := filepath.Join(cacheDir, "input.json")

0 commit comments

Comments
 (0)