Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
4066dd6
direct: store serialized_dashboard/serialized_space in state as conte…
shreyas-goenka Jun 15, 2026
c3ea69c
acceptance: assert state-stores-hash and API-gets-content invariants …
shreyas-goenka Jun 19, 2026
6c396a3
bundle/direct: clean up temp state file on bind compaction error paths
shreyas-goenka Jun 19, 2026
fd65c7f
acceptance: render serialized_dashboard/serialized_space state hash a…
shreyas-goenka Jun 22, 2026
f747ba4
Revert genie_spaces changes to scope this PR to dashboards
shreyas-goenka Jun 22, 2026
b6a6afa
direct: replace CompactState adapter method with a resources.yml decl…
shreyas-goenka Jun 22, 2026
3fc40ad
direct: clarify the compact-saved-state comment in CalculatePlan
shreyas-goenka Jun 22, 2026
6361754
direct: don't compact remote state in the diff; clearer hash token; i…
shreyas-goenka Jun 22, 2026
8854393
direct: make hashed_in_state orthogonal to ignore_remote_changes
shreyas-goenka Jun 22, 2026
85af285
acceptance: cover dashboard hashed-state correctness (plan->apply rou…
shreyas-goenka Jun 22, 2026
e2cc859
direct: add libs/hash and simplify hashed_in_state to field names
shreyas-goenka Jun 24, 2026
bc44376
acceptance: cover the --plan update cycle for dashboard hashed state
shreyas-goenka Jun 24, 2026
58037af
direct: pin the legacy full-content state migration invariant
shreyas-goenka Jun 24, 2026
a3c6424
direct: document the top-level-field constraint of CompactState's sha…
shreyas-goenka Jun 29, 2026
b5b7fbc
libs/hash: rename JSON -> OfJSON and drop the fingerprintToHash wrapper
shreyas-goenka Jul 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"etag": [ETAG],
"parent_path": "/Workspace/Users/[USERNAME]/.bundle/test-bundle-[UNIQUE_NAME]/default/resources",
"published": true,
"serialized_dashboard": "{\"pages\":[{\"displayName\":\"Page One\",\"name\":\"02724bf2\"}]}",
"serialized_dashboard": "sha256:[ALPHANUMID]",
"warehouse_id": "[TEST_DEFAULT_WAREHOUSE_ID]"
}
}
Expand Down
2 changes: 1 addition & 1 deletion acceptance/bundle/migrate/dashboards/out.new_state.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"etag": "[NUMID]",
"parent_path": "/Workspace/Users/[USERNAME]",
"published": true,
"serialized_dashboard": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n",
"serialized_dashboard": "sha256:[ALPHANUMID]",
"warehouse_id": "123456"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
"serialized_dashboard": {
"action": "skip",
"reason": "etag_based",
"old": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n",
"new": "{\"pages\":[{\"name\":\"02724bf2\",\"displayName\":\"Dashboard test bundle-deploy-dashboard\"}]}\n",
"remote": "{\"pages\":[{\"displayName\":\"Dashboard test bundle-deploy-dashboard\",\"name\":\"02724bf2\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}\n"
"old": "sha256:[ALPHANUMID]",
"new": "sha256:[ALPHANUMID]",
"remote": "sha256:[ALPHANUMID]"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@
"serialized_dashboard": {
"action": "skip",
"reason": "etag_based",
"old": "{\n \"pages\": [\n {\n \"displayName\": \"New Page\",\n \"layout\": [\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 0\n },\n \"widget\": {\n \"name\": \"82eb9107\",\n \"textbox_spec\": \"# I'm a title\"\n }\n },\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 2\n },\n \"widget\": {\n \"name\": \"ffa6de4f\",\n \"textbox_spec\": \"Text\"\n }\n }\n ],\n \"name\": \"fdd21a3c\"\n }\n ]\n}\n",
"new": "{\n \"pages\": [\n {\n \"displayName\": \"New Page\",\n \"layout\": [\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 0\n },\n \"widget\": {\n \"name\": \"82eb9107\",\n \"textbox_spec\": \"# I'm a title\"\n }\n },\n {\n \"position\": {\n \"height\": 2,\n \"width\": 6,\n \"x\": 0,\n \"y\": 2\n },\n \"widget\": {\n \"name\": \"ffa6de4f\",\n \"textbox_spec\": \"Text\"\n }\n }\n ],\n \"name\": \"fdd21a3c\"\n }\n ]\n}\n",
"remote": "{}\n"
"old": "sha256:[ALPHANUMID]",
"new": "sha256:[ALPHANUMID]",
"remote": "sha256:[ALPHANUMID]"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
"serialized_dashboard": {
"action": "skip",
"reason": "etag_based",
"old": "{ }\n",
"new": "{ }\n",
"remote": "{}\n"
"old": "sha256:[ALPHANUMID]",
"new": "sha256:[ALPHANUMID]",
"remote": "sha256:[ALPHANUMID]"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@
"serialized_dashboard": {
"action": "skip",
"reason": "etag_based",
"old": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\"}]}",
"new": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\"}]}",
"remote": "{\"pages\":[{\"displayName\":\"Test Dashboard\",\"name\":\"test-page\",\"pageType\":\"PAGE_TYPE_CANVAS\"}]}"
"old": "sha256:[ALPHANUMID]",
"new": "sha256:[ALPHANUMID]",
"remote": "sha256:[ALPHANUMID]"
}
}
}
Expand Down
9 changes: 9 additions & 0 deletions acceptance/bundle/test.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,12 @@ New = 'os/[OS]'
[[Repls]]
Old = ' cicd/github'
New = ''

# serialized_dashboard / serialized_space are persisted in direct-engine state as a
# content hash. Collapse the 64-char digest to a single token, at a low Order so it
# runs before the generic [NUMID] and [DASHBOARD_ID] rules that would otherwise split
# the hex into pieces.
[[Repls]]
Old = 'sha256:[0-9a-f]{64}'
New = 'sha256:[ALPHANUMID]'
Order = -1
18 changes: 14 additions & 4 deletions bundle/direct/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ func (d *DeploymentUnit) Deploy(ctx context.Context, db *dstate.DeploymentState,
}
}

// saveState persists newState for this resource under the given id. The state is first
// passed through dresources.CompactState together with the adapter's resource config,
// which replaces fields declared hashed_in_state with content hashes; the compacted
// result is what gets written, along with the unit's DependsOn.
//
// A CompactState failure is wrapped and returned without touching db; the error from
// db.SaveState is returned as is.
func (d *DeploymentUnit) saveState(db *dstate.DeploymentState, id string, newState any) error {
	stateToPersist, compactErr := dresources.CompactState(d.Adapter.ResourceConfig(), newState)
	if compactErr != nil {
		return fmt.Errorf("compacting state: %w", compactErr)
	}

	return db.SaveState(d.ResourceKey, id, stateToPersist, d.DependsOn)
}

func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState, newState any) error {
var newID string
var remoteState any
Expand All @@ -75,7 +85,7 @@ func (d *DeploymentUnit) Create(ctx context.Context, db *dstate.DeploymentState,
return err
}

err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn)
err = d.saveState(db, newID, newState)
if err != nil {
return fmt.Errorf("saving state after creating id=%s: %w", newID, err)
}
Expand Down Expand Up @@ -146,7 +156,7 @@ func (d *DeploymentUnit) Update(ctx context.Context, db *dstate.DeploymentState,
return err
}

err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn)
err = d.saveState(db, id, newState)
if err != nil {
return fmt.Errorf("saving state id=%s: %w", id, err)
}
Expand Down Expand Up @@ -190,7 +200,7 @@ func (d *DeploymentUnit) UpdateWithID(ctx context.Context, db *dstate.Deployment
return err
}

err = db.SaveState(d.ResourceKey, newID, newState, d.DependsOn)
err = d.saveState(db, newID, newState)
if err != nil {
return fmt.Errorf("saving state id=%s: %w", oldID, err)
}
Expand Down Expand Up @@ -252,7 +262,7 @@ func (d *DeploymentUnit) Resize(ctx context.Context, db *dstate.DeploymentState,
return fmt.Errorf("resizing id=%s: %w", id, err)
}

err = db.SaveState(d.ResourceKey, id, newState, d.DependsOn)
err = d.saveState(db, id, newState)
if err != nil {
return fmt.Errorf("saving state id=%s: %w", id, err)
}
Expand Down
14 changes: 13 additions & 1 deletion bundle/direct/bind.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/deployplan"
"github.com/databricks/cli/bundle/direct/dresources"
"github.com/databricks/cli/bundle/direct/dstate"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/structs/structaccess"
Expand Down Expand Up @@ -145,13 +146,24 @@ func (b *DeploymentBundle) Bind(ctx context.Context, client *databricks.Workspac
}
}

adapter, err := b.getAdapterForKey(resourceKey)
if err != nil {
os.Remove(tmpStatePath)
return nil, err
}
compacted, err := dresources.CompactState(adapter.ResourceConfig(), sv.Value)
if err != nil {
os.Remove(tmpStatePath)
return nil, fmt.Errorf("compacting state: %w", err)
}

err = b.StateDB.Open(ctx, tmpStatePath, dstate.WithRecovery(true), dstate.WithWrite(true))
if err != nil {
os.Remove(tmpStatePath)
return nil, err
}

err = b.StateDB.SaveState(resourceKey, resourceID, sv.Value, dependsOn)
err = b.StateDB.SaveState(resourceKey, resourceID, compacted, dependsOn)
if err != nil {
os.Remove(tmpStatePath)
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion bundle/direct/bundle_apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa
logdiag.LogError(ctx, fmt.Errorf("state entry not found for %q", resourceKey))
return false
}
err = b.StateDB.SaveState(resourceKey, id, sv.Value, entry.DependsOn)
err = d.saveState(&b.StateDB, id, sv.Value)
} else {
// TODO: redo calcDiff to downgrade planned action if possible (?)
err = d.Deploy(ctx, &b.StateDB, sv.Value, action, entry)
Expand Down
40 changes: 38 additions & 2 deletions bundle/direct/bundle_plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,28 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks
return false
}

// Replace the hashed_in_state fields with their "sha256:..." hash in the state we
// just read off disk, so the diff below compares like-for-like.
//
// We only ever keep a content hash for big fields like serialized_dashboard, never
// the full contents. The new config we diff against (localState, below) is hashed
// too, so the saved side has to be hashed as well or the two could never match. The
// state we read might still hold the full, un-hashed contents though: either it was
// written by an older CLI from before this change, or the field was only just added
// to hashed_in_state. Hashing it here, on read, lines the two sides up so an
// unchanged resource correctly shows "no change".
//
// This only changes the in-memory copy used for the diff. The on-disk entry keeps
// its full contents until the resource is next saved (which rewrites it as a hash),
// so no state_version bump or explicit migration is needed.
//
// See https://github.com/databricks/cli/pull/5609
savedState, err = dresources.CompactState(adapter.ResourceConfig(), savedState)
if err != nil {
logdiag.LogError(ctx, fmt.Errorf("%s: compacting saved state: %w", errorPrefix, err))
return false
}

// Note, currently we're diffing static structs, not dynamic value.
// This means for fields that contain references like ${resources.group.foo.id} we do one of the following:
// for strings: comparing unresolved string like "${resources.group.foo.id}" with actual object id. As long as IDs do not have ${...} format we're good.
Expand All @@ -208,7 +230,15 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks
logdiag.LogError(ctx, fmt.Errorf("%s: internal error: no state cache entry found for %q", errorPrefix, resourceKey))
return false
}
localDiff, err := structdiff.GetStructDiff(savedState, sv.Value, adapter.KeyedSlices())

// Compact a copy for comparison only; sv.Value keeps the full contents, which
// the deploy sends to the API.
localState, err := dresources.CompactState(adapter.ResourceConfig(), sv.Value)
if err != nil {
logdiag.LogError(ctx, fmt.Errorf("%s: compacting local state: %w", errorPrefix, err))
return false
}
localDiff, err := structdiff.GetStructDiff(savedState, localState, adapter.KeyedSlices())
if err != nil {
logdiag.LogError(ctx, fmt.Errorf("%s: diffing local state: %w", errorPrefix, err))
return false
Expand Down Expand Up @@ -241,7 +271,13 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks
return false
}

remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, sv.Value, adapter.KeyedSlices())
remoteStateComparable, err = dresources.CompactState(adapter.ResourceConfig(), remoteStateComparable)
if err != nil {
logdiag.LogError(ctx, fmt.Errorf("%s: compacting remote state id=%q: %w", errorPrefix, dbentry.ID, err))
return false
}

remoteDiff, err = structdiff.GetStructDiff(remoteStateComparable, localState, adapter.KeyedSlices())
if err != nil {
logdiag.LogError(ctx, fmt.Errorf("%s: diffing remote state: %w", errorPrefix, err))
return false
Expand Down
11 changes: 11 additions & 0 deletions bundle/direct/dresources/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@ If the API may return a slice's elements in a different order between calls (e.g
The state struct is serialized to JSON and persisted between deploys. Backward-incompatible changes will result in drift, which, depending
on the field's behaviour, might result in a recreate. See dstate/migrate.go for how to handle state migration.

## hashed_in_state: storing large fields as content hashes

Declare a field under `hashed_in_state` in `resources.yml` when it holds large content that is only ever compared for equality and never read back from state. The engine then persists only a `sha256:<hex>` content hash for that field (via `CompactState`, applied both before saving state and to every value entering the diff), so stored and compared values share one form. The full contents stay in the plan's `new_state` and are sent to the API on every deploy, so the deploy is unaffected.

A field qualifies only if **all** of the following hold:
- it is declared `ignore_remote_changes` (so it is never meaningfully compared against the remote value — typically `etag_based` drift detection),
- it is not read back from state by any code path (e.g. not consumed raw by `OverrideChangeDesc` or by state export), and
- it can be large (a small field gains nothing — the hash placeholder is ~70 bytes).

`dashboards.serialized_dashboard` uses this: it inlines a file's contents into config, detects drift via `etag`, and always sends the full contents to the API from the plan's `new_state`. As a result the plan reports the field as a hash (`sha256:...`) rather than full content. No state version bump is needed: legacy full-content state is hashed on read for comparison and rewritten compactly on the next save. Add a test asserting the field is declared both `hashed_in_state` and `ignore_remote_changes` to guard the invariant.

## OverrideChangeDesc

Use `OverrideChangeDesc` only as a last resort when `resources.yml` settings cannot express the needed logic. Skipping an action with `change.Action = deployplan.Skip` in `OverrideChangeDesc` creates a silent no-op: the plan shows no change even if the user's config differs from remote. Document the skip reason clearly in both the comment and `change.Reason`.
Expand Down
7 changes: 7 additions & 0 deletions bundle/direct/dresources/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ type ResourceLifecycleConfig struct {
// BackendDefaults: fields where the backend may set defaults.
// When old and new are nil but remote is set, and the remote value matches allowed values (if specified), the change is skipped.
BackendDefaults []BackendDefaultRule `yaml:"backend_defaults,omitempty"`

// HashedInState: fields persisted to state as a content hash ("sha256:<hex>")
// instead of their full contents. Only valid for large, equality-only fields
// that are never read back from state (e.g. dashboards' serialized_dashboard,
// which is ignore_remote_changes and re-sent from config on every deploy).
HashedInState []FieldRule `yaml:"hashed_in_state,omitempty"`
}

// Config is the root configuration structure for resource lifecycle behavior.
Expand All @@ -91,6 +97,7 @@ var empty = ResourceLifecycleConfig{
NormalizeCase: nil,
NormalizeSlash: nil,
BackendDefaults: nil,
HashedInState: nil,
}

func mustParseConfig(data []byte) func() *Config {
Expand Down
50 changes: 50 additions & 0 deletions bundle/direct/dresources/dashboard_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ package dresources

import (
"encoding/json"
"strings"
"testing"

"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/structs/structpath"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand All @@ -23,3 +25,51 @@ func TestDashboardState_JSONSerialization_PublishedField(t *testing.T) {

assert.Contains(t, string(data), `"published":true`)
}

// TestDashboardCompactState checks CompactState on a dashboard state: serialized_dashboard
// becomes a string carrying stateHashPrefix, the remaining fields are carried over, the
// input is left unmodified, and compacting the result again yields the same value.
func TestDashboardCompactState(t *testing.T) {
	const content = `{"pages":[{"name":"p1"}]}`

	original := &DashboardState{
		DashboardConfig: resources.DashboardConfig{
			DisplayName:         "test-dashboard",
			Etag:                "etag-123",
			SerializedDashboard: content,
		},
	}

	result, err := CompactState(GetResourceConfig("dashboards"), original)
	require.NoError(t, err)
	first := result.(*DashboardState)

	// The hashed field must be a string that starts with the hash prefix.
	require.IsType(t, "", first.SerializedDashboard)
	hashed := first.SerializedDashboard.(string)
	assert.True(t, strings.HasPrefix(hashed, stateHashPrefix))

	// Fields that are not hashed come through unchanged.
	assert.Equal(t, "test-dashboard", first.DisplayName)
	assert.Equal(t, "etag-123", first.Etag)

	// The caller's value still holds the full contents.
	assert.Equal(t, content, original.SerializedDashboard)

	// A second pass over already-compacted state produces the same hash.
	again, err := CompactState(GetResourceConfig("dashboards"), first)
	require.NoError(t, err)
	second := again.(*DashboardState)
	assert.Equal(t, first.SerializedDashboard, second.SerializedDashboard)
}

// TestDashboardSerializedDashboardStateRules protects the hash-only state invariant for
// dashboards. serialized_dashboard has to appear under hashed_in_state so that it is
// persisted as a hash, and, since a hash can never equal the full-content remote value,
// it has to appear under ignore_remote_changes as well.
func TestDashboardSerializedDashboardStateRules(t *testing.T) {
	cfg := GetResourceConfig("dashboards")
	fieldPath := structpath.NewStringKey(nil, "serialized_dashboard")

	// declared reports whether at least one rule in the list satisfies
	// fieldPath.HasPatternPrefix for its Field.
	declared := func(rules []FieldRule) bool {
		found := false
		for i := range rules {
			if fieldPath.HasPatternPrefix(rules[i].Field) {
				found = true
				break
			}
		}
		return found
	}

	inHashed := declared(cfg.HashedInState)
	assert.True(t, inHashed, "serialized_dashboard must be declared hashed_in_state")

	inIgnored := declared(cfg.IgnoreRemoteChanges)
	assert.True(t, inIgnored, "serialized_dashboard must be ignore_remote_changes for SHA-only state to be correct")
}
7 changes: 7 additions & 0 deletions bundle/direct/dresources/resources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,13 @@ resources:
- field: dataset_schema
reason: input_only

hashed_in_state:
# serialized_dashboard holds the inlined dashboard JSON (often megabytes). It is
# ignore_remote_changes (etag_based) and re-sent from config on every deploy, so it
# is never read back from state; persist only its content hash to keep state small.
- field: serialized_dashboard
reason: large_content

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

omit reason here. It does not show up in plan.


genie_spaces:
ignore_remote_changes:
# serialized_space locally (structured YAML) and remotely (JSON string) will differ
Expand Down
Loading
Loading