Skip to content

Commit 44e4bc0

Browse files
Track bundle resource counts and state sizes in telemetry
Adds a typed `resources_metadata` field to the bundle deploy telemetry event, capturing per resource type:

- count of resources declared in the bundle configuration (replacing the deprecated DatabricksBundleDeployEvent.resource_*_count fields, still populated during the transition)
- max / mean / median state size in bytes across resources of that type

plus the whole simulated state file size and the effective deploy engine ("direct" or "terraform").

State sizes are computed from the bundle configuration, not by reading the on-disk state file. Each resource's typed config is run through the direct engine's adapter.PrepareState -- the same transformation direct uses to derive the value it persists to resources.json -- and marshaled with the indented encoding dstate.SaveState uses. Two consequences:

- the numbers are engine-independent, so direct and terraform deploys of the same logical bundle report identical sizes (tfstate is never read);
- for direct deploys each per-resource size equals len(entry.State) on disk byte-for-byte, and the file size matches resources.json up to the Lineage/Serial fields.

The feature is one isolated module (bundle/phases/resources_metadata.go) plus one line at the telemetry-emission call site. Telemetry never fails a deploy: all parse/adapter errors are logged at debug level and treated as missing data.

The universe proto (DatabricksBundleDeployEvent.resources_metadata, BundleResourcesMetadata, ResourceMetadata) already exists, so this is ingested rather than dropped.

Co-authored-by: Isaac
1 parent 30db14f commit 44e4bc0

6 files changed

Lines changed: 460 additions & 0 deletions

File tree

acceptance/bundle/telemetry/deploy/out.telemetry.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,26 @@
3535
"[UUID]",
3636
"[UUID]"
3737
],
38+
"resources_metadata": {
39+
"state_engine": "[ENGINE]",
40+
"state_file_size_bytes": SMALL_INT,
41+
"resources": [
42+
{
43+
"resource_type": "jobs",
44+
"count": 3,
45+
"state_size_max_bytes": SMALL_INT,
46+
"state_size_mean_bytes": SMALL_INT,
47+
"state_size_median_bytes": SMALL_INT
48+
},
49+
{
50+
"resource_type": "pipelines",
51+
"count": 2,
52+
"state_size_max_bytes": SMALL_INT,
53+
"state_size_mean_bytes": SMALL_INT,
54+
"state_size_median_bytes": SMALL_INT
55+
}
56+
]
57+
},
3858
"experimental": {
3959
"configuration_file_count": 1,
4060
"variable_count": 0,

acceptance/bundle/telemetry/test.toml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,28 @@ New = '[OS]'
2020
[[Repls]]
2121
Old = '"local_cache_measurements_ms": \[[^\]]*\]'
2222
New = '"local_cache_measurements_ms": [...redacted...]'
23+
24+
# Normalize the deployment engine name in resources_metadata so the same
25+
# fixture covers both DATABRICKS_BUNDLE_ENGINE=direct and =terraform runs.
26+
[[Repls]]
27+
Old = '"state_engine": "(direct|terraform)"'
28+
New = '"state_engine": "[ENGINE]"'
29+
30+
# Normalize byte-size measurements in resources_metadata to placeholders.
31+
# Exact byte counts depend on resource state JSON formatting and would
32+
# make these golden files brittle across SDK changes.
33+
[[Repls]]
34+
Old = '"state_file_size_bytes": \d+'
35+
New = '"state_file_size_bytes": SMALL_INT'
36+
37+
[[Repls]]
38+
Old = '"state_size_max_bytes": \d+'
39+
New = '"state_size_max_bytes": SMALL_INT'
40+
41+
[[Repls]]
42+
Old = '"state_size_mean_bytes": \d+'
43+
New = '"state_size_mean_bytes": SMALL_INT'
44+
45+
[[Repls]]
46+
Old = '"state_size_median_bytes": \d+'
47+
New = '"state_size_median_bytes": SMALL_INT'
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
package phases
2+
3+
import (
4+
"cmp"
5+
"context"
6+
"encoding/json"
7+
"fmt"
8+
"slices"
9+
10+
"github.com/databricks/cli/bundle"
11+
"github.com/databricks/cli/bundle/config/engine"
12+
"github.com/databricks/cli/bundle/direct/dresources"
13+
"github.com/databricks/cli/bundle/direct/dstate"
14+
"github.com/databricks/cli/libs/dyn"
15+
"github.com/databricks/cli/libs/log"
16+
"github.com/databricks/cli/libs/telemetry/protos"
17+
)
18+
19+
// collectResourcesMetadata builds a BundleResourcesMetadata for the deploy.
20+
//
21+
// State sizes are computed by running each resource's typed config through
22+
// the direct engine's adapter.PrepareState — the same transformation direct
23+
// uses to derive the value it persists to resources.json — and marshaling
24+
// each entry with dstate.SaveState's encoding (MarshalIndent(" ", " ")).
25+
// The whole-file size is then computed by assembling those entries into a
26+
// dstate.Database and marshaling it the way DeploymentState.unlockedSave
27+
// writes it (MarshalIndent("", " ")). So:
28+
//
29+
// - Under DATABRICKS_BUNDLE_ENGINE=direct, per-resource sizes equal
30+
// len(entry.State) on disk byte-for-byte, and state_file_size_bytes
31+
// matches the resources.json file size to within a few bytes (only
32+
// Lineage and Serial may differ, which we set to "" / 0 here).
33+
// - Under =terraform, the same computation runs against the bundle config,
34+
// producing identical numbers for the same logical bundle. tfstate is
35+
// never read.
36+
//
37+
// Returns nil when the bundle declares no resources.
38+
func collectResourcesMetadata(ctx context.Context, b *bundle.Bundle) *protos.BundleResourcesMetadata {
39+
counts, sizesByType, fileSize := collectResourceCountsAndSizes(ctx, b)
40+
if len(counts) == 0 {
41+
return nil
42+
}
43+
44+
types := unionKeys(counts, sizesByType)
45+
slices.Sort(types)
46+
47+
resources := make([]protos.ResourceMetadata, 0, len(types))
48+
for _, t := range types {
49+
sizes := sizesByType[t]
50+
slices.SortFunc(sizes, func(a, b int64) int { return cmp.Compare(a, b) })
51+
resources = append(resources, protos.ResourceMetadata{
52+
ResourceType: t,
53+
Count: counts[t],
54+
StateSizeMaxBytes: statMax(sizes),
55+
StateSizeMeanBytes: statMean(sizes),
56+
StateSizeMedianBytes: statMedian(sizes),
57+
})
58+
}
59+
60+
return &protos.BundleResourcesMetadata{
61+
StateEngine: resolveDeployEngine(ctx, b),
62+
StateFileSizeBytes: fileSize,
63+
Resources: resources,
64+
}
65+
}
66+
67+
// collectResourceCountsAndSizes walks the bundle config and assembles a
68+
// dstate.Database with each resource's PrepareState'd value, then marshals
69+
// that database the way direct writes resources.json. Returns per-type
70+
// counts, per-type per-resource byte lengths, and the byte length of the
71+
// whole simulated state file.
72+
func collectResourceCountsAndSizes(ctx context.Context, b *bundle.Bundle) (map[string]int64, map[string][]int64, int64) {
73+
counts := make(map[string]int64)
74+
sizesByType := make(map[string][]int64)
75+
76+
adapters := getAdapters(ctx, b)
77+
db := dstate.NewDatabase("", 0)
78+
79+
pattern := dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey())
80+
_, err := dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
81+
if len(p) < 3 {
82+
return v, nil
83+
}
84+
resourceType := p[1].Key()
85+
counts[resourceType]++
86+
87+
stateBytes, err := resourceStateBytes(b, adapters, p, resourceType)
88+
if err != nil {
89+
log.Debugf(ctx, "resources-metadata telemetry: %s: %s", p, err)
90+
return v, nil
91+
}
92+
sizesByType[resourceType] = append(sizesByType[resourceType], int64(len(stateBytes)))
93+
db.State[p.String()] = dstate.ResourceEntry{
94+
ID: extractResourceID(v),
95+
State: stateBytes,
96+
}
97+
return v, nil
98+
})
99+
if err != nil {
100+
log.Debugf(ctx, "resources-metadata telemetry: failed to walk config resources: %s", err)
101+
}
102+
103+
var fileSize int64
104+
if len(db.State) > 0 {
105+
raw, mErr := json.MarshalIndent(db, "", " ")
106+
if mErr != nil {
107+
log.Debugf(ctx, "resources-metadata telemetry: failed to marshal database envelope: %s", mErr)
108+
} else {
109+
fileSize = int64(len(raw))
110+
}
111+
}
112+
return counts, sizesByType, fileSize
113+
}
114+
115+
// resourceStateBytes derives the bytes direct would store for one resource:
116+
// GetResourceConfig (typed) → adapter.PrepareState → MarshalIndent with the
117+
// same prefix/indent direct uses in dstate.SaveState. Falls back to marshaling
118+
// the typed config when no adapter is registered for the resource type
119+
// (e.g., a type the direct engine doesn't yet support).
120+
func resourceStateBytes(b *bundle.Bundle, adapters map[string]*dresources.Adapter, p dyn.Path, resourceType string) ([]byte, error) {
121+
cfg, err := b.Config.GetResourceConfig(p.String())
122+
if err != nil {
123+
return nil, fmt.Errorf("get config: %w", err)
124+
}
125+
126+
target := cfg
127+
if adapter, ok := adapters[resourceType]; ok {
128+
state, err := adapter.PrepareState(cfg)
129+
if err != nil {
130+
return nil, fmt.Errorf("prepare state: %w", err)
131+
}
132+
target = state
133+
}
134+
135+
// dstate.SaveState writes resource state with MarshalIndent using these
136+
// exact prefix/indent arguments; matching them here means each resource's
137+
// byte length equals len(entry.State) on disk for direct deploys.
138+
raw, err := json.MarshalIndent(target, " ", " ")
139+
if err != nil {
140+
return nil, fmt.Errorf("marshal: %w", err)
141+
}
142+
return raw, nil
143+
}
144+
145+
// extractResourceID returns the resource's ID string from its dyn.Value entry,
146+
// or "" if not yet set. Each resources.<type>.<name> entry has an "id" field
147+
// populated post-deploy (via BaseResource.ID).
148+
func extractResourceID(v dyn.Value) string {
149+
idVal, err := dyn.Get(v, "id")
150+
if err != nil || idVal.Kind() != dyn.KindString {
151+
return ""
152+
}
153+
return idVal.MustString()
154+
}
155+
156+
// getAdapters returns adapters initialized for PrepareState. If the bundle
157+
// already has them initialized (direct engine path), reuse them. Otherwise,
158+
// build a fresh set with a nil workspace client — PrepareState is a pure
159+
// transformation that doesn't touch the client.
160+
func getAdapters(ctx context.Context, b *bundle.Bundle) map[string]*dresources.Adapter {
161+
if b.DeploymentBundle.Adapters != nil {
162+
return b.DeploymentBundle.Adapters
163+
}
164+
adapters, err := dresources.InitAll(nil)
165+
if err != nil {
166+
log.Debugf(ctx, "resources-metadata telemetry: failed to init adapters: %s", err)
167+
return nil
168+
}
169+
return adapters
170+
}
171+
172+
// resolveDeployEngine returns the effective deploy engine ("direct" or
173+
// "terraform"). Mirrors cmd/bundle/utils.ResolveEngineSetting but is inlined
174+
// here to avoid a layering import (bundle/phases must not depend on cmd/).
175+
func resolveDeployEngine(ctx context.Context, b *bundle.Bundle) string {
176+
if b.Config.Bundle.Engine != engine.EngineNotSet {
177+
return string(b.Config.Bundle.Engine.ThisOrDefault())
178+
}
179+
envEngine, _ := engine.FromEnv(ctx)
180+
return string(envEngine.ThisOrDefault())
181+
}
182+
183+
// unionKeys returns every key that appears in a or in b, each exactly once.
// The order of the result is unspecified (it follows map iteration order);
// callers that need a stable order must sort it. The result is never nil.
func unionKeys(a map[string]int64, b map[string][]int64) []string {
	keys := make([]string, 0, len(a)+len(b))
	for key := range a {
		keys = append(keys, key)
	}
	for key := range b {
		// Keys already contributed by a must not be emitted twice.
		if _, dup := a[key]; dup {
			continue
		}
		keys = append(keys, key)
	}
	return keys
}
197+
198+
// statMax returns the largest value in sortedSizes, or 0 for an empty slice.
//
// Callers in this file pass an ascending slice, but the result does not depend
// on that: the maximum is computed with slices.Max rather than by reading the
// last element, so an unsorted input can no longer yield a silently wrong
// value.
func statMax(sortedSizes []int64) int64 {
	// slices.Max panics on an empty slice, so the guard must come first.
	if len(sortedSizes) == 0 {
		return 0
	}
	return slices.Max(sortedSizes)
}
204+
205+
// statMean returns the arithmetic mean of sortedSizes using integer division,
// so any fractional part is discarded. An empty slice yields 0. The order of
// the elements does not affect the result.
func statMean(sortedSizes []int64) int64 {
	n := int64(len(sortedSizes))
	if n == 0 {
		return 0
	}
	var sum int64
	for i := range sortedSizes {
		sum += sortedSizes[i]
	}
	return sum / n
}
215+
216+
// statMedian returns the median of sortedSizes, which must already be in
// ascending order. For an even number of elements it returns the lower of the
// two middle values (no averaging), so the result is always a value that
// occurs in the input. An empty slice yields 0.
func statMedian(sortedSizes []int64) int64 {
	n := len(sortedSizes)
	if n == 0 {
		return 0
	}
	mid := (n - 1) / 2
	return sortedSizes[mid]
}

0 commit comments

Comments
 (0)