Skip to content

Commit 8a52a00

Browse files
Track bundle resource counts and state file sizes in telemetry
Adds a new typed BundleResourcesMetadata struct under BundleDeployExperimental, capturing per-resource-type metadata for a bundle deploy: - count of resources of each type declared in the bundle configuration - max, mean, median state size in bytes across resources of that type - whole state file size on disk - deployment engine ("direct" or "terraform"). For Terraform deployments the tfstate is translated to the direct-engine representation (via the existing TerraformToGroupName map) before sizing so per-type stats are comparable across engines. The new count field replaces the deprecated DatabricksBundleDeployEvent.resource_*_count fields; both are populated during the transition. The Go mirror marks the deprecated Resource*Count fields with a "// Deprecated:" comment. Measurement is performed at telemetry-emission time by reading the on-disk state file once, so this lands as a single isolated module (bundle/phases/resources_metadata.go) with one new line at the call site — no instrumentation in deploy mutators, state-mgmt code, or bundle.Metrics. To remove: delete the new module and revert one line in telemetry.go plus the proto/Go field. Requires the new resources_metadata field on BundleDeployExperimental from the universe PR. Lumberjack drops unknown fields, so the two PRs can land in either order.
1 parent e1d2a5c commit 8a52a00

4 files changed

Lines changed: 446 additions & 0 deletions

File tree

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
package phases
2+
3+
import (
4+
"cmp"
5+
"context"
6+
"encoding/json"
7+
"errors"
8+
"io/fs"
9+
"os"
10+
"path/filepath"
11+
"slices"
12+
"strings"
13+
14+
"github.com/databricks/cli/bundle"
15+
"github.com/databricks/cli/bundle/deploy/terraform"
16+
"github.com/databricks/cli/bundle/direct/dstate"
17+
"github.com/databricks/cli/libs/dyn"
18+
"github.com/databricks/cli/libs/log"
19+
"github.com/databricks/cli/libs/telemetry/protos"
20+
tfjson "github.com/hashicorp/terraform-json"
21+
)
22+
23+
// collectResourcesMetadata builds a BundleResourcesMetadata for the deploy:
24+
// per-resource-type counts come from the bundle configuration (matching the
25+
// semantics of the deprecated DatabricksBundleDeployEvent.resource_*_count
26+
// fields), and state-size statistics come from the on-disk deployment state
27+
// file. For Terraform deployments the tfstate is translated to the direct-
28+
// engine representation before sizing so per-type stats are comparable across
29+
// engines.
30+
//
31+
// Returns nil only on a complete absence of signal (no resources declared and
32+
// no readable state). Telemetry must never fail a deploy — all parse errors
33+
// are logged at debug level and treated as missing data.
34+
//
35+
// This file is the sole site of resource-metadata telemetry logic. To remove
36+
// the feature: delete this file and its companion test, revert the call site
37+
// in telemetry.go, and revert the ResourcesMetadata field in
38+
// libs/telemetry/protos/bundle_deploy.go.
39+
func collectResourcesMetadata(ctx context.Context, b *bundle.Bundle) *protos.BundleResourcesMetadata {
40+
counts := countResourcesByType(ctx, b)
41+
42+
engine, fileSize, sizesByType := readStateForMetadata(ctx, b)
43+
44+
if len(counts) == 0 && len(sizesByType) == 0 && fileSize == 0 {
45+
return nil
46+
}
47+
48+
types := unionKeys(counts, sizesByType)
49+
slices.Sort(types)
50+
51+
resources := make([]protos.ResourceMetadata, 0, len(types))
52+
for _, t := range types {
53+
sizes := sizesByType[t]
54+
slices.SortFunc(sizes, func(a, b int64) int { return cmp.Compare(a, b) })
55+
resources = append(resources, protos.ResourceMetadata{
56+
ResourceType: t,
57+
Count: counts[t],
58+
StateSizeMaxBytes: statMax(sizes),
59+
StateSizeMeanBytes: statMean(sizes),
60+
StateSizeMedianBytes: statMedian(sizes),
61+
})
62+
}
63+
64+
return &protos.BundleResourcesMetadata{
65+
StateEngine: engine,
66+
StateFileSizeBytes: fileSize,
67+
Resources: resources,
68+
}
69+
}
70+
71+
// countResourcesByType walks the bundle config and counts top-level resources
72+
// at "resources.<type>.<name>". Returns map[type]count.
73+
func countResourcesByType(ctx context.Context, b *bundle.Bundle) map[string]int64 {
74+
out := make(map[string]int64)
75+
pattern := dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey())
76+
_, err := dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
77+
if len(p) >= 2 {
78+
out[p[1].Key()]++
79+
}
80+
return v, nil
81+
})
82+
if err != nil {
83+
log.Debugf(ctx, "resources-metadata telemetry: failed to walk config resources: %s", err)
84+
}
85+
return out
86+
}
87+
88+
// readStateForMetadata reads whichever local state file exists (direct
89+
// preferred, then terraform) and returns engine name, whole-file size, and
90+
// per-resource-type sizes. Returns ("", 0, nil) if no state is present or if
91+
// the bundle isn't far enough through initialization to have a target
92+
// selected (which is required to compute state file paths).
93+
func readStateForMetadata(ctx context.Context, b *bundle.Bundle) (string, int64, map[string][]int64) {
94+
if b.Target == nil {
95+
return "", 0, nil
96+
}
97+
98+
if _, localPath := b.StateFilenameDirect(ctx); localPath != "" {
99+
raw, err := readStateFile(localPath)
100+
if err == nil && raw != nil {
101+
return "direct", int64(len(raw)), parseDirectStateSizes(ctx, raw)
102+
}
103+
if err != nil {
104+
log.Debugf(ctx, "resources-metadata telemetry: skipping direct state at %s: %s", localPath, err)
105+
}
106+
}
107+
108+
if _, localPath := b.StateFilenameTerraform(ctx); localPath != "" {
109+
raw, err := readStateFile(localPath)
110+
if errors.Is(err, fs.ErrNotExist) {
111+
altPath := terraformCacheStatePath(ctx, b)
112+
if altPath != localPath && altPath != "" {
113+
raw, err = readStateFile(altPath)
114+
}
115+
}
116+
if err == nil && raw != nil {
117+
return "terraform", int64(len(raw)), parseTerraformStateSizes(ctx, raw)
118+
}
119+
if err != nil {
120+
log.Debugf(ctx, "resources-metadata telemetry: skipping terraform state at %s: %s", localPath, err)
121+
}
122+
}
123+
124+
return "", 0, nil
125+
}
126+
127+
// readStateFile returns the full contents of the file at path.
//
// An empty path and a file that does not exist are both reported as
// (nil, nil), so callers can tell "no state" apart from a real read failure.
// Any other error from os.ReadFile is returned together with whatever bytes
// os.ReadFile produced.
func readStateFile(path string) ([]byte, error) {
	if path == "" {
		return nil, nil
	}

	contents, err := os.ReadFile(path)
	if err != nil && errors.Is(err, fs.ErrNotExist) {
		// A missing file is not an error for telemetry purposes.
		contents, err = nil, nil
	}
	return contents, err
}
137+
138+
func terraformCacheStatePath(ctx context.Context, b *bundle.Bundle) string {
139+
dir, err := terraform.Dir(ctx, b)
140+
if err != nil {
141+
return ""
142+
}
143+
return filepath.Join(dir, "terraform.tfstate")
144+
}
145+
146+
func parseDirectStateSizes(ctx context.Context, raw []byte) map[string][]int64 {
147+
var db dstate.Database
148+
if err := json.Unmarshal(raw, &db); err != nil {
149+
log.Debugf(ctx, "resources-metadata telemetry: failed to parse direct state: %s", err)
150+
return nil
151+
}
152+
byType := make(map[string][]int64)
153+
for key, entry := range db.State {
154+
t := resourceTypeFromKey(key)
155+
if t == "" {
156+
continue
157+
}
158+
byType[t] = append(byType[t], int64(len(entry.State)))
159+
}
160+
return byType
161+
}
162+
163+
func parseTerraformStateSizes(ctx context.Context, raw []byte) map[string][]int64 {
164+
var state struct {
165+
Version int `json:"version"`
166+
Resources []struct {
167+
Type string `json:"type"`
168+
Mode tfjson.ResourceMode `json:"mode"`
169+
Instances []struct {
170+
Attributes json.RawMessage `json:"attributes"`
171+
} `json:"instances"`
172+
} `json:"resources"`
173+
}
174+
if err := json.Unmarshal(raw, &state); err != nil {
175+
log.Debugf(ctx, "resources-metadata telemetry: failed to parse terraform state: %s", err)
176+
return nil
177+
}
178+
byType := make(map[string][]int64)
179+
for _, resource := range state.Resources {
180+
if resource.Mode != tfjson.ManagedResourceMode {
181+
continue
182+
}
183+
groupName, ok := terraform.TerraformToGroupName[resource.Type]
184+
if !ok {
185+
continue
186+
}
187+
for _, instance := range resource.Instances {
188+
byType[groupName] = append(byType[groupName], int64(len(instance.Attributes)))
189+
}
190+
}
191+
return byType
192+
}
193+
194+
// resourceTypeFromKey maps a direct-engine state key to the resource type it
// should be aggregated under.
//
//   - "resources.<type>.<name>" yields <type>.
//   - "resources.<parent>.<name>.<sub>" (permissions / grants / secret_acls)
//     yields <sub>, so sub-resources aggregate across parent families.
//     Everything after the third dot is returned as-is.
//   - Any key with fewer than three dot-separated segments, or whose first
//     segment is not "resources", yields "".
func resourceTypeFromKey(key string) string {
	segments := strings.SplitN(key, ".", 4)
	switch {
	case len(segments) < 3, segments[0] != "resources":
		return ""
	case len(segments) == 4:
		// Sub-resource: report it under its own type, not the parent's.
		return segments[3]
	default:
		return segments[1]
	}
}
211+
212+
// unionKeys returns every key that appears in a or in b, each exactly once.
// The order of the result is unspecified; callers that need a stable order
// must sort it. The result is never nil.
func unionKeys(a map[string]int64, b map[string][]int64) []string {
	keys := make([]string, 0, len(a)+len(b))
	for name := range a {
		keys = append(keys, name)
	}
	for name := range b {
		// Keys of a were all added above; only add what b contributes on top.
		if _, dup := a[name]; !dup {
			keys = append(keys, name)
		}
	}
	return keys
}
226+
227+
// statMax returns the largest value of sortedSizes, which must already be in
// ascending order (the last element is returned as-is). An empty or nil slice
// yields 0.
func statMax(sortedSizes []int64) int64 {
	if n := len(sortedSizes); n > 0 {
		return sortedSizes[n-1]
	}
	return 0
}
233+
234+
// statMean returns the arithmetic mean of sortedSizes using integer division,
// so the result is truncated toward zero. An empty or nil slice yields 0.
// The input order does not affect the result.
func statMean(sortedSizes []int64) int64 {
	n := int64(len(sortedSizes))
	if n == 0 {
		return 0
	}

	var sum int64
	for i := range sortedSizes {
		sum += sortedSizes[i]
	}
	return sum / n
}
244+
245+
// statMedian returns the median of sortedSizes, which must already be in
// ascending order. For an even number of elements the lower of the two middle
// values is returned (no averaging). An empty or nil slice yields 0.
func statMedian(sortedSizes []int64) int64 {
	if n := len(sortedSizes); n > 0 {
		return sortedSizes[(n-1)/2]
	}
	return 0
}

0 commit comments

Comments
 (0)