|
| 1 | +package phases |
| 2 | + |
| 3 | +import ( |
| 4 | + "cmp" |
| 5 | + "context" |
| 6 | + "encoding/json" |
| 7 | + "fmt" |
| 8 | + "slices" |
| 9 | + |
| 10 | + "github.com/databricks/cli/bundle" |
| 11 | + "github.com/databricks/cli/bundle/config/engine" |
| 12 | + "github.com/databricks/cli/bundle/direct/dresources" |
| 13 | + "github.com/databricks/cli/bundle/direct/dstate" |
| 14 | + "github.com/databricks/cli/libs/dyn" |
| 15 | + "github.com/databricks/cli/libs/log" |
| 16 | + "github.com/databricks/cli/libs/telemetry/protos" |
| 17 | +) |
| 18 | + |
| 19 | +// collectResourcesMetadata builds a BundleResourcesMetadata for the deploy. |
| 20 | +// |
| 21 | +// State sizes are computed by running each resource's typed config through |
| 22 | +// the direct engine's adapter.PrepareState — the same transformation direct |
| 23 | +// uses to derive the value it persists to resources.json — and marshaling |
| 24 | +// each entry with dstate.SaveState's encoding (MarshalIndent(" ", " ")). |
| 25 | +// The whole-file size is then computed by assembling those entries into a |
| 26 | +// dstate.Database and marshaling it the way DeploymentState.unlockedSave |
| 27 | +// writes it (MarshalIndent("", " ")). So: |
| 28 | +// |
| 29 | +// - Under DATABRICKS_BUNDLE_ENGINE=direct, per-resource sizes equal |
| 30 | +// len(entry.State) on disk byte-for-byte, and state_file_size_bytes |
| 31 | +// matches the resources.json file size to within a few bytes (only |
| 32 | +// Lineage and Serial may differ, which we set to "" / 0 here). |
| 33 | +// - Under =terraform, the same computation runs against the bundle config, |
| 34 | +// producing identical numbers for the same logical bundle. tfstate is |
| 35 | +// never read. |
| 36 | +// |
| 37 | +// Returns nil when the bundle declares no resources. |
| 38 | +func collectResourcesMetadata(ctx context.Context, b *bundle.Bundle) *protos.BundleResourcesMetadata { |
| 39 | + counts, sizesByType, fileSize := collectResourceCountsAndSizes(ctx, b) |
| 40 | + if len(counts) == 0 { |
| 41 | + return nil |
| 42 | + } |
| 43 | + |
| 44 | + types := unionKeys(counts, sizesByType) |
| 45 | + slices.Sort(types) |
| 46 | + |
| 47 | + resources := make([]protos.ResourceMetadata, 0, len(types)) |
| 48 | + for _, t := range types { |
| 49 | + sizes := sizesByType[t] |
| 50 | + slices.SortFunc(sizes, func(a, b int64) int { return cmp.Compare(a, b) }) |
| 51 | + resources = append(resources, protos.ResourceMetadata{ |
| 52 | + ResourceType: t, |
| 53 | + Count: counts[t], |
| 54 | + StateSizeMaxBytes: statMax(sizes), |
| 55 | + StateSizeMeanBytes: statMean(sizes), |
| 56 | + StateSizeMedianBytes: statMedian(sizes), |
| 57 | + }) |
| 58 | + } |
| 59 | + |
| 60 | + return &protos.BundleResourcesMetadata{ |
| 61 | + StateEngine: resolveDeployEngine(ctx, b), |
| 62 | + StateFileSizeBytes: fileSize, |
| 63 | + Resources: resources, |
| 64 | + } |
| 65 | +} |
| 66 | + |
| 67 | +// collectResourceCountsAndSizes walks the bundle config and assembles a |
| 68 | +// dstate.Database with each resource's PrepareState'd value, then marshals |
| 69 | +// that database the way direct writes resources.json. Returns per-type |
| 70 | +// counts, per-type per-resource byte lengths, and the byte length of the |
| 71 | +// whole simulated state file. |
| 72 | +func collectResourceCountsAndSizes(ctx context.Context, b *bundle.Bundle) (map[string]int64, map[string][]int64, int64) { |
| 73 | + counts := make(map[string]int64) |
| 74 | + sizesByType := make(map[string][]int64) |
| 75 | + |
| 76 | + adapters := getAdapters(ctx, b) |
| 77 | + db := dstate.NewDatabase("", 0) |
| 78 | + |
| 79 | + pattern := dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey()) |
| 80 | + _, err := dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { |
| 81 | + if len(p) < 3 { |
| 82 | + return v, nil |
| 83 | + } |
| 84 | + resourceType := p[1].Key() |
| 85 | + counts[resourceType]++ |
| 86 | + |
| 87 | + stateBytes, err := resourceStateBytes(b, adapters, p, resourceType) |
| 88 | + if err != nil { |
| 89 | + log.Debugf(ctx, "resources-metadata telemetry: %s: %s", p, err) |
| 90 | + return v, nil |
| 91 | + } |
| 92 | + sizesByType[resourceType] = append(sizesByType[resourceType], int64(len(stateBytes))) |
| 93 | + db.State[p.String()] = dstate.ResourceEntry{ |
| 94 | + ID: extractResourceID(v), |
| 95 | + State: stateBytes, |
| 96 | + } |
| 97 | + return v, nil |
| 98 | + }) |
| 99 | + if err != nil { |
| 100 | + log.Debugf(ctx, "resources-metadata telemetry: failed to walk config resources: %s", err) |
| 101 | + } |
| 102 | + |
| 103 | + var fileSize int64 |
| 104 | + if len(db.State) > 0 { |
| 105 | + raw, mErr := json.MarshalIndent(db, "", " ") |
| 106 | + if mErr != nil { |
| 107 | + log.Debugf(ctx, "resources-metadata telemetry: failed to marshal database envelope: %s", mErr) |
| 108 | + } else { |
| 109 | + fileSize = int64(len(raw)) |
| 110 | + } |
| 111 | + } |
| 112 | + return counts, sizesByType, fileSize |
| 113 | +} |
| 114 | + |
| 115 | +// resourceStateBytes derives the bytes direct would store for one resource: |
| 116 | +// GetResourceConfig (typed) → adapter.PrepareState → MarshalIndent with the |
| 117 | +// same prefix/indent direct uses in dstate.SaveState. Falls back to marshaling |
| 118 | +// the typed config when no adapter is registered for the resource type |
| 119 | +// (e.g., a type the direct engine doesn't yet support). |
| 120 | +func resourceStateBytes(b *bundle.Bundle, adapters map[string]*dresources.Adapter, p dyn.Path, resourceType string) ([]byte, error) { |
| 121 | + cfg, err := b.Config.GetResourceConfig(p.String()) |
| 122 | + if err != nil { |
| 123 | + return nil, fmt.Errorf("get config: %w", err) |
| 124 | + } |
| 125 | + |
| 126 | + target := cfg |
| 127 | + if adapter, ok := adapters[resourceType]; ok { |
| 128 | + state, err := adapter.PrepareState(cfg) |
| 129 | + if err != nil { |
| 130 | + return nil, fmt.Errorf("prepare state: %w", err) |
| 131 | + } |
| 132 | + target = state |
| 133 | + } |
| 134 | + |
| 135 | + // dstate.SaveState writes resource state with MarshalIndent using these |
| 136 | + // exact prefix/indent arguments; matching them here means each resource's |
| 137 | + // byte length equals len(entry.State) on disk for direct deploys. |
| 138 | + raw, err := json.MarshalIndent(target, " ", " ") |
| 139 | + if err != nil { |
| 140 | + return nil, fmt.Errorf("marshal: %w", err) |
| 141 | + } |
| 142 | + return raw, nil |
| 143 | +} |
| 144 | + |
| 145 | +// extractResourceID returns the resource's ID string from its dyn.Value entry, |
| 146 | +// or "" if not yet set. Each resources.<type>.<name> entry has an "id" field |
| 147 | +// populated post-deploy (via BaseResource.ID). |
| 148 | +func extractResourceID(v dyn.Value) string { |
| 149 | + idVal, err := dyn.Get(v, "id") |
| 150 | + if err != nil || idVal.Kind() != dyn.KindString { |
| 151 | + return "" |
| 152 | + } |
| 153 | + return idVal.MustString() |
| 154 | +} |
| 155 | + |
| 156 | +// getAdapters returns adapters initialized for PrepareState. If the bundle |
| 157 | +// already has them initialized (direct engine path), reuse them. Otherwise, |
| 158 | +// build a fresh set with a nil workspace client — PrepareState is a pure |
| 159 | +// transformation that doesn't touch the client. |
| 160 | +func getAdapters(ctx context.Context, b *bundle.Bundle) map[string]*dresources.Adapter { |
| 161 | + if b.DeploymentBundle.Adapters != nil { |
| 162 | + return b.DeploymentBundle.Adapters |
| 163 | + } |
| 164 | + adapters, err := dresources.InitAll(nil) |
| 165 | + if err != nil { |
| 166 | + log.Debugf(ctx, "resources-metadata telemetry: failed to init adapters: %s", err) |
| 167 | + return nil |
| 168 | + } |
| 169 | + return adapters |
| 170 | +} |
| 171 | + |
| 172 | +// resolveDeployEngine returns the effective deploy engine ("direct" or |
| 173 | +// "terraform"). Mirrors cmd/bundle/utils.ResolveEngineSetting but is inlined |
| 174 | +// here to avoid a layering import (bundle/phases must not depend on cmd/). |
| 175 | +func resolveDeployEngine(ctx context.Context, b *bundle.Bundle) string { |
| 176 | + if b.Config.Bundle.Engine != engine.EngineNotSet { |
| 177 | + return string(b.Config.Bundle.Engine.ThisOrDefault()) |
| 178 | + } |
| 179 | + envEngine, _ := engine.FromEnv(ctx) |
| 180 | + return string(envEngine.ThisOrDefault()) |
| 181 | +} |
| 182 | + |
// unionKeys returns every key that appears in a or b, each exactly once.
// The order of the result is unspecified (it follows map iteration order),
// so callers that need determinism must sort it. The result is never nil.
func unionKeys(a map[string]int64, b map[string][]int64) []string {
	keys := make([]string, 0, len(a)+len(b))
	for key := range a {
		keys = append(keys, key)
	}
	for key := range b {
		// Keys shared with a were already emitted by the first loop.
		if _, dup := a[key]; dup {
			continue
		}
		keys = append(keys, key)
	}
	return keys
}
| 197 | + |
// statMax returns the largest value of sortedSizes, which must be sorted in
// ascending order (the last element is returned). An empty slice yields 0.
func statMax(sortedSizes []int64) int64 {
	if n := len(sortedSizes); n > 0 {
		return sortedSizes[n-1]
	}
	return 0
}
| 204 | + |
// statMean returns the arithmetic mean of sortedSizes using integer
// division, so the result is truncated toward zero. An empty slice yields 0.
// The computation itself does not depend on the input being sorted.
func statMean(sortedSizes []int64) int64 {
	n := int64(len(sortedSizes))
	if n == 0 {
		return 0
	}
	var sum int64
	for i := range sortedSizes {
		sum += sortedSizes[i]
	}
	return sum / n
}
| 215 | + |
// statMedian returns the median of sortedSizes, which must be sorted in
// ascending order. For an even number of elements it returns the lower of
// the two middle values rather than their average. An empty slice yields 0.
func statMedian(sortedSizes []int64) int64 {
	n := len(sortedSizes)
	if n == 0 {
		return 0
	}
	mid := (n - 1) / 2
	return sortedSizes[mid]
}
0 commit comments