Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"state_engine": "direct",
"resources": [
{
"resource_type": "jobs",
"count": 3,
"state_size_max_bytes": 256,
"state_size_mean_bytes": 254,
"state_size_median_bytes": 254
},
{
"resource_type": "pipelines",
"count": 2,
"state_size_max_bytes": 205,
"state_size_mean_bytes": 205,
"state_size_median_bytes": 205
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
null
12 changes: 11 additions & 1 deletion acceptance/bundle/telemetry/deploy/script
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,19 @@ trace cat out.requests.txt | jq 'select(has("path") and .path == "/telemetry-ext
title "Assert that mutator execution times are being recorded"
trace cat telemetry.json | jq ' .entry.databricks_cli_log.bundle_deploy_event.experimental.bundle_mutator_execution_time_ms | length > 0'

# resources_metadata is only emitted for direct deploys (it is derived from the
# direct engine's state), so it diverges across the DATABRICKS_BUNDLE_ENGINE
# matrix. Capture it in a per-engine file (terraform produces "null") and omit
# it from the engine-agnostic out.telemetry.txt below. Per-resource sizes are
# deterministic for a fixed config and asserted exactly. state_file_size_bytes
# is dropped from the golden because it is os.Stat of resources.json, whose
# header embeds the CLI version string (0.0.0-dev+<sha> on linux/macos vs
# 0.0.0-dev on windows) — it is still emitted in real telemetry.
cat telemetry.json | jq '.entry.databricks_cli_log.bundle_deploy_event.resources_metadata | if . then del(.state_file_size_bytes) else . end' > out.resources_metadata.$DATABRICKS_BUNDLE_ENGINE.txt

# bundle_mutator_execution_time_ms can have variable number of entries depending upon the runtime of the mutators. Thus we omit it from
# being asserted here.
cat telemetry.json | jq 'del(.entry.databricks_cli_log.bundle_deploy_event.experimental.bundle_mutator_execution_time_ms)' > out.telemetry.txt
cat telemetry.json | jq 'del(.entry.databricks_cli_log.bundle_deploy_event.experimental.bundle_mutator_execution_time_ms, .entry.databricks_cli_log.bundle_deploy_event.resources_metadata)' > out.telemetry.txt

cmd_exec_id=$(extract_command_exec_id.py)
deployment_id=$(cat .databricks/bundle/default/deployment.json | jq -r .id)
Expand Down
7 changes: 7 additions & 0 deletions bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/databricks/cli/bundle/direct"
"github.com/databricks/cli/bundle/env"
"github.com/databricks/cli/bundle/metadata"
"github.com/databricks/cli/bundle/statemgmt/resourcestate"
"github.com/databricks/cli/libs/auth"
"github.com/databricks/cli/libs/cache"
"github.com/databricks/cli/libs/fileset"
Expand Down Expand Up @@ -55,6 +56,12 @@ type Metrics struct {
PythonUpdatedResourcesCount int64
ExecutionTimes []protos.IntMapEntry
LocalCacheMeasurementsMs []protos.IntMapEntry // Local cache measurements stored as milliseconds

// ResourceState is the direct engine's per-resource deployment state
// captured right after the deploy. It carries each resource's state-size in
// bytes so deploy telemetry can be derived without re-reading or re-parsing
// the state file. Nil for terraform deploys.
ResourceState resourcestate.ExportedResourcesMap
}

// SetBoolValue sets the value of a boolean metric.
Expand Down
5 changes: 3 additions & 2 deletions bundle/direct/dstate/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,9 @@ func ExportStateFromData(data Database) resourcestate.ExportedResourcesMap {
}

result[key] = resourcestate.ResourceState{
ID: entry.ID,
ETag: etag,
ID: entry.ID,
ETag: etag,
StateSizeBytes: len(entry.State),
}
}
return result
Expand Down
4 changes: 4 additions & 0 deletions bundle/phases/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ func deployCore(ctx context.Context, b *bundle.Bundle, plan *deployplan.Plan, ta
if targetEngine.IsDirect() {
b.DeploymentBundle.Apply(ctx, b.WorkspaceClient(ctx), plan, direct.MigrateMode(false))
state, err = b.DeploymentBundle.StateDB.Finalize(ctx)
// Capture the finalized state for deploy telemetry. It carries each
// resource's state-size in bytes (from the WAL replay Finalize just
// did), so telemetry needs no extra read or parse of the state file.
b.Metrics.ResourceState = state
} else {
bundle.ApplyContext(ctx, b, terraform.Apply())
state, err = terraform.ParseResourcesState(ctx, b)
Expand Down
107 changes: 107 additions & 0 deletions bundle/phases/resources_metadata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package phases

import (
"context"
"os"
"slices"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/statemgmt/resourcestate"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/telemetry/protos"
)

// directEngine is the only engine for which resources_metadata is populated.
const directEngine = "direct"

// collectResourcesMetadata builds a BundleResourcesMetadata from the per-resource
// state sizes captured during the deploy.
//
// Only direct deploys are measured. b.Metrics.ResourceState is the direct
// engine's finalized state, populated in deployCore from the WAL replay the
// deploy already performs; each entry carries StateSizeBytes (len of the JSON
// blob stored in resources.json). So no marshalling, file read, or JSON parsing
// happens here — sizes are read straight off the in-memory map. The whole-file
// size comes from a single os.Stat (no parse). Returns nil for terraform
// deploys (ResourceState is nil) and when no resources are in state.
func collectResourcesMetadata(ctx context.Context, b *bundle.Bundle) *protos.BundleResourcesMetadata {
state := b.Metrics.ResourceState
if len(state) == 0 {
return nil
}

resources := resourceMetadataFromState(state)
if len(resources) == 0 {
return nil
}

return &protos.BundleResourcesMetadata{
StateEngine: directEngine,
StateFileSizeBytes: directStateFileSize(ctx, b),
Resources: resources,
}
}

// resourceMetadataFromState groups the deployment state by resource type and
// computes per-type count and max/mean/median state size. Sizes are sorted per
// type (needed for the median).
func resourceMetadataFromState(state resourcestate.ExportedResourcesMap) []protos.ResourceMetadata {
sizesByType := make(map[string][]int64)
for key, rs := range state {
t := config.GetResourceTypeFromKey(key)
if t == "" {
continue
}
sizesByType[t] = append(sizesByType[t], int64(rs.StateSizeBytes))
}

types := make([]string, 0, len(sizesByType))
for t := range sizesByType {
types = append(types, t)
}
slices.Sort(types)

resources := make([]protos.ResourceMetadata, 0, len(types))
for _, t := range types {
sizes := sizesByType[t]
slices.Sort(sizes)
resources = append(resources, protos.ResourceMetadata{
ResourceType: t,
Count: int64(len(sizes)),
StateSizeMaxBytes: statMax(sizes),
StateSizeMeanBytes: statMean(sizes),
StateSizeMedianBytes: statMedian(sizes),
})
}
return resources
}

// statMax/statMean/statMedian operate on a slice already sorted ascending.
func statMax(sortedSizes []int64) int64 {
return sortedSizes[len(sortedSizes)-1]
}

func statMean(sortedSizes []int64) int64 {
var total int64
for _, s := range sortedSizes {
total += s
}
return total / int64(len(sortedSizes))
}

func statMedian(sortedSizes []int64) int64 {
return sortedSizes[(len(sortedSizes)-1)/2]
}

// directStateFileSize returns the size in bytes of the direct engine's
// resources.json via a single stat (no read/parse), or 0 if it can't be stat'd.
func directStateFileSize(ctx context.Context, b *bundle.Bundle) int64 {
_, localPath := b.StateFilenameDirect(ctx)
info, err := os.Stat(localPath)
if err != nil {
log.Debugf(ctx, "resources-metadata telemetry: cannot stat direct state at %s: %s", localPath, err)
return 0
}
return info.Size()
}
55 changes: 55 additions & 0 deletions bundle/phases/resources_metadata_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package phases

import (
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/statemgmt/resourcestate"
"github.com/databricks/cli/libs/telemetry/protos"
"github.com/stretchr/testify/assert"
)

func TestResourceMetadataFromState_GroupsByType(t *testing.T) {
state := resourcestate.ExportedResourcesMap{
"resources.jobs.foo": {StateSizeBytes: 20},
"resources.jobs.bar": {StateSizeBytes: 10},
"resources.jobs.foo.permissions": {StateSizeBytes: 2},
"resources.pipelines.qux": {StateSizeBytes: 14},
}

got := resourceMetadataFromState(state)

// Sorted by resource type. Sub-resources (permissions) group under
// "<parent>.permissions" per config.GetResourceTypeFromKey. jobs median is
// the lower-middle of sorted [10,20] -> index (2-1)/2 = 0 -> 10.
assert.Equal(t, []protos.ResourceMetadata{
{ResourceType: "jobs", Count: 2, StateSizeMaxBytes: 20, StateSizeMeanBytes: 15, StateSizeMedianBytes: 10},
{ResourceType: "jobs.permissions", Count: 1, StateSizeMaxBytes: 2, StateSizeMeanBytes: 2, StateSizeMedianBytes: 2},
{ResourceType: "pipelines", Count: 1, StateSizeMaxBytes: 14, StateSizeMeanBytes: 14, StateSizeMedianBytes: 14},
}, got)
}

func TestStatHelpers(t *testing.T) {
assert.Equal(t, int64(3), statMax([]int64{1, 2, 3}))
assert.Equal(t, int64(2), statMean([]int64{1, 2, 3}))
assert.Equal(t, int64(2), statMedian([]int64{1, 2, 3}))
// Lower-middle for even count: sorted [1,2,3,4] -> index (4-1)/2 = 1 -> 2.
assert.Equal(t, int64(2), statMedian([]int64{1, 2, 3, 4}))
}

func TestResourceMetadataFromState_SkipsNonResourceKeys(t *testing.T) {
state := resourcestate.ExportedResourcesMap{
"resources.jobs.foo": {StateSizeBytes: 5},
"bogus": {StateSizeBytes: 99},
}
got := resourceMetadataFromState(state)
assert.Equal(t, []protos.ResourceMetadata{
{ResourceType: "jobs", Count: 1, StateSizeMaxBytes: 5, StateSizeMeanBytes: 5, StateSizeMedianBytes: 5},
}, got)
}

func TestCollectResourcesMetadata_NilWhenNoState(t *testing.T) {
// Terraform deploys leave Metrics.ResourceState nil.
b := &bundle.Bundle{}
assert.Nil(t, collectResourcesMetadata(t.Context(), b))
}
2 changes: 2 additions & 0 deletions bundle/phases/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ func LogDeployTelemetry(ctx context.Context, b *bundle.Bundle, errMsg string) {
ResourceClusterIDs: clusterIds,
ResourceDashboardIDs: dashboardIds,

ResourcesMetadata: collectResourcesMetadata(ctx, b),

Experimental: &protos.BundleDeployExperimental{
BundleMode: mode,
ConfigurationFileCount: b.Metrics.ConfigurationFileCount,
Expand Down
5 changes: 5 additions & 0 deletions bundle/statemgmt/resourcestate/resourcestate.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ type ResourceState struct {

// For dashboards
ETag string

// Size in bytes of the resource's serialized state blob. Populated by the
// direct engine (len of the JSON stored in resources.json) for deploy
// telemetry; left zero by the terraform path.
StateSizeBytes int
}

// ExportedResourcesMap stores relevant attributes from terraform/direct state for all resources
Expand Down
37 changes: 37 additions & 0 deletions libs/telemetry/protos/bundle_deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ type BundleDeployEvent struct {
ResourceClusterIDs []string `json:"resource_cluster_ids,omitempty"`
ResourceDashboardIDs []string `json:"resource_dashboard_ids,omitempty"`

// Per-resource-type metadata (counts and state-size statistics).
ResourcesMetadata *BundleResourcesMetadata `json:"resources_metadata,omitempty"`

Experimental *BundleDeployExperimental `json:"experimental,omitempty"`
}

Expand Down Expand Up @@ -88,6 +91,40 @@ type BundleDeployExperimental struct {
LocalCacheMeasurementsMs []IntMapEntry `json:"local_cache_measurements_ms,omitempty"`
}

// BundleResourcesMetadata mirrors the universe proto. Per-resource-type
// state-size metadata for one bundle deployment.
//
// Only direct deploys are measured: the direct engine persists each resource's
// state as a JSON blob in resources.json, so sizes are read off directly as
// len(state) — no serialization happens at telemetry time. Terraform stores
// state in a different shape and is not collected (the field is absent there).
type BundleResourcesMetadata struct {
// Always "direct"; terraform deploys do not populate this message.
StateEngine string `json:"state_engine,omitempty"`

// Size in bytes of the direct engine's resources.json state file on disk.
StateFileSizeBytes int64 `json:"state_file_size_bytes,omitempty"`

// One entry per resource type present in the deployment state.
Resources []ResourceMetadata `json:"resources,omitempty"`
}

// ResourceMetadata holds metadata about resources of a single type within one
// bundle deployment.
type ResourceMetadata struct {
// Resource type name: "jobs", "pipelines", "schemas", ...
ResourceType string `json:"resource_type,omitempty"`

// Number of resources of this type tracked in the deployment state.
Count int64 `json:"count,omitempty"`

// State-size statistics across resources of this type, each measured as
// len(state) of the JSON blob stored in resources.json.
StateSizeMaxBytes int64 `json:"state_size_max_bytes,omitempty"`
StateSizeMeanBytes int64 `json:"state_size_mean_bytes,omitempty"`
StateSizeMedianBytes int64 `json:"state_size_median_bytes,omitempty"`
}

type BoolMapEntry struct {
Key string `json:"key,omitempty"`
Value bool `json:"value"`
Expand Down
Loading