Skip to content

Commit b512028

Browse files
authored
Add telemetry event for SSH tunnel connections (#4881)
## Changes

Add a `SshTunnelEvent` telemetry event that fires once per `ssh connect` / `ssh proxy` / IDE-mode invocation in `experimental/ssh/internal/client/client.go`.

Captured fields:

- `compute_type` — dedicated cluster or serverless
- `accelerator_type` — GPU accelerator for serverless (empty for dedicated)
- `ide_type` — IDE that initiated the connection (e.g. `vscode`, `cursor`), empty for raw SSH / proxy
- `client_mode` — `SSH_CLIENT`, `PROXY`, or `IDE`
- `is_reconnect` — true when invoked with `ServerMetadata` (a follow-up connection re-using an existing server)
- `auto_start_cluster` — whether `--auto-start-cluster` was set
- `server_start_time_ms` — time spent inside `ensureSSHServerIsRunning`; 0 on reconnect
- `is_success` — set to true only after the connection is fully established

New files:

- `libs/telemetry/protos/ssh_tunnel.go` — event struct and the `SshTunnelComputeType` / `SshTunnelClientMode` string-enum types, following the same pattern as `BundleMode` in `enum.go`.
- Wires the event into `DatabricksCliLog` in `libs/telemetry/protos/frontend_log.go`.

Emission is done from a `defer` registered before the first early-return path inside `Run`, so the event fires on every exit (cluster-state failure, binary-upload failure, metadata-parse failure, success) rather than only the original two explicit call sites.

## Why

We have no visibility into how the experimental SSH tunnel feature is being used. We want to answer basic questions before promoting it out of experimental:

- Dedicated vs serverless split
- IDE breakdown (raw SSH client vs proxy vs IDE-initiated, and which IDEs)
- Success rate and where failures cluster
- How long server startup takes on serverless (cold start signal)

The `defer` pattern is deliberate: an event emitted only on the happy path would under-report failures and bias all of the above metrics.

## Tests

- `task test-exp-ssh` passes on linux/macos/windows (all green in CI).
- `task lint` clean.
- Verified locally that the event is recorded on the in-memory telemetry logger for both dedicated and serverless invocations, including failure paths (cluster-state check failure, binary upload failure).

No new test files: this repo has no existing tests asserting on telemetry event payloads — `BundleInitEvent` and `BundleDeployEvent` are emitted untested as well (`bundle/phases/telemetry.go`, `libs/template/writer.go`). Adding a fixture-based test only for `SshTunnelEvent` would be inconsistent with the rest of the codebase; that work should land as a separate PR if we want it.

No `NEXT_CHANGELOG.md` entry: the feature is under `experimental/` and the change is internal telemetry, not a user-visible behavior change.
1 parent 2f4a983 commit b512028

3 files changed

Lines changed: 91 additions & 0 deletions

File tree

experimental/ssh/internal/client/client.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ import (
2929
"github.com/databricks/cli/libs/auth"
3030
"github.com/databricks/cli/libs/cmdio"
3131
"github.com/databricks/cli/libs/log"
32+
"github.com/databricks/cli/libs/telemetry"
33+
"github.com/databricks/cli/libs/telemetry/protos"
3234
"github.com/databricks/databricks-sdk-go"
3335
"github.com/databricks/databricks-sdk-go/retries"
3436
"github.com/databricks/databricks-sdk-go/service/compute"
@@ -265,6 +267,13 @@ func Run(ctx context.Context, client *databricks.WorkspaceClient, opts ClientOpt
265267
}
266268
}
267269

270+
isReconnect := opts.ServerMetadata != ""
271+
var serverStartTimeMs int64
272+
isSuccess := false
273+
defer func() {
274+
logSshTunnelEvent(ctx, opts, isSuccess, isReconnect, serverStartTimeMs)
275+
}()
276+
268277
// Only check cluster state for dedicated clusters
269278
if !opts.IsServerlessMode() {
270279
cmdio.LogString(ctx, "Checking cluster state...")
@@ -311,6 +320,7 @@ func Run(ctx context.Context, client *databricks.WorkspaceClient, opts ClientOpt
311320
if err != nil {
312321
return fmt.Errorf("failed to upload ssh-tunnel binaries: %w", err)
313322
}
323+
serverStartTime := time.Now()
314324
userName, serverPort, clusterID, err = ensureSSHServerIsRunning(ctx, client, version, secretScopeName, opts)
315325
if err != nil {
316326
if opts.IsServerlessMode() && opts.Accelerator == "" && errors.Is(err, errServerMetadata) {
@@ -319,6 +329,7 @@ func Run(ctx context.Context, client *databricks.WorkspaceClient, opts ClientOpt
319329
}
320330
return fmt.Errorf("failed to ensure that ssh server is running: %w", err)
321331
}
332+
serverStartTimeMs = time.Since(serverStartTime).Milliseconds()
322333
} else {
323334
// Metadata format: "<user_name>,<port>,<cluster_id>"
324335
metadata := strings.Split(opts.ServerMetadata, ",")
@@ -355,6 +366,8 @@ func Run(ctx context.Context, client *databricks.WorkspaceClient, opts ClientOpt
355366
cmdio.LogString(ctx, "Connected!")
356367
}
357368

369+
isSuccess = true
370+
358371
if opts.ProxyMode {
359372
return runSSHProxy(ctx, client, serverPort, clusterID, opts)
360373
} else if opts.IDE != "" {
@@ -728,3 +741,33 @@ func ensureSSHServerIsRunning(ctx context.Context, client *databricks.WorkspaceC
728741

729742
return userName, serverPort, effectiveClusterID, nil
730743
}
744+
745+
// logSshTunnelEvent builds an SshTunnelEvent from the client options and the
// outcome flags supplied by the caller, and hands it to telemetry.Log.
//
// Parameters:
//   - ctx: context forwarded to telemetry.Log.
//   - opts: client options; the compute type, accelerator, IDE, client mode and
//     auto-start flag are all derived from it.
//   - isSuccess: reported as-is in the event's IsSuccess field.
//   - isReconnect: reported as-is in the event's IsReconnect field.
//   - serverStartTimeMs: reported as-is in the event's ServerStartTimeMs field.
func logSshTunnelEvent(ctx context.Context, opts ClientOptions, isSuccess, isReconnect bool, serverStartTimeMs int64) {
746+
// Dedicated is the default; it is only overridden for serverless mode.
computeType := protos.SshTunnelComputeTypeDedicated
747+
if opts.IsServerlessMode() {
748+
computeType = protos.SshTunnelComputeTypeServerless
749+
}
750+
751+
// The cases are evaluated in order, so ProxyMode wins over a non-empty IDE
// when both are set; plain SSH client is the fallback.
var clientMode protos.SshTunnelClientMode
752+
switch {
753+
case opts.ProxyMode:
754+
clientMode = protos.SshTunnelClientModeProxy
755+
case opts.IDE != "":
756+
clientMode = protos.SshTunnelClientModeIDE
757+
default:
758+
clientMode = protos.SshTunnelClientModeSSH
759+
}
760+
761+
telemetry.Log(ctx, protos.DatabricksCliLog{
762+
SshTunnelEvent: &protos.SshTunnelEvent{
763+
ComputeType: computeType,
764+
AcceleratorType: opts.Accelerator,
765+
IdeType: opts.IDE,
766+
ClientMode: clientMode,
767+
IsReconnect: isReconnect,
768+
// Reports the option value, not whether a cluster start actually happened.
AutoStartCluster: opts.AutoStartCluster,
769+
ServerStartTimeMs: serverStartTimeMs,
770+
IsSuccess: isSuccess,
771+
},
772+
})
773+
}

libs/telemetry/protos/frontend_log.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ type DatabricksCliLog struct {
1919
CliTestEvent *CliTestEvent `json:"cli_test_event,omitempty"`
2020
BundleInitEvent *BundleInitEvent `json:"bundle_init_event,omitempty"`
2121
BundleDeployEvent *BundleDeployEvent `json:"bundle_deploy_event,omitempty"`
22+
SshTunnelEvent *SshTunnelEvent `json:"ssh_tunnel_event,omitempty"`
2223
}
libs/telemetry/protos/ssh_tunnel.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package protos
2+
3+
// SshTunnelComputeType is the string enum used for the compute_type field of
// SshTunnelEvent.
type SshTunnelComputeType string
4+
5+
// Values of SshTunnelComputeType.
const (
6+
// Presumably the placeholder for an unset value; not referenced by the
// visible client code — TODO confirm.
SshTunnelComputeTypeUnspecified SshTunnelComputeType = "TYPE_UNSPECIFIED"
7+
// Reported when the client is not in serverless mode.
SshTunnelComputeTypeDedicated SshTunnelComputeType = "DEDICATED"
8+
// Reported when the client is in serverless mode.
SshTunnelComputeTypeServerless SshTunnelComputeType = "SERVERLESS"
9+
)
10+
11+
// SshTunnelClientMode is the string enum used for the client_mode field of
// SshTunnelEvent.
type SshTunnelClientMode string
12+
13+
// Values of SshTunnelClientMode.
const (
14+
// Presumably the placeholder for an unset value; not referenced by the
// visible client code — TODO confirm.
SshTunnelClientModeUnspecified SshTunnelClientMode = "TYPE_UNSPECIFIED"
15+
// Reported when neither proxy mode nor an IDE is selected.
SshTunnelClientModeSSH SshTunnelClientMode = "SSH_CLIENT"
16+
// Reported when the client runs in proxy mode.
SshTunnelClientModeProxy SshTunnelClientMode = "PROXY"
17+
// Reported when an IDE is specified and proxy mode is off.
SshTunnelClientModeIDE SshTunnelClientMode = "IDE"
18+
)
19+
20+
// SshTunnelEvent describes one SSH tunnel connection attempt made via the
21+
// Databricks CLI. Failed attempts are reported as well; see IsSuccess.
22+
type SshTunnelEvent struct {
23+
// Type of compute: dedicated cluster or serverless.
24+
ComputeType SshTunnelComputeType `json:"compute_type,omitempty"`
25+
26+
// GPU accelerator type for serverless compute.
27+
AcceleratorType string `json:"accelerator_type,omitempty"`
28+
29+
// IDE that initiated the connection (e.g., "vscode", "cursor").
// Empty when no IDE was specified.
30+
IdeType string `json:"ide_type,omitempty"`
31+
32+
// How the client is used: SSH client, proxy mode, or IDE mode.
33+
ClientMode SshTunnelClientMode `json:"client_mode,omitempty"`
34+
35+
// Whether this is a reconnection to an existing session, i.e. the client
// was invoked with server metadata already supplied.
36+
IsReconnect bool `json:"is_reconnect,omitempty"`
37+
38+
// Whether auto-starting the cluster was requested (the auto-start-cluster
// option). This mirrors the option only; it does not record whether a
// cluster start actually took place.
39+
AutoStartCluster bool `json:"auto_start_cluster,omitempty"`
40+
41+
// Time in milliseconds spent ensuring the SSH server is running.
42+
// Zero on reconnect, and also when that step did not complete successfully.
// The tag has no omitempty, so a zero value is still serialized.
43+
ServerStartTimeMs int64 `json:"server_start_time_ms"`
44+
45+
// Whether the connection was successful.
// With omitempty, a false value is left out of the serialized JSON.
46+
IsSuccess bool `json:"is_success,omitempty"`
47+
}

0 commit comments

Comments
 (0)