From b7a21f18d2b54cdc87b4d044bb170511aea4c1d1 Mon Sep 17 00:00:00 2001 From: Divyansh Vijayvergia Date: Tue, 9 Jun 2026 18:13:13 +0000 Subject: [PATCH] Make WorkspaceClient.dbutils lazy via cached_property + tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Regenerated ``databricks/sdk/__init__.py`` with the updated template (imports ``functools.cached_property``, drops the eager ``self._dbutils = _make_dbutils(self._config)`` from ``__init__``, emits ``dbutils`` as a ``@cached_property`` that calls ``_make_dbutils`` on first access). Adds four ``tests/test_client.py`` tests that lock in the contract: - ``dbutils`` is a ``functools.cached_property`` descriptor on ``WorkspaceClient``. - ``WorkspaceClient.__init__`` does not invoke ``_make_dbutils``. - The first ``ws.dbutils`` read invokes ``_make_dbutils`` once; subsequent reads return the cached value without re-invoking. - Constructing ``WorkspaceClient`` on a faked Spark Connect runtime (whose ``dbruntime`` raises ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT`` on any namespace materialization) succeeds without importing ``databricks.sdk.runtime`` at all — the durable sidestep of databricks/dbt-databricks#1252. Complements #1469 (which catches the same failure at runtime-module import time as a defense-in-depth fallback). --- databricks/sdk/__init__.py | 9 ++++-- tests/test_client.py | 65 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/databricks/sdk/__init__.py b/databricks/sdk/__init__.py index 4c911e26f..9e3215152 100755 --- a/databricks/sdk/__init__.py +++ b/databricks/sdk/__init__.py @@ -3,6 +3,7 @@ import json import logging +from functools import cached_property from typing import List, Optional import databricks.sdk.core as client @@ -359,7 +360,6 @@ def __init__( custom_headers=custom_headers, ) self._config = config.copy() - self._dbutils = _make_dbutils(self._config) self._api_client = client.ApiClient(self._config) serving_endpoints = ServingEndpointsExt(self._api_client) self._access_control = pkg_iam.AccessControlAPI(self._api_client) @@ -511,9 +511,12 @@ def config(self) -> client.Config: def api_client(self) -> client.ApiClient: return self._api_client - @property + @cached_property def dbutils(self) -> dbutils.RemoteDbUtils: - return self._dbutils + # Lazy so consumers that never touch ``dbutils`` (e.g. dbt-databricks) do not pay + # the cost of building it — and, on Spark Connect runtimes, do not hit the legacy + # ``SparkContext`` path that ``databricks.sdk.runtime`` materializes on import. + return _make_dbutils(self._config) @property def access_control(self) -> pkg_iam.AccessControlAPI: diff --git a/tests/test_client.py b/tests/test_client.py index 7eaf308f1..c616662b4 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,3 +1,6 @@ +import functools +import sys +import types from unittest.mock import create_autospec import pytest @@ -15,3 +18,65 @@ def test_autospec_fails_on_setting_unknown_property(): w = create_autospec(WorkspaceClient, spec_set=True) with pytest.raises(AttributeError): w.bar = 1 + + +def test_dbutils_is_a_cached_property(): + """``dbutils`` is a ``functools.cached_property`` so consumers that never read it + pay no build cost — and, on Spark Connect runtimes, never touch the legacy + ``SparkContext`` path that ``databricks.sdk.runtime`` materializes on import.""" + descriptor = WorkspaceClient.__dict__["dbutils"] + assert isinstance(descriptor, functools.cached_property) + + +def test_workspace_client_init_does_not_build_dbutils(config, mocker): + """Constructing a ``WorkspaceClient`` must not invoke ``_make_dbutils``.""" + spy = mocker.patch("databricks.sdk._make_dbutils") + + WorkspaceClient(config=config) + + spy.assert_not_called() + + +def test_dbutils_first_access_builds_exactly_once(config, mocker): + """First read of ``.dbutils`` calls ``_make_dbutils`` once; subsequent reads + return the cached value without re-invoking.""" + sentinel = object() + spy = mocker.patch("databricks.sdk._make_dbutils", return_value=sentinel) + ws = WorkspaceClient(config=config) + + first = ws.dbutils + assert spy.call_count == 1 + assert first is sentinel + + second = ws.dbutils + assert spy.call_count == 1 # still 1 — cached_property short-circuits via __dict__ + assert second is sentinel + + +def test_workspace_client_constructs_on_spark_connect_without_touching_runtime(monkeypatch, config): + """End-to-end Layer 2 win: with the lazy property, ``WorkspaceClient(config=...)`` + on a Spark Connect cluster succeeds without ever importing + ``databricks.sdk.runtime`` — so the legacy ``SparkContext`` materialization that + raises ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT`` is never even attempted. + + Faked ``dbruntime`` raises on any namespace materialization; if anything during + construction triggered ``databricks.sdk.runtime``'s import, this test would crash. + """ + + class _Initializer: + @staticmethod + def getOrCreate(): + raise RuntimeError( + "[CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT] Calls to SparkContext are not " + "supported on a Spark Connect cluster." + ) + + fake = types.ModuleType("dbruntime") + fake.UserNamespaceInitializer = _Initializer + monkeypatch.setitem(sys.modules, "dbruntime", fake) + monkeypatch.delitem(sys.modules, "databricks.sdk.runtime", raising=False) + + ws = WorkspaceClient(config=config) + + assert ws is not None + assert "databricks.sdk.runtime" not in sys.modules