From 17e199735abff7f141e03b4a3ac89467b381f6c8 Mon Sep 17 00:00:00 2001 From: Shubham Dhal Date: Mon, 8 Jun 2026 14:08:28 +0530 Subject: [PATCH 1/4] Fall back to remote runtime on Spark Connect when the legacy namespace is unavailable On a Databricks shared-access-mode (Spark Connect) cluster, importing databricks.sdk.runtime (which happens when WorkspaceClient.__init__ eagerly builds dbutils) materializes a legacy SparkContext via UserNamespaceInitializer.get_namespace_globals() and raises CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT (a PySparkRuntimeError, not ImportError). The surrounding 'except ImportError' does not catch it, so the error escapes the import and crashes WorkspaceClient construction. Treat a namespace-materialization failure the same as 'not in a classic runtime': log a warning and fall back to the existing OSS/remote implementation, which is Spark Connect-compatible (DatabricksSession + RemoteDbUtils). Fixes #1463 Signed-off-by: Shubham Dhal --- NEXT_CHANGELOG.md | 2 + databricks/sdk/runtime/__init__.py | 18 ++++++- tests/test_runtime.py | 86 ++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 tests/test_runtime.py diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 32901b0cd..4d0a6a024 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -10,6 +10,8 @@ ### Bug Fixes +* Fall back to the remote runtime implementation when the legacy user namespace cannot be materialized. On Spark Connect runtimes (e.g. shared-access-mode clusters), importing `databricks.sdk.runtime` — which happens when constructing a `WorkspaceClient` on such a cluster — tried to build a legacy `SparkContext` and raised `CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT` at import time. It now logs a warning and falls back to the Spark Connect-compatible remote implementation instead of crashing. + ### Documentation ### Breaking Changes diff --git a/databricks/sdk/runtime/__init__.py b/databricks/sdk/runtime/__init__.py index 1b5c40573..4d994d0b5 100644 --- a/databricks/sdk/runtime/__init__.py +++ b/databricks/sdk/runtime/__init__.py @@ -93,9 +93,10 @@ def inner() -> Dict[str, str]: return None, None +# Internal implementation +# Separated from above for backward compatibility +_use_runtime_namespace = False try: - # Internal implementation - # Separated from above for backward compatibility from dbruntime import UserNamespaceInitializer userNamespaceGlobals = UserNamespaceInitializer.getOrCreate().get_namespace_globals() @@ -105,7 +106,20 @@ def inner() -> Dict[str, str]: continue _globals[var] = userNamespaceGlobals[var] is_local_implementation = False + _use_runtime_namespace = True except ImportError: + # Not running inside a classic Databricks runtime; fall back to the OSS implementation below. + pass +except Exception as e: + # On Spark Connect runtimes (e.g. shared-access-mode clusters), materializing the + # legacy user namespace builds a SparkContext, which is unavailable in remote clients + # and raises CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT. Treat this like "not in a classic + # runtime" and fall back to the OSS/remote implementation below, which is Spark + # Connect-compatible. Without this, importing databricks.sdk.runtime (and therefore + # constructing a WorkspaceClient on such a cluster) raises at import time. + logger.warning(f"Runtime namespace unavailable, falling back to remote implementation: {e}") + +if not _use_runtime_namespace: # OSS implementation is_local_implementation = True diff --git a/tests/test_runtime.py b/tests/test_runtime.py new file mode 100644 index 000000000..d54e72bf6 --- /dev/null +++ b/tests/test_runtime.py @@ -0,0 +1,86 @@ +"""Tests for the import-time behavior of the ``databricks.sdk.runtime`` package.""" + +import importlib +import sys +import types + +import pytest + + +class _RemoteClientInitializer: + """Stand-in for ``dbruntime.UserNamespaceInitializer`` on a Spark Connect runtime. + + On a shared-access-mode (Spark Connect) cluster, materializing the legacy user namespace + builds a ``SparkContext``, which is unavailable in remote clients and raises + ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``. + """ + + @staticmethod + def getOrCreate(): + class _Namespace: + def get_namespace_globals(self): + raise RuntimeError( + "[CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT] Calls to SparkContext are not " + "supported on a Spark Connect cluster. Use spark instead." + ) + + return _Namespace() + + +@pytest.fixture +def spark_connect_runtime(monkeypatch): + """Simulate a Spark Connect runtime: ``dbruntime`` is importable, but materializing the + legacy user namespace raises ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``.""" + fake = types.ModuleType("dbruntime") + fake.UserNamespaceInitializer = _RemoteClientInitializer + had_dbruntime = "dbruntime" in sys.modules + saved_dbruntime = sys.modules.get("dbruntime") + sys.modules["dbruntime"] = fake + + # The remote fallback constructs ``RemoteDbUtils()``, which initializes a default ``Config``; + # give it hermetic PAT credentials so the fallback itself doesn't fail for unrelated auth + # reasons (see databricks-sdk-py#986). + monkeypatch.setenv("DATABRICKS_HOST", "https://test.cloud.databricks.com") + monkeypatch.setenv("DATABRICKS_TOKEN", "test-token") + + saved_runtime = sys.modules.get("databricks.sdk.runtime") + try: + yield + finally: + if had_dbruntime: + sys.modules["dbruntime"] = saved_dbruntime + else: + sys.modules.pop("dbruntime", None) + # Restore ``databricks.sdk.runtime`` to its pre-test state. If it was loaded before this + # test, reload it cleanly while the PAT env is still set; otherwise drop it so it is + # re-imported lazily on next use. + if saved_runtime is None: + sys.modules.pop("databricks.sdk.runtime", None) + else: + importlib.reload(saved_runtime) + + +def test_runtime_import_survives_spark_connect_remote_client(spark_connect_runtime): + """Regression for dbt-databricks#1252: importing ``databricks.sdk.runtime`` on a Spark + Connect runtime must fall back to the remote implementation instead of raising.""" + import databricks.sdk.runtime as runtime + + importlib.reload(runtime) # re-execute the module body with the faked ``dbruntime`` present + + assert runtime.is_local_implementation is True + assert runtime.dbutils is not None + + +def test_workspace_client_constructs_on_spark_connect(spark_connect_runtime, config): + """End-to-end: constructing a ``WorkspaceClient`` on a Spark Connect runtime must not raise + ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT`` (the actual dbt-databricks#1252 failure, since + ``WorkspaceClient.__init__`` eagerly builds dbutils via ``databricks.sdk.runtime``).""" + import databricks.sdk.runtime as runtime + + importlib.reload(runtime) + + from databricks.sdk import WorkspaceClient + + ws = WorkspaceClient(config=config) + + assert ws is not None From 61cc9350ef100dbe8dd0588a25378a12112b9082 Mon Sep 17 00:00:00 2001 From: Divyansh Vijayvergia Date: Tue, 9 Jun 2026 15:05:27 +0000 Subject: [PATCH 2/4] runtime: justify broad except in Spark Connect fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a sentence to the inline comment explaining why the catch is broad rather than typed on PySparkRuntimeError — avoids importing pyspark at SDK import time and keeps unexpected runtime-namespace errors surfaced as a warning + safe fallback instead of a constructor crash. --- databricks/sdk/runtime/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/databricks/sdk/runtime/__init__.py b/databricks/sdk/runtime/__init__.py index 4d994d0b5..a57104311 100644 --- a/databricks/sdk/runtime/__init__.py +++ b/databricks/sdk/runtime/__init__.py @@ -116,7 +116,10 @@ def inner() -> Dict[str, str]: # and raises CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT. Treat this like "not in a classic # runtime" and fall back to the OSS/remote implementation below, which is Spark # Connect-compatible. Without this, importing databricks.sdk.runtime (and therefore - # constructing a WorkspaceClient on such a cluster) raises at import time. + # constructing a WorkspaceClient on such a cluster) raises at import time. The catch + # is broad rather than typed on PySparkRuntimeError so the SDK does not need to import + # pyspark just to narrow the exception type; any other unexpected failure here is also + # safer surfaced as a warning + remote fallback than as a constructor crash. logger.warning(f"Runtime namespace unavailable, falling back to remote implementation: {e}") if not _use_runtime_namespace: From 11fdf3f2d73eb4705b5ecc71c8e00e0a5b81e9a6 Mon Sep 17 00:00:00 2001 From: Divyansh Vijayvergia Date: Tue, 9 Jun 2026 15:19:21 +0000 Subject: [PATCH 3/4] tests: tighten test_runtime.py to match repo idioms Use monkeypatch.setitem for the sys.modules injection (auto-teardown instead of manual save/restore), move the runtime reload into the fixture so test bodies stay focused on the assertion, inline the fake initializer, and strengthen the first assertion to isinstance( RemoteDbUtils) so it explicitly proves the Spark Connect fallback path was taken rather than just that some dbutils exists. --- tests/test_runtime.py | 95 ++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 59 deletions(-) diff --git a/tests/test_runtime.py b/tests/test_runtime.py index d54e72bf6..48002b3d3 100644 --- a/tests/test_runtime.py +++ b/tests/test_runtime.py @@ -1,4 +1,4 @@ -"""Tests for the import-time behavior of the ``databricks.sdk.runtime`` package.""" +"""Tests for the import-time behavior of ``databricks.sdk.runtime``.""" import importlib import sys @@ -6,79 +6,56 @@ import pytest +from databricks.sdk.dbutils import RemoteDbUtils -class _RemoteClientInitializer: - """Stand-in for ``dbruntime.UserNamespaceInitializer`` on a Spark Connect runtime. - On a shared-access-mode (Spark Connect) cluster, materializing the legacy user namespace - builds a ``SparkContext``, which is unavailable in remote clients and raises - ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``. - """ - - @staticmethod - def getOrCreate(): - class _Namespace: - def get_namespace_globals(self): - raise RuntimeError( - "[CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT] Calls to SparkContext are not " - "supported on a Spark Connect cluster. Use spark instead." - ) +@pytest.fixture +def spark_connect_runtime(monkeypatch): + """``dbruntime`` is importable, but materializing the legacy user namespace raises + ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT`` — the Spark Connect failure mode.""" - return _Namespace() + class _Initializer: + @staticmethod + def getOrCreate(): + class _Namespace: + def get_namespace_globals(self): + raise RuntimeError( + "[CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT] Calls to SparkContext are " + "not supported on a Spark Connect cluster. Use spark instead." + ) + return _Namespace() -@pytest.fixture -def spark_connect_runtime(monkeypatch): - """Simulate a Spark Connect runtime: ``dbruntime`` is importable, but materializing the - legacy user namespace raises ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``.""" fake = types.ModuleType("dbruntime") - fake.UserNamespaceInitializer = _RemoteClientInitializer - had_dbruntime = "dbruntime" in sys.modules - saved_dbruntime = sys.modules.get("dbruntime") - sys.modules["dbruntime"] = fake - - # The remote fallback constructs ``RemoteDbUtils()``, which initializes a default ``Config``; - # give it hermetic PAT credentials so the fallback itself doesn't fail for unrelated auth - # reasons (see databricks-sdk-py#986). + fake.UserNamespaceInitializer = _Initializer + monkeypatch.setitem(sys.modules, "dbruntime", fake) + + # The remote fallback constructs ``RemoteDbUtils()``, which initializes a default + # ``Config``; hermetic PAT credentials keep the fallback from failing for unrelated + # auth reasons (see databricks-sdk-py#986). monkeypatch.setenv("DATABRICKS_HOST", "https://test.cloud.databricks.com") monkeypatch.setenv("DATABRICKS_TOKEN", "test-token") - saved_runtime = sys.modules.get("databricks.sdk.runtime") - try: - yield - finally: - if had_dbruntime: - sys.modules["dbruntime"] = saved_dbruntime - else: - sys.modules.pop("dbruntime", None) - # Restore ``databricks.sdk.runtime`` to its pre-test state. If it was loaded before this - # test, reload it cleanly while the PAT env is still set; otherwise drop it so it is - # re-imported lazily on next use. - if saved_runtime is None: - sys.modules.pop("databricks.sdk.runtime", None) - else: - importlib.reload(saved_runtime) - - -def test_runtime_import_survives_spark_connect_remote_client(spark_connect_runtime): - """Regression for dbt-databricks#1252: importing ``databricks.sdk.runtime`` on a Spark - Connect runtime must fall back to the remote implementation instead of raising.""" - import databricks.sdk.runtime as runtime + # Re-execute ``databricks.sdk.runtime``'s module body with the fake ``dbruntime`` in + # place, then restore on teardown by reloading once more without it. + import databricks.sdk.runtime - importlib.reload(runtime) # re-execute the module body with the faked ``dbruntime`` present - - assert runtime.is_local_implementation is True - assert runtime.dbutils is not None + importlib.reload(databricks.sdk.runtime) + yield + importlib.reload(databricks.sdk.runtime) -def test_workspace_client_constructs_on_spark_connect(spark_connect_runtime, config): - """End-to-end: constructing a ``WorkspaceClient`` on a Spark Connect runtime must not raise - ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT`` (the actual dbt-databricks#1252 failure, since - ``WorkspaceClient.__init__`` eagerly builds dbutils via ``databricks.sdk.runtime``).""" +def test_runtime_import_falls_back_on_spark_connect(spark_connect_runtime): + """Regression for dbt-databricks#1252: import survives the namespace failure.""" import databricks.sdk.runtime as runtime - importlib.reload(runtime) + assert runtime.is_local_implementation is True + assert isinstance(runtime.dbutils, RemoteDbUtils) + +def test_workspace_client_constructs_on_spark_connect(spark_connect_runtime, config): + """Regression for dbt-databricks#1252: ``WorkspaceClient.__init__`` eagerly builds + dbutils via ``databricks.sdk.runtime`` and must not raise on Spark Connect.""" from databricks.sdk import WorkspaceClient ws = WorkspaceClient(config=config) From 655a577e8fdc5d5b298c0fedc66d95de38761bb7 Mon Sep 17 00:00:00 2001 From: Divyansh Vijayvergia Date: Tue, 9 Jun 2026 16:44:58 +0000 Subject: [PATCH 4/4] tests: fix test_runtime.py against polluted sys.modules from earlier tests test_notebook_oauth.py caches a fake ``databricks.sdk.runtime`` directly in ``sys.modules`` without going through the import machinery, which leaves the ``runtime`` attribute on ``databricks.sdk`` unset. The previous fixture's ``import databricks.sdk.runtime`` then hit the cached fake (skipping the loader), and the follow-up ``importlib.reload( databricks.sdk.runtime)`` died with AttributeError when CI happened to run test_notebook_oauth.py first. Drop the eager import + reload from the fixture; just delitem the stale ``sys.modules`` entry via monkeypatch so the next ``import`` in the test body triggers a fresh load (which correctly sets both ``sys.modules`` and the parent attribute). Verified locally that the suite passes both in isolation and when ordered after test_notebook_oauth.py. --- tests/test_runtime.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/test_runtime.py b/tests/test_runtime.py index 48002b3d3..b509f27b8 100644 --- a/tests/test_runtime.py +++ b/tests/test_runtime.py @@ -1,6 +1,5 @@ """Tests for the import-time behavior of ``databricks.sdk.runtime``.""" -import importlib import sys import types @@ -36,13 +35,13 @@ def get_namespace_globals(self): monkeypatch.setenv("DATABRICKS_HOST", "https://test.cloud.databricks.com") monkeypatch.setenv("DATABRICKS_TOKEN", "test-token") - # Re-execute ``databricks.sdk.runtime``'s module body with the fake ``dbruntime`` in - # place, then restore on teardown by reloading once more without it. - import databricks.sdk.runtime - - importlib.reload(databricks.sdk.runtime) - yield - importlib.reload(databricks.sdk.runtime) + # Force ``databricks.sdk.runtime`` to re-execute its module body on next import so it + # picks up the fake ``dbruntime``. Earlier tests (e.g. test_notebook_oauth.py) cache a + # fake module here directly via ``sys.modules`` without going through the import + # machinery, which leaves the ``runtime`` attribute on ``databricks.sdk`` unset — + # dropping the cached entry repairs that on the next real import. ``monkeypatch`` + # restores the prior value on teardown. + monkeypatch.delitem(sys.modules, "databricks.sdk.runtime", raising=False) def test_runtime_import_falls_back_on_spark_connect(spark_connect_runtime):