diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
index b4e3d1c56..085b4d09a 100644
--- a/NEXT_CHANGELOG.md
+++ b/NEXT_CHANGELOG.md
@@ -8,6 +8,8 @@
 
 ### Bug Fixes
 
+* Fall back to the remote runtime implementation when the legacy user namespace cannot be materialized. On Spark Connect runtimes (e.g. shared-access-mode clusters), importing `databricks.sdk.runtime` — which happens when constructing a `WorkspaceClient` on such a cluster — tried to build a legacy `SparkContext` and raised `CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT` at import time. It now logs a warning and falls back to the Spark Connect-compatible remote implementation instead of crashing.
+
 ### Documentation
 
 ### Breaking Changes
diff --git a/databricks/sdk/runtime/__init__.py b/databricks/sdk/runtime/__init__.py
index 1b5c40573..4d994d0b5 100644
--- a/databricks/sdk/runtime/__init__.py
+++ b/databricks/sdk/runtime/__init__.py
@@ -93,9 +93,10 @@ def inner() -> Dict[str, str]:
         return None, None
 
 
+# Internal implementation, separated from above for backward compatibility.
+# _use_runtime_namespace becomes True only after the runtime namespace has been loaded.
+_use_runtime_namespace = False
 try:
-    # Internal implementation
-    # Separated from above for backward compatibility
     from dbruntime import UserNamespaceInitializer
 
     userNamespaceGlobals = UserNamespaceInitializer.getOrCreate().get_namespace_globals()
@@ -105,7 +106,20 @@ def inner() -> Dict[str, str]:
             continue
         _globals[var] = userNamespaceGlobals[var]
     is_local_implementation = False
+    _use_runtime_namespace = True
 except ImportError:
+    # Not running inside a classic Databricks runtime; fall back to the OSS implementation below.
+    pass
+except Exception as e:
+    # On Spark Connect runtimes (e.g. shared-access-mode clusters), materializing the
+    # legacy user namespace builds a SparkContext, which is unavailable in remote clients
+    # and raises CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT. Treat this like "not in a classic
+    # runtime" and fall back to the OSS/remote implementation below, which is Spark
+    # Connect-compatible. Without this, importing databricks.sdk.runtime (and therefore
+    # constructing a WorkspaceClient on such a cluster) raises at import time.
+    logger.warning(f"Runtime namespace unavailable, falling back to remote implementation: {e}")
+
+if not _use_runtime_namespace:
     # OSS implementation
     is_local_implementation = True
 
diff --git a/tests/test_runtime.py b/tests/test_runtime.py
new file mode 100644
index 000000000..d54e72bf6
--- /dev/null
+++ b/tests/test_runtime.py
@@ -0,0 +1,86 @@
+"""Tests for the import-time behavior of the ``databricks.sdk.runtime`` package."""
+
+import importlib
+import sys
+import types
+
+import pytest
+
+
+class _RemoteClientInitializer:
+    """Stand-in for ``dbruntime.UserNamespaceInitializer`` on a Spark Connect runtime.
+
+    On a shared-access-mode (Spark Connect) cluster, materializing the legacy user namespace
+    builds a ``SparkContext``, which is unavailable in remote clients and raises
+    ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``.
+    """
+
+    @staticmethod
+    def getOrCreate():
+        class _Namespace:
+            def get_namespace_globals(self):
+                raise RuntimeError(
+                    "[CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT] Calls to SparkContext are not "
+                    "supported on a Spark Connect cluster. Use spark instead."
+                )
+
+        return _Namespace()
+
+
+@pytest.fixture
+def spark_connect_runtime(monkeypatch):
+    """Simulate a Spark Connect runtime: ``dbruntime`` is importable, but materializing the
+    legacy user namespace raises ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``."""
+    fake = types.ModuleType("dbruntime")
+    fake.UserNamespaceInitializer = _RemoteClientInitializer
+    had_dbruntime = "dbruntime" in sys.modules
+    saved_dbruntime = sys.modules.get("dbruntime")
+    sys.modules["dbruntime"] = fake
+
+    # The remote fallback constructs ``RemoteDbUtils()``, which initializes a default ``Config``;
+    # give it hermetic PAT credentials so the fallback itself doesn't fail for unrelated auth
+    # reasons (see databricks-sdk-py#986).
+    monkeypatch.setenv("DATABRICKS_HOST", "https://test.cloud.databricks.com")
+    monkeypatch.setenv("DATABRICKS_TOKEN", "test-token")
+
+    saved_runtime = sys.modules.get("databricks.sdk.runtime")
+    try:
+        yield
+    finally:
+        if had_dbruntime:
+            sys.modules["dbruntime"] = saved_dbruntime
+        else:
+            sys.modules.pop("dbruntime", None)
+        # Restore ``databricks.sdk.runtime`` to its pre-test state. If it was loaded before this
+        # test, reload it cleanly while the PAT env is still set; otherwise drop it so it is
+        # re-imported lazily on next use.
+        if saved_runtime is None:
+            sys.modules.pop("databricks.sdk.runtime", None)
+        else:
+            importlib.reload(saved_runtime)
+
+
+def test_runtime_import_survives_spark_connect_remote_client(spark_connect_runtime):
+    """Regression for dbt-databricks#1252: importing ``databricks.sdk.runtime`` on a Spark
+    Connect runtime must fall back to the remote implementation instead of raising."""
+    import databricks.sdk.runtime as runtime
+
+    importlib.reload(runtime)  # re-execute the module body with the faked ``dbruntime`` present
+
+    assert runtime.is_local_implementation is True
+    assert runtime.dbutils is not None
+
+
+def test_workspace_client_constructs_on_spark_connect(spark_connect_runtime, config):
+    """End-to-end: constructing a ``WorkspaceClient`` on a Spark Connect runtime must not raise
+    ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT`` (the actual dbt-databricks#1252 failure, since
+    ``WorkspaceClient.__init__`` eagerly builds dbutils via ``databricks.sdk.runtime``)."""
+    import databricks.sdk.runtime as runtime
+
+    importlib.reload(runtime)
+
+    from databricks.sdk import WorkspaceClient
+
+    ws = WorkspaceClient(config=config)
+
+    assert ws is not None