Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

### Bug Fixes

* Fall back to the remote runtime implementation when the legacy user namespace cannot be materialized. On Spark Connect runtimes (e.g. shared-access-mode clusters), importing `databricks.sdk.runtime` — which happens when constructing a `WorkspaceClient` on such a cluster — tried to build a legacy `SparkContext` and raised `CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT` at import time. It now logs a warning and falls back to the Spark Connect-compatible remote implementation instead of crashing.

### Documentation

### Breaking Changes
Expand Down
18 changes: 16 additions & 2 deletions databricks/sdk/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ def inner() -> Dict[str, str]:
return None, None


# Internal implementation
# Separated from above for backward compatibility
_use_runtime_namespace = False
try:
# Internal implementation
# Separated from above for backward compatibility
from dbruntime import UserNamespaceInitializer

userNamespaceGlobals = UserNamespaceInitializer.getOrCreate().get_namespace_globals()
Expand All @@ -105,7 +106,20 @@ def inner() -> Dict[str, str]:
continue
_globals[var] = userNamespaceGlobals[var]
is_local_implementation = False
_use_runtime_namespace = True
except ImportError:
# Not running inside a classic Databricks runtime; fall back to the OSS implementation below.
pass
except Exception as e:
# On Spark Connect runtimes (e.g. shared-access-mode clusters), materializing the
# legacy user namespace builds a SparkContext, which is unavailable in remote clients
# and raises CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT. Treat this like "not in a classic
# runtime" and fall back to the OSS/remote implementation below, which is Spark
# Connect-compatible. Without this, importing databricks.sdk.runtime (and therefore
# constructing a WorkspaceClient on such a cluster) raises at import time.
logger.warning(f"Runtime namespace unavailable, falling back to remote implementation: {e}")

if not _use_runtime_namespace:
# OSS implementation
is_local_implementation = True

Expand Down
86 changes: 86 additions & 0 deletions tests/test_runtime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Tests for the import-time behavior of the ``databricks.sdk.runtime`` package."""

import importlib
import sys
import types

import pytest


class _RemoteClientInitializer:
"""Stand-in for ``dbruntime.UserNamespaceInitializer`` on a Spark Connect runtime.

On a shared-access-mode (Spark Connect) cluster, materializing the legacy user namespace
builds a ``SparkContext``, which is unavailable in remote clients and raises
``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``.
"""

@staticmethod
def getOrCreate():
class _Namespace:
def get_namespace_globals(self):
raise RuntimeError(
"[CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT] Calls to SparkContext are not "
"supported on a Spark Connect cluster. Use spark instead."
)

return _Namespace()


@pytest.fixture
def spark_connect_runtime(monkeypatch):
    """Install a fake ``dbruntime`` that behaves like a Spark Connect runtime.

    While the fixture is active, ``dbruntime`` is importable but materializing the legacy
    user namespace raises ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``. On teardown the
    previous ``dbruntime`` and ``databricks.sdk.runtime`` module state is restored.
    """
    dbruntime_key = "dbruntime"
    runtime_key = "databricks.sdk.runtime"
    # Sentinel distinguishes "no entry in sys.modules" from an entry whose value is None.
    absent = object()

    stub_module = types.ModuleType(dbruntime_key)
    stub_module.UserNamespaceInitializer = _RemoteClientInitializer
    previous_dbruntime = sys.modules.get(dbruntime_key, absent)
    sys.modules[dbruntime_key] = stub_module

    # The remote fallback constructs ``RemoteDbUtils()``, which initializes a default
    # ``Config``; hermetic PAT credentials keep the fallback from failing for unrelated
    # auth reasons (see databricks-sdk-py#986).
    pat_env = (
        ("DATABRICKS_HOST", "https://test.cloud.databricks.com"),
        ("DATABRICKS_TOKEN", "test-token"),
    )
    for env_name, env_value in pat_env:
        monkeypatch.setenv(env_name, env_value)

    previous_runtime = sys.modules.get(runtime_key)
    try:
        yield
    finally:
        # Put ``dbruntime`` back first so the reload below sees the real environment.
        if previous_dbruntime is absent:
            sys.modules.pop(dbruntime_key, None)
        else:
            sys.modules[dbruntime_key] = previous_dbruntime

        # Return ``databricks.sdk.runtime`` to its pre-test state: reload it cleanly (the
        # PAT env is still set at this point) if it had been loaded before the test,
        # otherwise drop it so the next use re-imports it lazily.
        if previous_runtime is not None:
            importlib.reload(previous_runtime)
        else:
            sys.modules.pop(runtime_key, None)


def test_runtime_import_survives_spark_connect_remote_client(spark_connect_runtime):
    """Regression for dbt-databricks#1252.

    Importing ``databricks.sdk.runtime`` on a Spark Connect runtime must fall back to the
    remote implementation rather than raise.
    """
    sdk_runtime = importlib.import_module("databricks.sdk.runtime")

    # Reload so the module body runs again while the faked ``dbruntime`` is installed.
    importlib.reload(sdk_runtime)

    assert sdk_runtime.is_local_implementation is True
    assert sdk_runtime.dbutils is not None


def test_workspace_client_constructs_on_spark_connect(spark_connect_runtime, config):
    """End-to-end check for the dbt-databricks#1252 failure.

    ``WorkspaceClient.__init__`` eagerly builds dbutils via ``databricks.sdk.runtime``, so
    constructing a client on a Spark Connect runtime must not raise
    ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT``.
    """
    sdk_runtime = importlib.import_module("databricks.sdk.runtime")
    # Re-execute the module body with the faked ``dbruntime`` in place.
    importlib.reload(sdk_runtime)

    from databricks.sdk import WorkspaceClient

    client = WorkspaceClient(config=config)

    assert client is not None
Loading