Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

### Bug Fixes

* Fall back to the remote runtime implementation when the legacy user namespace cannot be materialized. On Spark Connect runtimes (e.g. shared-access-mode clusters), importing `databricks.sdk.runtime` — which happens when constructing a `WorkspaceClient` on such a cluster — tried to build a legacy `SparkContext` and raised `CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT` at import time. It now logs a warning and falls back to the Spark Connect-compatible remote implementation instead of crashing.
* Cache tokens minted by `DatabricksOidcTokenSource` (Workload Identity Federation / account-wide token federation). Previously a fresh `/oidc/v1/token` exchange was performed on every authenticated API call, adding latency, amplifying transient federation-policy errors, and hitting OIDC token-endpoint rate limits. The token source now reuses the cached token until it is stale or expired, fetching a fresh ID token on each refresh to support rotation.

### Documentation
Expand Down
21 changes: 19 additions & 2 deletions databricks/sdk/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ def inner() -> Dict[str, str]:
return None, None


# Internal implementation
# Separated from above for backward compatibility
_use_runtime_namespace = False
try:
# Internal implementation
# Separated from above for backward compatibility
from dbruntime import UserNamespaceInitializer

userNamespaceGlobals = UserNamespaceInitializer.getOrCreate().get_namespace_globals()
Expand All @@ -105,7 +106,23 @@ def inner() -> Dict[str, str]:
continue
_globals[var] = userNamespaceGlobals[var]
is_local_implementation = False
_use_runtime_namespace = True
except ImportError:
# Not running inside a classic Databricks runtime; fall back to the OSS implementation below.
pass
except Exception as e:
# On Spark Connect runtimes (e.g. shared-access-mode clusters), materializing the
# legacy user namespace builds a SparkContext, which is unavailable in remote clients
# and raises CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT. Treat this like "not in a classic
# runtime" and fall back to the OSS/remote implementation below, which is Spark
# Connect-compatible. Without this, importing databricks.sdk.runtime (and therefore
# constructing a WorkspaceClient on such a cluster) raises at import time. The catch
# is broad rather than typed on PySparkRuntimeError so the SDK does not need to import
# pyspark just to narrow the exception type; any other unexpected failure here is also
# safer surfaced as a warning + remote fallback than as a constructor crash.
logger.warning(f"Runtime namespace unavailable, falling back to remote implementation: {e}")

if not _use_runtime_namespace:
# OSS implementation
is_local_implementation = True

Expand Down
62 changes: 62 additions & 0 deletions tests/test_runtime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Tests for the import-time behavior of ``databricks.sdk.runtime``."""

import sys
import types

import pytest

from databricks.sdk.dbutils import RemoteDbUtils


@pytest.fixture
def spark_connect_runtime(monkeypatch):
    """Simulate a Spark Connect runtime for the duration of one test.

    ``dbruntime`` is importable, but materializing the legacy user namespace raises
    ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT`` — the Spark Connect failure mode.

    The fixture installs a fake ``dbruntime`` module, sets hermetic PAT credentials,
    and evicts any cached ``databricks.sdk.runtime`` so the next import re-executes
    the module body against the fake. On teardown it also evicts the module that the
    test imported, so state built against the fake never leaks into later tests.
    """

    class _Namespace:
        # Stands in for the object returned by ``UserNamespaceInitializer.getOrCreate()``.
        def get_namespace_globals(self):
            raise RuntimeError(
                "[CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT] Calls to SparkContext are "
                "not supported on a Spark Connect cluster. Use spark instead."
            )

    class _Initializer:
        # Stands in for ``dbruntime.UserNamespaceInitializer``.
        @staticmethod
        def getOrCreate():
            return _Namespace()

    fake = types.ModuleType("dbruntime")
    fake.UserNamespaceInitializer = _Initializer
    monkeypatch.setitem(sys.modules, "dbruntime", fake)

    # The remote fallback constructs ``RemoteDbUtils()``, which initializes a default
    # ``Config``; hermetic PAT credentials keep the fallback from failing for unrelated
    # auth reasons (see databricks-sdk-py#986).
    monkeypatch.setenv("DATABRICKS_HOST", "https://test.cloud.databricks.com")
    monkeypatch.setenv("DATABRICKS_TOKEN", "test-token")

    # Force ``databricks.sdk.runtime`` to re-execute its module body on next import so it
    # picks up the fake ``dbruntime``. Earlier tests (e.g. test_notebook_oauth.py) cache a
    # fake module here directly via ``sys.modules`` without going through the import
    # machinery, which leaves the ``runtime`` attribute on ``databricks.sdk`` unset —
    # dropping the cached entry repairs that on the next real import. If an entry was
    # present, ``monkeypatch`` restores it on teardown.
    monkeypatch.delitem(sys.modules, "databricks.sdk.runtime", raising=False)

    yield

    # ``monkeypatch.delitem(..., raising=False)`` records nothing to undo when the key
    # was absent, so without this the module imported against the fake ``dbruntime``
    # (and the fake credentials) would stay cached for every later test. This runs
    # before ``monkeypatch`` teardown, which then puts back any prior entry.
    sys.modules.pop("databricks.sdk.runtime", None)


def test_runtime_import_falls_back_on_spark_connect(spark_connect_runtime):
    """Regression for dbt-databricks#1252: importing ``databricks.sdk.runtime`` must
    survive the namespace failure and end up on the remote implementation."""
    # Keep the dotted ``import ... as`` form: it goes through the import machinery and
    # re-executes the module body after the fixture evicted the cached entry.
    import databricks.sdk.runtime as sdk_runtime

    assert isinstance(sdk_runtime.dbutils, RemoteDbUtils)
    assert sdk_runtime.is_local_implementation is True


def test_workspace_client_constructs_on_spark_connect(spark_connect_runtime, config):
    """Regression for dbt-databricks#1252: ``WorkspaceClient.__init__`` eagerly builds
    dbutils via ``databricks.sdk.runtime`` and must not raise on Spark Connect.

    The real check is that construction completes without raising; the final
    assertion only confirms an object came back.
    """
    from databricks.sdk import WorkspaceClient

    client = WorkspaceClient(config=config)

    assert client is not None
Loading