diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 021f27235..c91bb5225 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -8,6 +8,7 @@ ### Bug Fixes +* Fall back to the remote runtime implementation when the legacy user namespace cannot be materialized. Previously, on Spark Connect runtimes (e.g. shared-access-mode clusters), importing `databricks.sdk.runtime` — which happens when constructing a `WorkspaceClient` on such a cluster — tried to build a legacy `SparkContext` and raised `CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT` at import time. It now logs a warning and falls back to the Spark Connect-compatible remote implementation instead of crashing. * Cache tokens minted by `DatabricksOidcTokenSource` (Workload Identity Federation / account-wide token federation). Previously a fresh `/oidc/v1/token` exchange was performed on every authenticated API call, adding latency, amplifying transient federation-policy errors, and hitting OIDC token-endpoint rate limits. The token source now reuses the cached token until it is stale or expired, fetching a fresh ID token on each refresh to support rotation. 
### Documentation diff --git a/databricks/sdk/runtime/__init__.py b/databricks/sdk/runtime/__init__.py index 1b5c40573..a57104311 100644 --- a/databricks/sdk/runtime/__init__.py +++ b/databricks/sdk/runtime/__init__.py @@ -93,9 +93,10 @@ def inner() -> Dict[str, str]: return None, None +# Internal implementation +# Separated from above for backward compatibility +_use_runtime_namespace = False try: - # Internal implementation - # Separated from above for backward compatibility from dbruntime import UserNamespaceInitializer userNamespaceGlobals = UserNamespaceInitializer.getOrCreate().get_namespace_globals() @@ -105,7 +106,23 @@ def inner() -> Dict[str, str]: continue _globals[var] = userNamespaceGlobals[var] is_local_implementation = False + _use_runtime_namespace = True except ImportError: + # Not running inside a classic Databricks runtime; fall back to the OSS implementation below. + pass +except Exception as e: + # On Spark Connect runtimes (e.g. shared-access-mode clusters), materializing the + # legacy user namespace builds a SparkContext, which is unavailable in remote clients + # and raises CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT. Treat this like "not in a classic + # runtime" and fall back to the OSS/remote implementation below, which is Spark + # Connect-compatible. Without this, importing databricks.sdk.runtime (and therefore + # constructing a WorkspaceClient on such a cluster) raises at import time. The catch + # is broad rather than typed on PySparkRuntimeError so the SDK does not need to import + # pyspark just to narrow the exception type; any other unexpected failure here is also + # safer to surface as a warning + remote fallback than as a constructor crash. 
+ logger.warning(f"Runtime namespace unavailable, falling back to remote implementation: {e!r}") + +if not _use_runtime_namespace: # OSS implementation is_local_implementation = True diff --git a/tests/test_runtime.py b/tests/test_runtime.py new file mode 100644 index 000000000..b509f27b8 --- /dev/null +++ b/tests/test_runtime.py @@ -0,0 +1,62 @@ +"""Tests for the import-time behavior of ``databricks.sdk.runtime``.""" + +import sys +import types + +import pytest + +from databricks.sdk.dbutils import RemoteDbUtils + + +@pytest.fixture +def spark_connect_runtime(monkeypatch): + """``dbruntime`` is importable, but materializing the legacy user namespace raises + ``CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT`` — the Spark Connect failure mode.""" + + class _Initializer: + @staticmethod + def getOrCreate(): + class _Namespace: + def get_namespace_globals(self): + raise RuntimeError( + "[CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT] Calls to SparkContext are " + "not supported on a Spark Connect cluster. Use spark instead." + ) + + return _Namespace() + + fake = types.ModuleType("dbruntime") + fake.UserNamespaceInitializer = _Initializer + monkeypatch.setitem(sys.modules, "dbruntime", fake) + + # The remote fallback constructs ``RemoteDbUtils()``, which initializes a default + # ``Config``; hermetic PAT credentials keep the fallback from failing for unrelated + # auth reasons (see databricks-sdk-py#986). + monkeypatch.setenv("DATABRICKS_HOST", "https://test.cloud.databricks.com") + monkeypatch.setenv("DATABRICKS_TOKEN", "test-token") + + # Force ``databricks.sdk.runtime`` to re-execute its module body on next import so it + # picks up the fake ``dbruntime``. Earlier tests (e.g. test_notebook_oauth.py) cache a + # fake module here directly via ``sys.modules`` without going through the import + # machinery, which leaves the ``runtime`` attribute on ``databricks.sdk`` unset — + # dropping the cached entry repairs that on the next real import. 
``monkeypatch`` + # restores the prior value on teardown. + monkeypatch.delitem(sys.modules, "databricks.sdk.runtime", raising=False) + + +def test_runtime_import_falls_back_on_spark_connect(spark_connect_runtime): + """Regression for dbt-databricks#1252: import survives the namespace failure.""" + import databricks.sdk.runtime as runtime + + assert runtime.is_local_implementation is True + assert isinstance(runtime.dbutils, RemoteDbUtils) + + +def test_workspace_client_constructs_on_spark_connect(spark_connect_runtime, config): + """Regression for dbt-databricks#1252: ``WorkspaceClient.__init__`` eagerly builds + dbutils via ``databricks.sdk.runtime`` and must not raise on Spark Connect.""" + from databricks.sdk import WorkspaceClient + + ws = WorkspaceClient(config=config) + + assert isinstance(ws.dbutils, RemoteDbUtils)