Skip to content

Commit 54e8040

Browse files
trouze and sd-db authored
fix: capability detection for named compute (databricks_compute) (#1355)
Resolves #1354 ### Description Fixes a timing bug where `table_format='iceberg'` (and other capability-gated features) fail with "iceberg requires DBR 14.3+" when the model targets a named compute via the `databricks_compute` model config, even though that compute supports the required DBR version. ### Fix Eagerly cache DBR capabilities in `_create_fresh_connection()` before reading the cache: a new `_try_cache_dbr_capabilities()` classmethod queries the DBR version at connection creation time rather than waiting for the lazy open(), and writes to the cache only when the version query actually succeeds (so a failed eager lookup cannot store a poisoned entry that the idempotency guard in open() would later treat as authoritative). The call is idempotent (guarded by `if http_path not in cache`), so for default compute with a warm cache it's a no-op. ### Key changes - `dbt/adapters/databricks/connections.py`: Added the `_try_cache_dbr_capabilities` classmethod and a call to `self._try_cache_dbr_capabilities(creds, conn.http_path)` before `conn.capabilities = self._get_capabilities_for_http_path(conn.http_path)` in `_create_fresh_connection()` - `tests/unit/test_connection_manager.py`: Added 3 unit tests validating that a None version query result does not write to the cache, that a successful version query caches capabilities correctly, and that an already-cached path skips the version query entirely ### Checklist - [x] I have run this code in development and it appears to resolve the stated issue - [x] This PR includes tests, or tests are not required/relevant for this PR - [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-databricks next" section. --------- Signed-off-by: trouze <tyler@tylerrouze.com> Co-authored-by: Shubham Dhal <shubham.dhal@databricks.com>
1 parent ea6a384 commit 54e8040

File tree

2 files changed

+79
-1
lines changed

2 files changed

+79
-1
lines changed

dbt/adapters/databricks/connections.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,23 @@ def _cache_dbr_capabilities(cls, creds: DatabricksCredentials, http_path: str) -
208208
is_sql_warehouse=not is_cluster,
209209
)
210210

211+
@classmethod
212+
def _try_cache_dbr_capabilities(cls, creds: DatabricksCredentials, http_path: str) -> None:
213+
"""Like _cache_dbr_capabilities, but only writes to the cache when the version query
214+
actually succeeds. This prevents a failed eager lookup (e.g. credentials_manager not yet
215+
set, cluster still spinning up) from storing a None-version entry that the idempotency
216+
guard in open() would later treat as authoritative, causing all capability checks to
217+
return False.
218+
"""
219+
if http_path not in cls._dbr_capabilities_cache:
220+
is_cluster = is_cluster_http_path(http_path, creds.cluster_id)
221+
dbr_version = cls._query_dbr_version(creds, http_path)
222+
if dbr_version is not None:
223+
cls._dbr_capabilities_cache[http_path] = DBRCapabilities(
224+
dbr_version=dbr_version,
225+
is_sql_warehouse=not is_cluster,
226+
)
227+
211228
def cancel_open(self) -> list[str]:
212229
cancelled = super().cancel_open()
213230
logger.info("Cancelling open python jobs")
@@ -288,6 +305,7 @@ def _create_fresh_connection(
288305
conn.http_path = QueryConfigUtils.get_http_path(query_header_context, creds)
289306
conn.thread_identifier = cast(tuple[int, int], self.get_thread_identifier())
290307
conn._query_header_context = query_header_context
308+
self._try_cache_dbr_capabilities(creds, conn.http_path)
291309
conn.capabilities = self._get_capabilities_for_http_path(conn.http_path)
292310
conn.handle = LazyHandle(self.open)
293311

tests/unit/test_connection_manager.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
from multiprocessing import get_context
22
from unittest.mock import Mock, patch
33

4-
from dbt.adapters.databricks.connections import DatabricksConnectionManager, DatabricksDBTConnection
4+
import pytest
5+
6+
from dbt.adapters.databricks.connections import (
7+
DatabricksConnectionManager,
8+
DatabricksDBTConnection,
9+
)
510
from dbt.adapters.databricks.credentials import DatabricksCredentials
11+
from dbt.adapters.databricks.dbr_capabilities import DBRCapabilities, DBRCapability
612
from dbt.adapters.databricks.utils import is_cluster_http_path
713

814

@@ -88,3 +94,57 @@ def test_open_calls_is_cluster_http_path_for_warehouse(
8894
args, kwargs = mock_from_connection_args.call_args
8995
# Second argument (is_cluster) should be True for warehouse path with cluster_id
9096
assert args[1] is True
97+
98+
99+
class TestTryCacheDbr:
100+
"""Unit tests for _try_cache_dbr_capabilities."""
101+
102+
HTTP_PATH = "sql/protocolv1/o/1234567890123456/cluster-abc"
103+
104+
@pytest.fixture(autouse=True)
105+
def clear_cache(self):
106+
DatabricksConnectionManager._dbr_capabilities_cache = {}
107+
yield
108+
DatabricksConnectionManager._dbr_capabilities_cache = {}
109+
110+
@patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=None)
111+
def test_does_not_write_to_cache_when_version_is_none(self, mock_query):
112+
"""When the version query returns None, the cache must not be written.
113+
114+
This prevents a poisoned None entry from blocking the authoritative write in open().
115+
"""
116+
creds = Mock(spec=DatabricksCredentials)
117+
creds.cluster_id = None
118+
119+
DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH)
120+
121+
assert self.HTTP_PATH not in DatabricksConnectionManager._dbr_capabilities_cache
122+
mock_query.assert_called_once_with(creds, self.HTTP_PATH)
123+
124+
@patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=(15, 4))
125+
def test_writes_to_cache_when_version_is_known(self, mock_query):
126+
"""When the version query succeeds, capabilities are cached correctly."""
127+
creds = Mock(spec=DatabricksCredentials)
128+
creds.cluster_id = None
129+
130+
DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH)
131+
132+
mock_query.assert_called_once_with(creds, self.HTTP_PATH)
133+
caps = DatabricksConnectionManager._dbr_capabilities_cache.get(self.HTTP_PATH)
134+
assert caps is not None
135+
assert caps.dbr_version == (15, 4)
136+
assert not caps.is_sql_warehouse
137+
assert caps.has_capability(DBRCapability.ICEBERG)
138+
139+
@patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=(15, 4))
140+
def test_skips_write_when_already_cached(self, mock_query):
141+
"""If the path is already in cache, the version query is never made."""
142+
creds = Mock(spec=DatabricksCredentials)
143+
creds.cluster_id = None
144+
existing = DBRCapabilities(dbr_version=(14, 3), is_sql_warehouse=False)
145+
DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] = existing
146+
147+
DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH)
148+
149+
mock_query.assert_not_called()
150+
assert DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] is existing

0 commit comments

Comments (0)