Skip to content

Commit c9c9456

Browse files
authored
fix: prevent _cache_dbr_capabilities from caching None-version entries (#1398) (#1414)
Fixes #1398. `_cache_dbr_capabilities` was caching `DBRCapabilities(dbr_version=None)` when `_query_dbr_version()` failed, and the idempotency guard then blocked every retry — silently disabling all version-gated features for the life of the process. Fix: apply the same `None`-guard already used by the sibling `_try_cache_dbr_capabilities`, and switch the downstream read in `open()` to `.get(..., DBRCapabilities())` so a now-possibly-missing entry doesn't `KeyError`. Failure-case behavior is unchanged for the current connection; subsequent `open()` calls can now retry instead of staying stuck. Follow-ups tracked in #1413. ## Test plan - [x] `hatch run unit tests/unit/test_connection_manager.py` — 745 pass. - [x] New `TestCacheDbr` regression mirrors `TestTryCacheDbr` + a `test_retry_succeeds_after_transient_failure` case specific to #1398 (verified it fails on `main`, passes with the fix). - [ ] CI functional run.
1 parent ff4f80b commit c9c9456

3 files changed

Lines changed: 36 additions & 31 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
- Validate relation identifier length at creation time and raise a clear error when it exceeds Databricks' 255-character limit ([#1309](https://github.com/databricks/dbt-databricks/issues/1309))
1010
- Fix spurious `MicrobatchConcurrency` behavior-change warning firing on every run regardless of whether the project contained microbatch models ([#1406](https://github.com/databricks/dbt-databricks/issues/1406))
11+
- Fix DBR capability cache being permanently poisoned by a transient version-query failure ([#1398](https://github.com/databricks/dbt-databricks/issues/1398))
1112

1213
## dbt-databricks 1.11.7 (Apr 17, 2026)
1314

dbt/adapters/databricks/connections.py

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,9 @@ def is_cluster(self) -> bool:
168168
databricks_conn = cast(DatabricksDBTConnection, conn)
169169
return is_cluster_http_path(databricks_conn.http_path, conn.credentials.cluster_id)
170170

171-
def _get_capabilities_for_http_path(self, http_path: str) -> DBRCapabilities:
172-
return self._dbr_capabilities_cache.get(http_path, DBRCapabilities())
171+
@classmethod
172+
def _get_capabilities_for_http_path(cls, http_path: str) -> DBRCapabilities:
173+
return cls._dbr_capabilities_cache.get(http_path, DBRCapabilities())
173174

174175
@classmethod
175176
def _query_dbr_version(
@@ -193,29 +194,19 @@ def _query_dbr_version(
193194
result = cursor.fetchone()
194195
if result:
195196
return SqlUtils.extract_dbr_version(result[1])
196-
except Exception:
197-
pass
197+
except Exception as e:
198+
logger.debug(f"Failed to query DBR version for http_path={http_path}: {e}")
198199

199200
return None
200201

201202
@classmethod
202203
def _cache_dbr_capabilities(cls, creds: DatabricksCredentials, http_path: str) -> None:
203-
if http_path not in cls._dbr_capabilities_cache:
204-
is_cluster = is_cluster_http_path(http_path, creds.cluster_id)
205-
dbr_version = cls._query_dbr_version(creds, http_path)
204+
"""Cache DBR capabilities for an http_path on first successful version query.
206205
207-
cls._dbr_capabilities_cache[http_path] = DBRCapabilities(
208-
dbr_version=dbr_version,
209-
is_sql_warehouse=not is_cluster,
210-
)
211-
212-
@classmethod
213-
def _try_cache_dbr_capabilities(cls, creds: DatabricksCredentials, http_path: str) -> None:
214-
"""Like _cache_dbr_capabilities, but only writes to the cache when the version query
215-
actually succeeds. This prevents a failed eager lookup (e.g. credentials_manager not yet
216-
set, cluster still spinning up) from storing a None-version entry that the idempotency
217-
guard in open() would later treat as authoritative, causing all capability checks to
218-
return False.
206+
Only writes when the version query succeeds: a failed lookup (credentials_manager
207+
not yet set, cluster spinning up) must not store a None-version entry, since the
208+
idempotency guard would then treat it as authoritative and disable every
209+
capability check for the rest of the process.
219210
"""
220211
if http_path not in cls._dbr_capabilities_cache:
221212
is_cluster = is_cluster_http_path(http_path, creds.cluster_id)
@@ -306,7 +297,7 @@ def _create_fresh_connection(
306297
conn.http_path = QueryConfigUtils.get_http_path(query_header_context, creds)
307298
conn.thread_identifier = cast(tuple[int, int], self.get_thread_identifier())
308299
conn._query_header_context = query_header_context
309-
self._try_cache_dbr_capabilities(creds, conn.http_path)
300+
self._cache_dbr_capabilities(creds, conn.http_path)
310301
conn.capabilities = self._get_capabilities_for_http_path(conn.http_path)
311302
conn.handle = LazyHandle(self.open)
312303

@@ -507,9 +498,9 @@ def connect() -> DatabricksHandle:
507498
if conn:
508499
databricks_connection.session_id = conn.session_id
509500
cls._cache_dbr_capabilities(creds, databricks_connection.http_path)
510-
databricks_connection.capabilities = cls._dbr_capabilities_cache[
501+
databricks_connection.capabilities = cls._get_capabilities_for_http_path(
511502
databricks_connection.http_path
512-
]
503+
)
513504
return conn
514505
else:
515506
raise DbtDatabaseError("Failed to create connection")

tests/unit/test_connection_manager.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ def test_open_calls_is_cluster_http_path_for_warehouse(
9696
assert args[1] is True
9797

9898

99-
class TestTryCacheDbr:
100-
"""Unit tests for _try_cache_dbr_capabilities."""
99+
class TestCacheDbr:
100+
"""Unit tests for _cache_dbr_capabilities."""
101101

102102
HTTP_PATH = "sql/protocolv1/o/1234567890123456/cluster-abc"
103103

@@ -109,14 +109,11 @@ def clear_cache(self):
109109

110110
@patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=None)
111111
def test_does_not_write_to_cache_when_version_is_none(self, mock_query):
112-
"""When the version query returns None, the cache must not be written.
113-
114-
This prevents a poisoned None entry from blocking the authoritative write in open().
115-
"""
112+
"""Regression for #1398: a None version query result must not poison the cache."""
116113
creds = Mock(spec=DatabricksCredentials)
117114
creds.cluster_id = None
118115

119-
DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH)
116+
DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH)
120117

121118
assert self.HTTP_PATH not in DatabricksConnectionManager._dbr_capabilities_cache
122119
mock_query.assert_called_once_with(creds, self.HTTP_PATH)
@@ -127,7 +124,7 @@ def test_writes_to_cache_when_version_is_known(self, mock_query):
127124
creds = Mock(spec=DatabricksCredentials)
128125
creds.cluster_id = None
129126

130-
DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH)
127+
DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH)
131128

132129
mock_query.assert_called_once_with(creds, self.HTTP_PATH)
133130
caps = DatabricksConnectionManager._dbr_capabilities_cache.get(self.HTTP_PATH)
@@ -144,7 +141,23 @@ def test_skips_write_when_already_cached(self, mock_query):
144141
existing = DBRCapabilities(dbr_version=(14, 3), is_sql_warehouse=False)
145142
DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] = existing
146143

147-
DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH)
144+
DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH)
148145

149146
mock_query.assert_not_called()
150147
assert DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] is existing
148+
149+
def test_retry_succeeds_after_transient_failure(self):
150+
"""Regression for #1398: after a transient None result, the next call must re-query."""
151+
creds = Mock(spec=DatabricksCredentials)
152+
creds.cluster_id = None
153+
154+
with patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=None):
155+
DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH)
156+
157+
with patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=(16, 2)):
158+
DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH)
159+
160+
caps = DatabricksConnectionManager._dbr_capabilities_cache.get(self.HTTP_PATH)
161+
assert caps is not None
162+
assert caps.dbr_version == (16, 2)
163+
assert caps.has_capability(DBRCapability.ICEBERG)

0 commit comments

Comments
 (0)