From 234e34943f8e9e21ddccebf2392a94b20e1395d4 Mon Sep 17 00:00:00 2001 From: Shubham Dhal Date: Tue, 21 Apr 2026 15:43:42 +0530 Subject: [PATCH 1/4] fix: prevent _cache_dbr_capabilities from caching None-version entries (#1398) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When _query_dbr_version() returns None after a successful connection (e.g. transient failure on `SET spark.databricks.clusterUsageTags.sparkVersion`), _cache_dbr_capabilities would write DBRCapabilities(dbr_version=None) to the class-level cache, and the idempotency guard at the top of the method would block every subsequent re-query — silently disabling every version-gated feature for the life of the process. Apply the same None-guard already present in _try_cache_dbr_capabilities, and switch the downstream read in open() to .get() with a default DBRCapabilities() so the now-possibly-missing entry no longer KeyErrors. --- CHANGELOG.md | 6 +++ dbt/adapters/databricks/connections.py | 16 +++--- tests/unit/test_connection_manager.py | 67 ++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c5767bf9..3aced5d9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## dbt-databricks next (TBD) + +### Fixes + +- Fix DBR capability cache being permanently poisoned by a transient version-query failure ([#1398](https://github.com/databricks/dbt-databricks/issues/1398)) + ## dbt-databricks 1.11.7 (Apr 17, 2026) ### Features diff --git a/dbt/adapters/databricks/connections.py b/dbt/adapters/databricks/connections.py index f22449bc5..f92aa03d6 100644 --- a/dbt/adapters/databricks/connections.py +++ b/dbt/adapters/databricks/connections.py @@ -202,11 +202,11 @@ def _cache_dbr_capabilities(cls, creds: DatabricksCredentials, http_path: str) - if http_path not in cls._dbr_capabilities_cache: is_cluster = is_cluster_http_path(http_path, creds.cluster_id) dbr_version = cls._query_dbr_version(creds, http_path) - - cls._dbr_capabilities_cache[http_path] = DBRCapabilities( - dbr_version=dbr_version, - is_sql_warehouse=not is_cluster, - ) + if dbr_version is not None: + cls._dbr_capabilities_cache[http_path] = DBRCapabilities( + dbr_version=dbr_version, + is_sql_warehouse=not is_cluster, + ) @classmethod def _try_cache_dbr_capabilities(cls, creds: DatabricksCredentials, http_path: str) -> None: @@ -506,9 +506,9 @@ def connect() -> DatabricksHandle: if conn: databricks_connection.session_id = conn.session_id cls._cache_dbr_capabilities(creds, databricks_connection.http_path) - databricks_connection.capabilities = cls._dbr_capabilities_cache[ - databricks_connection.http_path - ] + databricks_connection.capabilities = cls._dbr_capabilities_cache.get( + databricks_connection.http_path, DBRCapabilities() + ) return conn else: raise DbtDatabaseError("Failed to create connection") diff --git a/tests/unit/test_connection_manager.py b/tests/unit/test_connection_manager.py index 1beed9065..c1b878390 100644 --- a/tests/unit/test_connection_manager.py +++ b/tests/unit/test_connection_manager.py @@ -148,3 +148,70 @@ def test_skips_write_when_already_cached(self, mock_query): mock_query.assert_not_called() assert DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] is existing + + +class TestCacheDbr: + """Unit tests for _cache_dbr_capabilities.""" + + HTTP_PATH = "sql/protocolv1/o/1234567890123456/cluster-abc" + + @pytest.fixture(autouse=True) + def clear_cache(self): + DatabricksConnectionManager._dbr_capabilities_cache = {} + yield + DatabricksConnectionManager._dbr_capabilities_cache = {} + + @patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=None) + def test_does_not_write_to_cache_when_version_is_none(self, mock_query): + """Regression for #1398: a None version query result must not poison the cache.""" + creds = Mock(spec=DatabricksCredentials) + creds.cluster_id = None + + DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH) + + assert self.HTTP_PATH not in DatabricksConnectionManager._dbr_capabilities_cache + mock_query.assert_called_once_with(creds, self.HTTP_PATH) + + @patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=(15, 4)) + def test_writes_to_cache_when_version_is_known(self, mock_query): + """When the version query succeeds, capabilities are cached correctly.""" + creds = Mock(spec=DatabricksCredentials) + creds.cluster_id = None + + DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH) + + mock_query.assert_called_once_with(creds, self.HTTP_PATH) + caps = DatabricksConnectionManager._dbr_capabilities_cache.get(self.HTTP_PATH) + assert caps is not None + assert caps.dbr_version == (15, 4) + assert not caps.is_sql_warehouse + assert caps.has_capability(DBRCapability.ICEBERG) + + @patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=(15, 4)) + def test_skips_write_when_already_cached(self, mock_query): + """If the path is already in cache, the version query is never made.""" + creds = Mock(spec=DatabricksCredentials) + creds.cluster_id = None + existing = DBRCapabilities(dbr_version=(14, 3), is_sql_warehouse=False) + DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] = existing + + DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH) + + mock_query.assert_not_called() + assert DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] is existing + + def test_retry_succeeds_after_transient_failure(self): + """Regression for #1398: after a transient None result, the next call must re-query.""" + creds = Mock(spec=DatabricksCredentials) + creds.cluster_id = None + + with patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=None): + DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH) + + with patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=(16, 2)): + DatabricksConnectionManager._cache_dbr_capabilities(creds, self.HTTP_PATH) + + caps = DatabricksConnectionManager._dbr_capabilities_cache.get(self.HTTP_PATH) + assert caps is not None + assert caps.dbr_version == (16, 2) + assert caps.has_capability(DBRCapability.ICEBERG) From e6b678ad22e3f3d47efd7eb1e46158321c2c243d Mon Sep 17 00:00:00 2001 From: Shubham Dhal Date: Sat, 25 Apr 2026 10:47:43 +0530 Subject: [PATCH 2/4] refactor: drop duplicate _try_cache_dbr_capabilities After the #1398 fix, _cache_dbr_capabilities and _try_cache_dbr_capabilities have identical bodies. Keep _cache_dbr_capabilities (absorbing the rationale for the None-guard into its docstring), update the lone caller in _create_fresh_connection, and drop the now-redundant TestTryCacheDbr class. Addresses review feedback on #1414. --- dbt/adapters/databricks/connections.py | 22 +++-------- tests/unit/test_connection_manager.py | 54 -------------------------- 2 files changed, 6 insertions(+), 70 deletions(-) diff --git a/dbt/adapters/databricks/connections.py b/dbt/adapters/databricks/connections.py index f92aa03d6..c21215998 100644 --- a/dbt/adapters/databricks/connections.py +++ b/dbt/adapters/databricks/connections.py @@ -199,22 +199,12 @@ def _query_dbr_version( @classmethod def _cache_dbr_capabilities(cls, creds: DatabricksCredentials, http_path: str) -> None: - if http_path not in cls._dbr_capabilities_cache: - is_cluster = is_cluster_http_path(http_path, creds.cluster_id) - dbr_version = cls._query_dbr_version(creds, http_path) - if dbr_version is not None: - cls._dbr_capabilities_cache[http_path] = DBRCapabilities( - dbr_version=dbr_version, - is_sql_warehouse=not is_cluster, - ) + """Cache DBR capabilities for an http_path on first successful version query. - @classmethod - def _try_cache_dbr_capabilities(cls, creds: DatabricksCredentials, http_path: str) -> None: - """Like _cache_dbr_capabilities, but only writes to the cache when the version query - actually succeeds. This prevents a failed eager lookup (e.g. credentials_manager not yet - set, cluster still spinning up) from storing a None-version entry that the idempotency - guard in open() would later treat as authoritative, causing all capability checks to - return False. + Only writes when the version query succeeds: a failed lookup (credentials_manager + not yet set, cluster spinning up) must not store a None-version entry, since the + idempotency guard would then treat it as authoritative and disable every + capability check for the rest of the process. """ if http_path not in cls._dbr_capabilities_cache: is_cluster = is_cluster_http_path(http_path, creds.cluster_id) @@ -305,7 +295,7 @@ def _create_fresh_connection( conn.http_path = QueryConfigUtils.get_http_path(query_header_context, creds) conn.thread_identifier = cast(tuple[int, int], self.get_thread_identifier()) conn._query_header_context = query_header_context - self._try_cache_dbr_capabilities(creds, conn.http_path) + self._cache_dbr_capabilities(creds, conn.http_path) conn.capabilities = self._get_capabilities_for_http_path(conn.http_path) conn.handle = LazyHandle(self.open) diff --git a/tests/unit/test_connection_manager.py b/tests/unit/test_connection_manager.py index c1b878390..536d6e04b 100644 --- a/tests/unit/test_connection_manager.py +++ b/tests/unit/test_connection_manager.py @@ -96,60 +96,6 @@ def test_open_calls_is_cluster_http_path_for_warehouse( assert args[1] is True -class TestTryCacheDbr: - """Unit tests for _try_cache_dbr_capabilities.""" - - HTTP_PATH = "sql/protocolv1/o/1234567890123456/cluster-abc" - - @pytest.fixture(autouse=True) - def clear_cache(self): - DatabricksConnectionManager._dbr_capabilities_cache = {} - yield - DatabricksConnectionManager._dbr_capabilities_cache = {} - - @patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=None) - def test_does_not_write_to_cache_when_version_is_none(self, mock_query): - """When the version query returns None, the cache must not be written. - - This prevents a poisoned None entry from blocking the authoritative write in open(). - """ - creds = Mock(spec=DatabricksCredentials) - creds.cluster_id = None - - DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH) - - assert self.HTTP_PATH not in DatabricksConnectionManager._dbr_capabilities_cache - mock_query.assert_called_once_with(creds, self.HTTP_PATH) - - @patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=(15, 4)) - def test_writes_to_cache_when_version_is_known(self, mock_query): - """When the version query succeeds, capabilities are cached correctly.""" - creds = Mock(spec=DatabricksCredentials) - creds.cluster_id = None - - DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH) - - mock_query.assert_called_once_with(creds, self.HTTP_PATH) - caps = DatabricksConnectionManager._dbr_capabilities_cache.get(self.HTTP_PATH) - assert caps is not None - assert caps.dbr_version == (15, 4) - assert not caps.is_sql_warehouse - assert caps.has_capability(DBRCapability.ICEBERG) - - @patch.object(DatabricksConnectionManager, "_query_dbr_version", return_value=(15, 4)) - def test_skips_write_when_already_cached(self, mock_query): - """If the path is already in cache, the version query is never made.""" - creds = Mock(spec=DatabricksCredentials) - creds.cluster_id = None - existing = DBRCapabilities(dbr_version=(14, 3), is_sql_warehouse=False) - DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] = existing - - DatabricksConnectionManager._try_cache_dbr_capabilities(creds, self.HTTP_PATH) - - mock_query.assert_not_called() - assert DatabricksConnectionManager._dbr_capabilities_cache[self.HTTP_PATH] is existing - - class TestCacheDbr: """Unit tests for _cache_dbr_capabilities.""" From 22808c617e16765b8d5816e0865d0b2f69897a5c Mon Sep 17 00:00:00 2001 From: Shubham Dhal Date: Sat, 25 Apr 2026 11:09:56 +0530 Subject: [PATCH 3/4] refactor: make _get_capabilities_for_http_path a classmethod The cache it reads (_dbr_capabilities_cache) is a class attribute, and the sibling writer (_cache_dbr_capabilities) is already a classmethod. Promote the reader to match, and use it from open() so the same accessor is used across the class. Addresses review feedback on #1414. --- dbt/adapters/databricks/connections.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbt/adapters/databricks/connections.py b/dbt/adapters/databricks/connections.py index c21215998..6ea5ac05f 100644 --- a/dbt/adapters/databricks/connections.py +++ b/dbt/adapters/databricks/connections.py @@ -167,8 +167,9 @@ def is_cluster(self) -> bool: databricks_conn = cast(DatabricksDBTConnection, conn) return is_cluster_http_path(databricks_conn.http_path, conn.credentials.cluster_id) - def _get_capabilities_for_http_path(self, http_path: str) -> DBRCapabilities: - return self._dbr_capabilities_cache.get(http_path, DBRCapabilities()) + @classmethod + def _get_capabilities_for_http_path(cls, http_path: str) -> DBRCapabilities: + return cls._dbr_capabilities_cache.get(http_path, DBRCapabilities()) @classmethod def _query_dbr_version( @@ -496,8 +497,8 @@ def connect() -> DatabricksHandle: if conn: databricks_connection.session_id = conn.session_id cls._cache_dbr_capabilities(creds, databricks_connection.http_path) - databricks_connection.capabilities = cls._dbr_capabilities_cache.get( - databricks_connection.http_path, DBRCapabilities() + databricks_connection.capabilities = cls._get_capabilities_for_http_path( + databricks_connection.http_path ) return conn else: From 1b2cd87e1c4cd0d40ec68176cb78b026813404de Mon Sep 17 00:00:00 2001 From: Shubham Dhal Date: Mon, 27 Apr 2026 12:27:06 +0530 Subject: [PATCH 4/4] chore: log DBR version query failures at debug level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review comment on #1414: replace silent `except: pass` in `_query_dbr_version` with a debug log capturing the http_path and exception. Debug level is intentional — `_create_fresh_connection` calls this before `credentials_manager` is set, so the first attempt per fresh connection always raises and is recovered by the subsequent `open()` call. Warning would flood the default log on every model run. Co-authored-by: Isaac --- dbt/adapters/databricks/connections.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/databricks/connections.py b/dbt/adapters/databricks/connections.py index 45063c642..56ba72fc2 100644 --- a/dbt/adapters/databricks/connections.py +++ b/dbt/adapters/databricks/connections.py @@ -194,8 +194,8 @@ def _query_dbr_version( result = cursor.fetchone() if result: return SqlUtils.extract_dbr_version(result[1]) - except Exception: - pass + except Exception as e: + logger.debug(f"Failed to query DBR version for http_path={http_path}: {e}") return None