Address review feedback on bounded SEA API PR

gopalldb · gopalldb · commit 95fcfc9a69a8 · 2026-05-27T14:47:19.000+05:30
P0-1: Remove redundant chunk.setStatus(DOWNLOAD_FAILED) in inner catch
— defer entirely to finally block. Fixes StreamingChunkDownloadTaskTest.

P0-2: Add NEXT_CHANGELOG.md entry under ### Added for UseBoundedSeaApi.

P1-1: Call triggerDownloads() after reconciliation creates new chunks from
the refresh response — prevents newly discovered chunks from sitting in PENDING.

P1-2/P1-3: Un-gated changes (new chunk creation, EOS from refresh,
triggerDownloads) are intentional parity fixes for all
EnableStreamingChunkProvider=1 users. EnableStreamingChunkProvider
defaults to off, so default users are unaffected.

P1-4: Revert the addition of RuntimeException to the inner catch — a
DatabricksError is now caught by the outer catch(Throwable) and fails
immediately (no retry), matching ChunkDownloadTask behavior exactly. NPE/ISE are likewise not retried.

P2-1: Always send row_offset (even 0 for chunk 0) when bounded SEA is
enabled — being explicit is safer than relying on the server default.

P2-3: Update nextLinkFetchIndex after reconciliation to avoid prefetch
thread re-fetching chunks already discovered via refresh.

P2-5: Add "Requires server support" to connection property help text.

Co-authored-by: Isaac
Signed-off-by: Gopal Lal <gopal.lal@databricks.com>
diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
@@ -3,6 +3,11 @@
 ## [Unreleased]
 
 ### Added
+- Added result set heartbeat / keep-alive to prevent server-side result expiry during slow consumption. When enabled via `EnableHeartbeat=1`, the driver periodically polls `GetStatementStatus` (SEA) or `GetOperationStatus` (Thrift) to keep the operation alive while the client reads results. Configurable interval via `HeartbeatIntervalSeconds` (default 60s). Heartbeat automatically stops when results are fully consumed, ResultSet is closed, or the server returns a terminal state. Disabled by default due to cost implications (heartbeats keep the warehouse running).
+- Metadata operations now use SQL SHOW commands for both Thrift and SEA backends,
+  ensuring consistent behavior for SQL warehouses regardless of underlying
+  protocol. To revert to native Thrift metadata RPCs, set `UseQueryForMetadata=0`.
+- Added `UseBoundedSeaApi` connection property (default `0`/off). When enabled, the driver uses the bounded SEA API contract for CloudFetch: sends `row_offset` on GetResultData requests and uses `next_chunk_index` for chunk discovery instead of `total_chunk_count`. Requires server support.
 
 ### Updated
 
diff --git a/src/main/java/com/databricks/jdbc/api/impl/arrow/StreamingChunkDownloadTask.java b/src/main/java/com/databricks/jdbc/api/impl/arrow/StreamingChunkDownloadTask.java
@@ -83,15 +83,15 @@ public Void call() throws DatabricksSQLException {
               taskTotalMs,
               retries);
 
-        } catch (IOException | SQLException | RuntimeException e) {
+        } catch (IOException | SQLException e) {
           retries++;
           if (retries >= MAX_RETRIES) {
             LOGGER.error(
                 "Failed to download chunk {} after {} attempts: {}",
                 chunk.getChunkIndex(),
                 MAX_RETRIES,
                 e.getMessage());
-            chunk.setStatus(ChunkStatus.DOWNLOAD_FAILED);
+            // Status set to DOWNLOAD_FAILED in the finally block
             throw new DatabricksSQLException(
                 String.format(
                     "Failed to download chunk %d after %d attempts",
diff --git a/src/main/java/com/databricks/jdbc/api/impl/arrow/StreamingChunkProvider.java b/src/main/java/com/databricks/jdbc/api/impl/arrow/StreamingChunkProvider.java
@@ -611,9 +611,9 @@ ExternalLink getRefreshedLink(long chunkIndex, long rowOffset) throws SQLExcepti
             c.setChunkLink(link);
           }
         } else {
-          // New chunk from server not yet in our map — create it.
-          // This handles the bounded SEA case where the refresh response
-          // may include chunks beyond our current highestKnownChunkIndex.
+          // Server returned a chunk not yet in our map — create it.
+          // Handles cases where refresh response includes chunks beyond
+          // our current highestKnownChunkIndex.
           try {
             createChunkFromLink(link);
           } catch (Exception e) {
@@ -625,11 +625,18 @@ ExternalLink getRefreshedLink(long chunkIndex, long rowOffset) throws SQLExcepti
         }
       }
 
-      // Update end-of-stream from refresh response
+      // Update end-of-stream and prefetch index from refresh response
       if (!result.hasMore()) {
         endOfStreamReached = true;
+      } else if (result.getNextFetchIndex() > nextLinkFetchIndex) {
+        // Avoid re-fetching chunks that the refresh already discovered
+        nextLinkFetchIndex = result.getNextFetchIndex();
+        nextRowOffsetToFetch = result.getNextRowOffset();
       }
 
+      // Trigger downloads for any newly-created chunks
+      triggerDownloads();
+
       // Check if our target chunk was refreshed by the batch
       targetChunk = chunks.get(chunkIndex);
       if (targetChunk != null && !targetChunk.isChunkLinkInvalid()) {
diff --git a/src/main/java/com/databricks/jdbc/common/DatabricksJdbcUrlParams.java b/src/main/java/com/databricks/jdbc/common/DatabricksJdbcUrlParams.java
@@ -206,7 +206,7 @@ public enum DatabricksJdbcUrlParams {
       "1"),
   USE_BOUNDED_SEA_API(
       "UseBoundedSeaApi",
-      "Use bounded SEA API for CloudFetch: send row_offset on GetResultData, force StreamingChunkProvider, stop relying on total_chunk_count",
+      "Use bounded SEA API for CloudFetch: send row_offset on GetResultData, force StreamingChunkProvider, stop relying on total_chunk_count. Requires server support.",
       "0"),
   DISABLE_OAUTH_REFRESH_TOKEN(
       "DisableOauthRefreshToken",
diff --git a/src/main/java/com/databricks/jdbc/dbclient/impl/sqlexec/DatabricksSdkClient.java b/src/main/java/com/databricks/jdbc/dbclient/impl/sqlexec/DatabricksSdkClient.java
@@ -529,8 +529,8 @@ public ChunkLinkFetchResult getResultChunks(
     GetStatementResultChunkNRequest request =
         new GetStatementResultChunkNRequest().setStatementId(statementId).setChunkIndex(chunkIndex);
     String path = String.format(RESULT_CHUNK_PATH, statementId, chunkIndex);
-    // Bounded SEA API: send row_offset to support future >100GB results and cluster-side fetch
-    if (connectionContext.isBoundedSeaApiEnabled() && chunkStartRowOffset > 0) {
+    // Bounded SEA API: always send row_offset (even 0 for chunk 0)
+    if (connectionContext.isBoundedSeaApiEnabled()) {
       path = path + "?row_offset=" + chunkStartRowOffset;
     }
     try {