|
1 | 1 | package com.databricks.jdbc.api.impl.arrow; |
2 | 2 |
|
| 3 | +import static com.databricks.jdbc.common.util.DatabricksThriftUtil.createExternalLink; |
3 | 4 | import static com.databricks.jdbc.common.util.DatabricksThriftUtil.getColumnInfoFromTColumnDesc; |
4 | 5 |
|
5 | 6 | import com.databricks.jdbc.api.impl.ComplexDataTypeParser; |
6 | 7 | import com.databricks.jdbc.api.impl.IExecutionResult; |
| 8 | +import com.databricks.jdbc.api.internal.IDatabricksConnectionContext; |
7 | 9 | import com.databricks.jdbc.api.internal.IDatabricksSession; |
8 | 10 | import com.databricks.jdbc.api.internal.IDatabricksStatementInternal; |
9 | 11 | import com.databricks.jdbc.common.CompressionCodec; |
|
16 | 18 | import com.databricks.jdbc.model.client.thrift.generated.TColumnDesc; |
17 | 19 | import com.databricks.jdbc.model.client.thrift.generated.TFetchResultsResp; |
18 | 20 | import com.databricks.jdbc.model.client.thrift.generated.TGetResultSetMetadataResp; |
| 21 | +import com.databricks.jdbc.model.client.thrift.generated.TSparkArrowResultLink; |
| 22 | +import com.databricks.jdbc.model.core.ChunkLinkFetchResult; |
19 | 23 | import com.databricks.jdbc.model.core.ColumnInfo; |
20 | 24 | import com.databricks.jdbc.model.core.ColumnInfoTypeName; |
| 25 | +import com.databricks.jdbc.model.core.ExternalLink; |
21 | 26 | import com.databricks.jdbc.model.core.ResultData; |
22 | 27 | import com.databricks.jdbc.model.core.ResultManifest; |
23 | 28 | import com.google.common.annotations.VisibleForTesting; |
24 | 29 | import java.util.ArrayList; |
| 30 | +import java.util.Collection; |
25 | 31 | import java.util.List; |
26 | 32 |
|
27 | 33 | /** Result container for Arrow-based query results. */ |
@@ -69,20 +75,71 @@ public ArrowStreamResult( |
69 | 75 | "Creating ArrowStreamResult with remote links for statementId: {}", |
70 | 76 | statementId.toSQLExecStatementId()); |
71 | 77 | this.chunkProvider = |
72 | | - new RemoteChunkProvider( |
73 | | - statementId, |
74 | | - resultManifest, |
75 | | - resultData, |
76 | | - session, |
77 | | - httpClient, |
78 | | - session.getConnectionContext().getCloudFetchThreadPoolSize()); |
| 78 | + createRemoteChunkProvider(statementId, resultManifest, resultData, session, httpClient); |
79 | 79 | } |
80 | 80 | this.columnInfos = |
81 | 81 | resultManifest.getSchema().getColumnCount() == 0 |
82 | 82 | ? new ArrayList<>() |
83 | 83 | : new ArrayList<>(resultManifest.getSchema().getColumns()); |
84 | 84 | } |
85 | 85 |
|
| 86 | + /** |
| 87 | + * Creates the appropriate remote chunk provider based on configuration. |
| 88 | + * |
| 89 | + * @param statementId The statement ID |
| 90 | + * @param resultManifest The result manifest containing chunk metadata |
| 91 | + * @param resultData The result data containing initial external links |
| 92 | + * @param session The session for fetching additional chunks |
| 93 | + * @param httpClient The HTTP client for downloading chunk data |
| 94 | + * @return A ChunkProvider instance |
| 95 | + */ |
| 96 | + private static ChunkProvider createRemoteChunkProvider( |
| 97 | + StatementId statementId, |
| 98 | + ResultManifest resultManifest, |
| 99 | + ResultData resultData, |
| 100 | + IDatabricksSession session, |
| 101 | + IDatabricksHttpClient httpClient) |
| 102 | + throws DatabricksSQLException { |
| 103 | + |
| 104 | + IDatabricksConnectionContext connectionContext = session.getConnectionContext(); |
| 105 | + |
| 106 | + if (connectionContext.isStreamingChunkProviderEnabled()) { |
| 107 | + LOGGER.info( |
| 108 | + "Using StreamingChunkProvider for statementId: {}", statementId.toSQLExecStatementId()); |
| 109 | + |
| 110 | + ChunkLinkFetcher linkFetcher = new SeaChunkLinkFetcher(session, statementId); |
| 111 | + CompressionCodec compressionCodec = resultManifest.getResultCompression(); |
| 112 | + int maxChunksInMemory = connectionContext.getCloudFetchThreadPoolSize(); |
| 113 | + int linkPrefetchWindow = connectionContext.getLinkPrefetchWindow(); |
| 114 | + int chunkReadyTimeoutSeconds = connectionContext.getChunkReadyTimeoutSeconds(); |
| 115 | + double cloudFetchSpeedThreshold = connectionContext.getCloudFetchSpeedThreshold(); |
| 116 | + |
| 117 | + // Convert ExternalLinks to ChunkLinkFetchResult for the provider |
| 118 | + ChunkLinkFetchResult initialLinks = |
| 119 | + convertToChunkLinkFetchResult(resultData.getExternalLinks()); |
| 120 | + |
| 121 | + return new StreamingChunkProvider( |
| 122 | + linkFetcher, |
| 123 | + httpClient, |
| 124 | + compressionCodec, |
| 125 | + statementId, |
| 126 | + maxChunksInMemory, |
| 127 | + linkPrefetchWindow, |
| 128 | + chunkReadyTimeoutSeconds, |
| 129 | + cloudFetchSpeedThreshold, |
| 130 | + initialLinks); |
| 131 | + } else { |
| 132 | + // Use the original RemoteChunkProvider |
| 133 | + return new RemoteChunkProvider( |
| 134 | + statementId, |
| 135 | + resultManifest, |
| 136 | + resultData, |
| 137 | + session, |
| 138 | + httpClient, |
| 139 | + connectionContext.getCloudFetchThreadPoolSize()); |
| 140 | + } |
| 141 | + } |
| 142 | + |
86 | 143 | public ArrowStreamResult( |
87 | 144 | TFetchResultsResp resultsResp, |
88 | 145 | boolean isInlineArrow, |
@@ -110,16 +167,63 @@ public ArrowStreamResult( |
110 | 167 | if (isInlineArrow) { |
111 | 168 | this.chunkProvider = new InlineChunkProvider(resultsResp, parentStatement, session); |
112 | 169 | } else { |
113 | | - CompressionCodec compressionCodec = |
114 | | - CompressionCodec.getCompressionMapping(resultsResp.getResultSetMetadata()); |
115 | 170 | this.chunkProvider = |
116 | | - new RemoteChunkProvider( |
117 | | - parentStatement, |
118 | | - resultsResp, |
119 | | - session, |
120 | | - httpClient, |
121 | | - session.getConnectionContext().getCloudFetchThreadPoolSize(), |
122 | | - compressionCodec); |
| 171 | + createThriftRemoteChunkProvider(resultsResp, parentStatement, session, httpClient); |
| 172 | + } |
| 173 | + } |
| 174 | + |
| 175 | + /** |
| 176 | + * Creates the appropriate remote chunk provider for Thrift based on configuration. |
| 177 | + * |
| 178 | + * @param resultsResp The Thrift fetch results response |
| 179 | + * @param parentStatement The parent statement for fetching additional chunks |
| 180 | + * @param session The session for fetching additional chunks |
| 181 | + * @param httpClient The HTTP client for downloading chunk data |
| 182 | + * @return A ChunkProvider instance |
| 183 | + */ |
| 184 | + private static ChunkProvider createThriftRemoteChunkProvider( |
| 185 | + TFetchResultsResp resultsResp, |
| 186 | + IDatabricksStatementInternal parentStatement, |
| 187 | + IDatabricksSession session, |
| 188 | + IDatabricksHttpClient httpClient) |
| 189 | + throws DatabricksSQLException { |
| 190 | + |
| 191 | + IDatabricksConnectionContext connectionContext = session.getConnectionContext(); |
| 192 | + CompressionCodec compressionCodec = |
| 193 | + CompressionCodec.getCompressionMapping(resultsResp.getResultSetMetadata()); |
| 194 | + |
| 195 | + if (connectionContext.isStreamingChunkProviderEnabled()) { |
| 196 | + StatementId statementId = parentStatement.getStatementId(); |
| 197 | + LOGGER.info("Using StreamingChunkProvider for Thrift statementId: {}", statementId); |
| 198 | + |
| 199 | + ChunkLinkFetcher linkFetcher = new ThriftChunkLinkFetcher(session, statementId); |
| 200 | + int maxChunksInMemory = connectionContext.getCloudFetchThreadPoolSize(); |
| 201 | + int linkPrefetchWindow = connectionContext.getLinkPrefetchWindow(); |
| 202 | + int chunkReadyTimeoutSeconds = connectionContext.getChunkReadyTimeoutSeconds(); |
| 203 | + double cloudFetchSpeedThreshold = connectionContext.getCloudFetchSpeedThreshold(); |
| 204 | + |
| 205 | + // Convert initial Thrift links to ChunkLinkFetchResult |
| 206 | + ChunkLinkFetchResult initialLinks = convertThriftLinksToChunkLinkFetchResult(resultsResp); |
| 207 | + |
| 208 | + return new StreamingChunkProvider( |
| 209 | + linkFetcher, |
| 210 | + httpClient, |
| 211 | + compressionCodec, |
| 212 | + statementId, |
| 213 | + maxChunksInMemory, |
| 214 | + linkPrefetchWindow, |
| 215 | + chunkReadyTimeoutSeconds, |
| 216 | + cloudFetchSpeedThreshold, |
| 217 | + initialLinks); |
| 218 | + } else { |
| 219 | + // Use the original RemoteChunkProvider |
| 220 | + return new RemoteChunkProvider( |
| 221 | + parentStatement, |
| 222 | + resultsResp, |
| 223 | + session, |
| 224 | + httpClient, |
| 225 | + connectionContext.getCloudFetchThreadPoolSize(), |
| 226 | + compressionCodec); |
123 | 227 | } |
124 | 228 | } |
125 | 229 |
|
@@ -268,4 +372,79 @@ private void setColumnInfo(TGetResultSetMetadataResp resultManifest) { |
268 | 372 | columnInfos.add(getColumnInfoFromTColumnDesc(tColumnDesc)); |
269 | 373 | } |
270 | 374 | } |
| 375 | + |
| 376 | + /** |
| 377 | + * Converts a collection of ExternalLinks to a ChunkLinkFetchResult. |
| 378 | + * |
| 379 | + * @param externalLinks The external links to convert, may be null |
| 380 | + * @return A ChunkLinkFetchResult, or null if input is null or empty |
| 381 | + */ |
| 382 | + private static ChunkLinkFetchResult convertToChunkLinkFetchResult( |
| 383 | + Collection<ExternalLink> externalLinks) { |
| 384 | + if (externalLinks == null || externalLinks.isEmpty()) { |
| 385 | + return null; |
| 386 | + } |
| 387 | + |
| 388 | + List<ExternalLink> linkList = |
| 389 | + externalLinks instanceof List |
| 390 | + ? (List<ExternalLink>) externalLinks |
| 391 | + : new ArrayList<>(externalLinks); |
| 392 | + |
| 393 | + // Derive hasMore and nextRowOffset from last link (SEA style) |
| 394 | + ExternalLink lastLink = linkList.get(linkList.size() - 1); |
| 395 | + boolean hasMore = lastLink.getNextChunkIndex() != null; |
| 396 | + long nextFetchIndex = hasMore ? lastLink.getNextChunkIndex() : -1; |
| 397 | + long nextRowOffset = lastLink.getRowOffset() + lastLink.getRowCount(); |
| 398 | + |
| 399 | + return ChunkLinkFetchResult.of(linkList, hasMore, nextFetchIndex, nextRowOffset); |
| 400 | + } |
| 401 | + |
| 402 | + /** |
| 403 | + * Converts Thrift result links to a ChunkLinkFetchResult. |
| 404 | + * |
| 405 | + * <p>This method converts TSparkArrowResultLink from the Thrift response to the unified |
| 406 | + * ChunkLinkFetchResult format used by StreamingChunkProvider. |
| 407 | + * |
| 408 | + * @param resultsResp The Thrift fetch results response containing initial links |
| 409 | + * @return A ChunkLinkFetchResult, or null if no links |
| 410 | + */ |
| 411 | + private static ChunkLinkFetchResult convertThriftLinksToChunkLinkFetchResult( |
| 412 | + TFetchResultsResp resultsResp) { |
| 413 | + List<TSparkArrowResultLink> resultLinks = resultsResp.getResults().getResultLinks(); |
| 414 | + if (resultLinks == null || resultLinks.isEmpty()) { |
| 415 | + return null; |
| 416 | + } |
| 417 | + |
| 418 | + List<ExternalLink> chunkLinks = new ArrayList<>(); |
| 419 | + int lastIndex = resultLinks.size() - 1; |
| 420 | + boolean hasMoreRows = resultsResp.hasMoreRows; |
| 421 | + |
| 422 | + for (int linkIndex = 0; linkIndex < resultLinks.size(); linkIndex++) { |
| 423 | + TSparkArrowResultLink thriftLink = resultLinks.get(linkIndex); |
| 424 | + |
| 425 | + // Convert Thrift link to ExternalLink (sets chunkIndex, rowOffset, rowCount, etc.) |
| 426 | + ExternalLink externalLink = createExternalLink(thriftLink, linkIndex); |
| 427 | + |
| 428 | + // For the last link, set nextChunkIndex based on hasMoreRows |
| 429 | + if (linkIndex == lastIndex) { |
| 430 | + if (hasMoreRows) { |
| 431 | + // More chunks available - next fetch should start from lastIndex + 1 |
| 432 | + externalLink.setNextChunkIndex((long) linkIndex + 1); |
| 433 | + } |
| 434 | + // If hasMoreRows is false, nextChunkIndex remains null (end of stream) |
| 435 | + } else { |
| 436 | + // Not the last link - next chunk follows immediately |
| 437 | + externalLink.setNextChunkIndex((long) linkIndex + 1); |
| 438 | + } |
| 439 | + |
| 440 | + chunkLinks.add(externalLink); |
| 441 | + } |
| 442 | + |
| 443 | + // Calculate next fetch positions from last link |
| 444 | + TSparkArrowResultLink lastThriftLink = resultLinks.get(lastIndex); |
| 445 | + long nextFetchIndex = hasMoreRows ? lastIndex + 1 : -1; |
| 446 | + long nextRowOffset = lastThriftLink.getStartRowOffset() + lastThriftLink.getRowCount(); |
| 447 | + |
| 448 | + return ChunkLinkFetchResult.of(chunkLinks, hasMoreRows, nextFetchIndex, nextRowOffset); |
| 449 | + } |
271 | 450 | } |
0 commit comments