-
Notifications
You must be signed in to change notification settings - Fork 40
Implement lazy loading for inline Arrow results #1029
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
821fe48
a00b042
84f6788
155cf8c
c2230c5
4e807a6
1df5d7a
e061378
512e4e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -143,13 +143,11 @@ private static ChunkProvider createRemoteChunkProvider( | |
|
|
||
| public ArrowStreamResult( | ||
| TFetchResultsResp resultsResp, | ||
| boolean isInlineArrow, | ||
| IDatabricksStatementInternal parentStatementId, | ||
| IDatabricksSession session) | ||
| throws DatabricksSQLException { | ||
| this( | ||
| resultsResp, | ||
| isInlineArrow, | ||
| parentStatementId, | ||
| session, | ||
| DatabricksHttpClientFactory.getInstance().getClient(session.getConnectionContext())); | ||
|
|
@@ -158,19 +156,14 @@ public ArrowStreamResult( | |
| @VisibleForTesting | ||
| ArrowStreamResult( | ||
| TFetchResultsResp resultsResp, | ||
| boolean isInlineArrow, | ||
| IDatabricksStatementInternal parentStatement, | ||
| IDatabricksSession session, | ||
| IDatabricksHttpClient httpClient) | ||
| throws DatabricksSQLException { | ||
| this.session = session; | ||
| setColumnInfo(resultsResp.getResultSetMetadata()); | ||
| if (isInlineArrow) { | ||
| this.chunkProvider = new InlineChunkProvider(resultsResp, parentStatement, session); | ||
| } else { | ||
| this.chunkProvider = | ||
| createThriftRemoteChunkProvider(resultsResp, parentStatement, session, httpClient); | ||
| } | ||
| this.chunkProvider = | ||
| createThriftRemoteChunkProvider(resultsResp, parentStatement, session, httpClient); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -238,48 +231,15 @@ public List<String> getArrowMetadata() throws DatabricksSQLException { | |
| /** {@inheritDoc} */ | ||
| @Override | ||
| public Object getObject(int columnIndex) throws DatabricksSQLException { | ||
| ColumnInfoTypeName requiredType = columnInfos.get(columnIndex).getTypeName(); | ||
| ColumnInfo columnInfo = columnInfos.get(columnIndex); | ||
| ColumnInfoTypeName requiredType = columnInfo.getTypeName(); | ||
| String arrowMetadata = chunkIterator.getType(columnIndex); | ||
| if (arrowMetadata == null) { | ||
| arrowMetadata = columnInfos.get(columnIndex).getTypeText(); | ||
| arrowMetadata = columnInfo.getTypeText(); | ||
| } | ||
|
|
||
| // Handle complex type conversion when complex datatype support is disabled | ||
| boolean isComplexDatatypeSupportEnabled = | ||
| this.session.getConnectionContext().isComplexDatatypeSupportEnabled(); | ||
| boolean isGeoSpatialSupportEnabled = | ||
| this.session.getConnectionContext().isGeoSpatialSupportEnabled(); | ||
|
|
||
| // Check if we need to convert geospatial types to string when geospatial support is disabled | ||
| // This check must come before the general complex type check | ||
| if (!isGeoSpatialSupportEnabled && isGeospatialType(requiredType)) { | ||
| LOGGER.debug("Geospatial support is disabled, converting {} to STRING", requiredType); | ||
|
|
||
| Object result = | ||
| chunkIterator.getColumnObjectAtCurrentRow( | ||
| columnIndex, ColumnInfoTypeName.STRING, "STRING", columnInfos.get(columnIndex)); | ||
| if (result == null) { | ||
| return null; | ||
| } | ||
| // Return raw string for geospatial types when support is disabled | ||
| return result; | ||
| } | ||
|
|
||
| if (!isComplexDatatypeSupportEnabled && isComplexType(requiredType)) { | ||
| LOGGER.debug("Complex datatype support is disabled, converting complex type to STRING"); | ||
|
|
||
| Object result = | ||
| chunkIterator.getColumnObjectAtCurrentRow( | ||
| columnIndex, ColumnInfoTypeName.STRING, "STRING", columnInfos.get(columnIndex)); | ||
| if (result == null) { | ||
| return null; | ||
| } | ||
| ComplexDataTypeParser parser = new ComplexDataTypeParser(); | ||
| return parser.formatComplexTypeString(result.toString(), requiredType.name(), arrowMetadata); | ||
| } | ||
|
|
||
| return chunkIterator.getColumnObjectAtCurrentRow( | ||
| columnIndex, requiredType, arrowMetadata, columnInfos.get(columnIndex)); | ||
| return getObjectWithComplexTypeHandling( | ||
| session, chunkIterator, columnIndex, requiredType, arrowMetadata, columnInfo); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -384,6 +344,66 @@ private void setColumnInfo(TGetResultSetMetadataResp resultManifest) { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Helper method to handle complex type and geospatial type conversion when support is disabled. | ||
| * | ||
| * <p>This method is also used by LazyThriftInlineArrowResult for consistent type handling. | ||
| * | ||
| * @param session The databricks session | ||
| * @param chunkIterator The chunk iterator | ||
| * @param columnIndex The column index | ||
| * @param requiredType The required column type | ||
| * @param arrowMetadata The arrow metadata | ||
| * @param columnInfo The column info | ||
| * @return The object value (converted if complex/geospatial type and support disabled) | ||
| * @throws DatabricksSQLException if an error occurs | ||
| */ | ||
| protected static Object getObjectWithComplexTypeHandling( | ||
| IDatabricksSession session, | ||
| ArrowResultChunkIterator chunkIterator, | ||
| int columnIndex, | ||
| ColumnInfoTypeName requiredType, | ||
| String arrowMetadata, | ||
| ColumnInfo columnInfo) | ||
| throws DatabricksSQLException { | ||
| boolean isComplexDatatypeSupportEnabled = | ||
| session.getConnectionContext().isComplexDatatypeSupportEnabled(); | ||
| boolean isGeoSpatialSupportEnabled = | ||
| session.getConnectionContext().isGeoSpatialSupportEnabled(); | ||
|
|
||
| // Check if we need to convert geospatial types to string when geospatial support is disabled | ||
| // This check must come before the general complex type check | ||
| if (!isGeoSpatialSupportEnabled && isGeospatialType(requiredType)) { | ||
| LOGGER.debug("Geospatial support is disabled, converting {} to STRING", requiredType); | ||
|
|
||
| Object result = | ||
| chunkIterator.getColumnObjectAtCurrentRow( | ||
| columnIndex, ColumnInfoTypeName.STRING, "STRING", columnInfo); | ||
| if (result == null) { | ||
| return null; | ||
| } | ||
| // Return raw string for geospatial types when support is disabled | ||
| return result; | ||
| } | ||
|
|
||
| if (!isComplexDatatypeSupportEnabled && isComplexType(requiredType)) { | ||
| LOGGER.debug("Complex datatype support is disabled, converting complex type to STRING"); | ||
|
|
||
| Object result = | ||
| chunkIterator.getColumnObjectAtCurrentRow( | ||
| columnIndex, ColumnInfoTypeName.STRING, "STRING", columnInfo); | ||
| if (result == null) { | ||
| return null; | ||
| } | ||
| ComplexDataTypeParser parser = new ComplexDataTypeParser(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need to create this for every getObject call?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, this is just existing code from method To answer the question: No we shouldn't create such objects in hot paths like getObjects. But I don't want to change the scope of this PR. Will create a separate change/ |
||
|
|
||
| return parser.formatComplexTypeString(result.toString(), requiredType.name(), arrowMetadata); | ||
| } | ||
|
|
||
| return chunkIterator.getColumnObjectAtCurrentRow( | ||
| columnIndex, requiredType, arrowMetadata, columnInfo); | ||
| } | ||
|
|
||
| /** | ||
| * Converts a collection of ExternalLinks to a ChunkLinkFetchResult. | ||
| * | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sreekanth is making changes in similar code, make sure that you don't override his changes for geospatial
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ack.