databricks
diff --git a/‎.github/workflows/runJdbcComparator.yml‎
Lines changed: 5 additions & 4 deletions b/‎.github/workflows/runJdbcComparator.yml‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎NEXT_CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions b/‎NEXT_CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java‎
Lines changed: 10 additions & 0 deletions b/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksConnectionContext.java‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksParameterMetaData.java‎
Lines changed: 1 addition & 1 deletion b/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksParameterMetaData.java‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksPreparedStatement.java‎
Lines changed: 134 additions & 0 deletions b/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksPreparedStatement.java‎
Lines changed: 134 additions & 0 deletions
diff --git a/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksResultSet.java‎
Lines changed: 28 additions & 0 deletions b/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksResultSet.java‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksResultSetMetaData.java‎
Lines changed: 2 additions & 2 deletions b/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksResultSetMetaData.java‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java‎
Lines changed: 8 additions & 0 deletions b/‎src/main/java/com/databricks/jdbc/api/impl/DatabricksStatement.java‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎src/main/java/com/databricks/jdbc/api/impl/ExecutionResultFactory.java‎
Lines changed: 1 addition & 3 deletions b/‎src/main/java/com/databricks/jdbc/api/impl/ExecutionResultFactory.java‎
Lines changed: 1 addition & 3 deletions
@@ -26,10 +26,11 @@ jobs:
       - name: Merge main into jdbc-comparator
         run: |
           git checkout jdbc-comparator
-          git merge main --no-edit || {
-            echo "Merge conflict occurred"
-            git merge --abort
-            exit 1
+          git merge main --allow-unrelated-histories --no-edit -X theirs || {
+            echo "Force merging by accepting all changes from main"
+            git checkout --theirs .
+            git add .
+            git commit --no-edit
           }
 
       - name: Set up JDK 11
 
@@ -9,12 +9,16 @@
 - Added a client property `enableVolumeOperations` to enable GET/PUT/REMOVE volume operations on a stream. For backward compatibility, `allowedVolumeIngestionPaths` can also be used for the REMOVE operation.
 - Support for fetching schemas across all catalogs (when catalog is specified as null or a wildcard) in `DatabaseMetaData#getSchemas` API in SQL Execution mode.
 - **Configurable SQL validation in isValid()**: Added `EnableSQLValidationForIsValid` connection property to control whether `isValid()` method executes an actual SQL query for server-side validation. Default value is 0.
+- Implement multi-row INSERT batching optimization for prepared statements to improve performance when executing large batches of INSERT operations.
+- Implement lazy/incremental fetching for columnar results when using Databricks JDBC in Thrift mode without Arrow support. The change modifies the behavior from buffering entire result sets in memory to maintaining only a limited number of rows at a time, reducing peak heap memory usage and preventing OutOfMemory errors.
 
 ### Updated
 - Databricks SDK dependency upgraded to latest version 0.60.0
 
 ### Fixed
+- Integrated Azure U2M flow into driver for improved stability.
 - Fixed `ResultSet.getString` for Boolean columns in Metadata result set.
+- Fixed volume operations not completing unless the ResultSet is fully iterated.
 - Fixed `connection.getMetadata().getColumns()` to return the correct SQL data type code for complex type columns.
 ---
 *Note: When making changes, please add your change under the appropriate section with a brief description.* 
@@ -215,6 +215,11 @@ public String getHostUrl() throws DatabricksParsingException {
     }
   }
 
+  /** Returns the value held in this context's {@code host} field. */
+  @Override
+  public String getHost() {
+    return host;
+  }
+
   @Override
   public IDatabricksComputeResource getComputeResource() {
     return computeResource;
@@ -1025,4 +1030,9 @@ public int getTelemetryFlushIntervalInMilliseconds() {
     return Math.max(
         1000, Integer.parseInt(getParameter(DatabricksJdbcUrlParams.TELEMETRY_FLUSH_INTERVAL)));
   }
+
+  /**
+   * Reports whether multi-row INSERT batching is switched on for this connection.
+   *
+   * @return {@code true} only when the {@code ENABLE_BATCHED_INSERTS} URL parameter resolves to
+   *     {@code "1"}; {@code false} for any other value, including an absent ({@code null}) one
+   */
+  @Override
+  public boolean isBatchedInsertsEnabled() {
+    // Constant-first comparison: a null parameter value reads as "disabled" instead of an NPE.
+    return "1".equals(getParameter(DatabricksJdbcUrlParams.ENABLE_BATCHED_INSERTS));
+  }
 }
@@ -6,7 +6,7 @@
 import com.databricks.jdbc.common.util.WrapperUtil;
 import com.databricks.jdbc.log.JdbcLogger;
 import com.databricks.jdbc.log.JdbcLoggerFactory;
-import com.databricks.sdk.service.sql.ColumnInfoTypeName;
+import com.databricks.jdbc.model.core.ColumnInfoTypeName;
 import java.sql.ParameterMetaData;
 import java.sql.SQLException;
 import java.util.HashMap;
 
@@ -7,8 +7,10 @@
 import static com.databricks.jdbc.common.util.SQLInterpolator.surroundPlaceholdersWithQuotes;
 import static com.databricks.jdbc.common.util.ValidationUtil.throwErrorIfNull;
 
+import com.databricks.jdbc.common.DatabricksJdbcConstants;
 import com.databricks.jdbc.common.StatementType;
 import com.databricks.jdbc.common.util.DatabricksTypeUtil;
+import com.databricks.jdbc.common.util.InsertStatementParser;
 import com.databricks.jdbc.exception.*;
 import com.databricks.jdbc.log.JdbcLogger;
 import com.databricks.jdbc.log.JdbcLoggerFactory;
@@ -88,6 +90,138 @@ public int[] executeBatch() throws DatabricksBatchUpdateException {
   @Override
   public long[] executeLargeBatch() throws DatabricksBatchUpdateException {
     LOGGER.debug("public long executeLargeBatch()");
+
+    // The batch holds no parameter sets, so there is nothing to run.
+    if (databricksBatchParameterMetaData.isEmpty()) {
+      return new long[0];
+    }
+
+    // Prefer the multi-row INSERT path when the statement qualifies; otherwise run every
+    // queued parameter set as its own statement.
+    return canUseBatchedInsert() ? executeBatchedInsert() : executeIndividualStatements();
+  }
+
+  /**
+   * Decides whether the queued batch may be sent as a multi-row INSERT instead of one statement
+   * per parameter set.
+   *
+   * <p>All of the following must hold:
+   *
+   * <ul>
+   *   <li>{@code isBatchedInsertsEnabled()} on the connection context returns {@code true}
+   *   <li>{@link InsertStatementParser#parseInsertStrict} accepts the statement text without
+   *       throwing
+   *   <li>at least one parameter set has been queued
+   * </ul>
+   *
+   * <p>When eligible, repeated executions of a template such as
+   *
+   * <pre>
+   *   INSERT INTO users (id, name) VALUES (?, ?)  -- with parameters [1, "Alice"]
+   *   INSERT INTO users (id, name) VALUES (?, ?)  -- with parameters [2, "Bob"]
+   * </pre>
+   *
+   * are collapsed into a single statement:
+   *
+   * <pre>
+   *   INSERT INTO users (id, name) VALUES (?, ?), (?, ?)  -- with parameters [1, "Alice", 2, "Bob"]
+   * </pre>
+   *
+   * @return {@code true} if the batch qualifies for multi-row execution, {@code false} otherwise
+   */
+  private boolean canUseBatchedInsert() {
+    // The connection property acts as the master switch for this optimization.
+    boolean featureEnabled = connection.getConnectionContext().isBatchedInsertsEnabled();
+    if (featureEnabled) {
+      try {
+        // The parse result is discarded here; only "does it parse" matters for eligibility.
+        InsertStatementParser.parseInsertStrict(sql);
+        return !databricksBatchParameterMetaData.isEmpty();
+      } catch (Exception notBatchable) {
+        // The statement is not an INSERT the parser can batch; use the per-statement path.
+        return false;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Executes the queued batch as one or more multi-row INSERT statements.
+   *
+   * <p>The parameter sets are split into chunks of {@code MAX_QUERY_PARAMETERS / columnCount}
+   * rows (never fewer than one row per chunk). For each chunk the per-row bindings are renumbered
+   * into one contiguous, 1-based parameter map and run through {@code executeInternal} as a
+   * {@link StatementType#UPDATE}.
+   *
+   * @return one entry per queued parameter set; every entry is {@code 1} because each row of a
+   *     multi-row INSERT is reported as a single affected row
+   * @throws DatabricksBatchUpdateException if parsing or any chunk fails. The attached update
+   *     counts keep {@code 1} for rows in chunks that had already executed and carry {@link
+   *     Statement#EXECUTE_FAILED} for the failing chunk and every row after it
+   */
+  private long[] executeBatchedInsert() throws DatabricksBatchUpdateException {
+    int totalRows = databricksBatchParameterMetaData.size();
+    LOGGER.debug("Executing batched INSERT with {} rows", totalRows);
+
+    // Declared outside the try block so the failure path can tell which rows already executed.
+    long[] allUpdateCounts = new long[totalRows];
+    int processedRows = 0;
+
+    try {
+      InsertStatementParser.InsertInfo insertInfo = InsertStatementParser.parseInsertStrict(sql);
+
+      // Calculate how many rows fit in one chunk based on the parameter limit (at least one)
+      int parametersPerRow = insertInfo.getColumnCount();
+      int maxRowsPerChunk =
+          Math.max(1, DatabricksJdbcConstants.MAX_QUERY_PARAMETERS / parametersPerRow);
+
+      // Process batches in chunks
+      for (int startIndex = 0; startIndex < totalRows; startIndex += maxRowsPerChunk) {
+        int endIndex = Math.min(startIndex + maxRowsPerChunk, totalRows);
+        int chunkSize = endIndex - startIndex;
+
+        LOGGER.debug("Processing chunk {}-{} ({} rows)", startIndex + 1, endIndex, chunkSize);
+
+        // Generate multi-row SQL for this chunk
+        String multiRowSql = InsertStatementParser.generateMultiRowInsert(insertInfo, chunkSize);
+
+        // Combine parameters for this chunk, renumbering them consecutively from 1
+        Map<Integer, ImmutableSqlParameter> chunkParams = new HashMap<>();
+        int paramIndex = 1;
+
+        for (int i = startIndex; i < endIndex; i++) {
+          DatabricksParameterMetaData batchParams = databricksBatchParameterMetaData.get(i);
+          Map<Integer, ImmutableSqlParameter> rowParams = batchParams.getParameterBindings();
+          for (int j = 1; j <= rowParams.size(); j++) {
+            if (rowParams.containsKey(j)) {
+              chunkParams.put(paramIndex++, rowParams.get(j));
+            }
+          }
+        }
+
+        // Execute this chunk
+        executeInternal(multiRowSql, chunkParams, StatementType.UPDATE, false);
+
+        // Set update counts for this chunk (each row typically affects 1 row)
+        for (int i = startIndex; i < endIndex; i++) {
+          allUpdateCounts[i] = 1;
+        }
+
+        // Advance only after the chunk has run so a later failure does not disown these rows.
+        processedRows = endIndex;
+      }
+
+      LOGGER.debug("Successfully processed {} rows in chunks", processedRows);
+      return allUpdateCounts;
+
+    } catch (Exception e) {
+      LOGGER.error("Error executing batched INSERT: {}", e.getMessage(), e);
+      // Rows in completed chunks were already inserted; only the failing chunk and everything
+      // after it are reported as failed.
+      for (int i = processedRows; i < allUpdateCounts.length; i++) {
+        allUpdateCounts[i] = Statement.EXECUTE_FAILED;
+      }
+      throw new DatabricksBatchUpdateException(
+          e.getMessage(), DatabricksDriverErrorCode.BATCH_EXECUTE_EXCEPTION, allUpdateCounts);
+    }
+  }
+
+  /** Executes batch statements individually (fallback method). */
+  private long[] executeIndividualStatements() throws DatabricksBatchUpdateException {
+    LOGGER.debug(
+        "Executing batch individually with {} statements", databricksBatchParameterMetaData.size());
     long[] largeUpdateCount = new long[databricksBatchParameterMetaData.size()];
 
     for (int sqlQueryIndex = 0;
 
@@ -583,6 +583,16 @@ public boolean isBeforeFirst() throws SQLException {
     return executionResult.getCurrentRow() == -1;
   }
 
+  /**
+   * {@inheritDoc}
+   *
+   * <p><b>Limitation:</b> For lazy-loaded result sets ({@link LazyThriftResult}), particularly
+   * those using {@link
+   * com.databricks.jdbc.model.client.thrift.generated.TSparkRowSetType#COLUMN_BASED_SET}, this
+   * method cannot reliably determine the cursor position. The total row count remains unknown until
+   * all rows are fetched, preventing accurate detection of whether the cursor is after the last
+   * row. This is specific to Databricks JDBC dialect.
+   */
   @Override
   public boolean isAfterLast() throws SQLException {
     checkIfClosed();
@@ -595,9 +605,27 @@ public boolean isFirst() throws SQLException {
     return executionResult.getCurrentRow() == 0;
   }
 
+  /**
+   * {@inheritDoc}
+   *
+   * <p>A cursor positioned before the first row is never reported as being on the last row, which
+   * also covers result sets that contain no rows. Otherwise the strategy depends on the result
+   * set type:
+   *
+   * <ul>
+   *   <li>For {@link LazyThriftResult} instances: the cursor is on the last row when {@code
+   *       hasNext()} reports no further rows, since the total row count is unknown until all rows
+   *       are fetched.
+   *   <li>For other result types: the current row position is compared against the known total
+   *       row count.
+   * </ul>
+   *
+   * @return {@code true} if the cursor is on the last row, {@code false} otherwise
+   * @throws SQLException if the result set is closed or an error occurs
+   */
   @Override
   public boolean isLast() throws SQLException {
     checkIfClosed();
+    // Before-first guard shared by both strategies. Without it an empty, fully-buffered result
+    // would report true, because -1 == totalRows - 1 when totalRows is 0.
+    if (executionResult.getCurrentRow() < 0) {
+      return false;
+    }
+    if (executionResult instanceof LazyThriftResult) {
+      return !executionResult.hasNext();
+    }
     return executionResult.getCurrentRow() == resultSetMetaData.getTotalRows() - 1;
   }
 
 
@@ -20,10 +20,10 @@
 import com.databricks.jdbc.log.JdbcLogger;
 import com.databricks.jdbc.log.JdbcLoggerFactory;
 import com.databricks.jdbc.model.client.thrift.generated.*;
+import com.databricks.jdbc.model.core.ColumnInfo;
+import com.databricks.jdbc.model.core.ColumnInfoTypeName;
 import com.databricks.jdbc.model.core.ColumnMetadata;
 import com.databricks.jdbc.model.core.ResultManifest;
-import com.databricks.sdk.service.sql.ColumnInfo;
-import com.databricks.sdk.service.sql.ColumnInfoTypeName;
 import com.google.common.collect.ImmutableList;
 import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
 
@@ -692,6 +692,14 @@ static boolean isSelectQuery(String query) {
     return SELECT_PATTERN.matcher(trimmedQuery).find();
   }
 
+  /**
+   * Tests whether the given SQL text matches {@code INSERT_PATTERN} once comments and surrounding
+   * whitespace have been stripped.
+   *
+   * @param query SQL text to inspect; may be {@code null}
+   * @return {@code false} for {@code null} or blank input, otherwise whether the pattern is found
+   */
+  static boolean isInsertQuery(String query) {
+    boolean blank = query == null || query.trim().isEmpty();
+    return !blank && INSERT_PATTERN.matcher(trimCommentsAndWhitespaces(query)).find();
+  }
+
   DatabricksResultSet executeInternal(
       String sql,
       Map<Integer, ImmutableSqlParameter> params,
 
@@ -1,7 +1,5 @@
 package com.databricks.jdbc.api.impl;
 
-import static com.databricks.jdbc.common.util.DatabricksThriftUtil.convertColumnarToRowBased;
-
 import com.databricks.jdbc.api.impl.arrow.ArrowStreamResult;
 import com.databricks.jdbc.api.impl.volume.VolumeOperationResult;
 import com.databricks.jdbc.api.internal.IDatabricksSession;
@@ -96,7 +94,7 @@ private static IExecutionResult getResultHandler(
     LOGGER.info("Processing result of format {} from Thrift server", resultFormat);
     switch (resultFormat) {
       case COLUMN_BASED_SET:
-        return getResultSet(convertColumnarToRowBased(resultsResp, parentStatement, session));
+        return new LazyThriftResult(resultsResp, parentStatement, session);
       case ARROW_BASED_SET:
         return new ArrowStreamResult(resultsResp, true, parentStatement, session);
       case URL_BASED_SET:
Original file line number	Diff line number	Diff line change
`@@ -215,6 +215,11 @@ public String getHostUrl() throws DatabricksParsingException {`
`215`	`215`	`}`
`216`	`216`	`}`
`217`	`217`
	`218`	`+ @Override`
	`219`	`+ public String getHost() {`
	`220`	`+ return this.host;`
	`221`	`+ }`
	`222`	`+`
`218`	`223`	`@Override`
`219`	`224`	`public IDatabricksComputeResource getComputeResource() {`
`220`	`225`	`return computeResource;`
`@@ -1025,4 +1030,9 @@ public int getTelemetryFlushIntervalInMilliseconds() {`
`1025`	`1030`	`return Math.max(`
`1026`	`1031`	`1000, Integer.parseInt(getParameter(DatabricksJdbcUrlParams.TELEMETRY_FLUSH_INTERVAL)));`
`1027`	`1032`	`}`
	`1033`	`+`
	`1034`	`+ @Override`
	`1035`	`+ public boolean isBatchedInsertsEnabled() {`
	`1036`	`+ return getParameter(DatabricksJdbcUrlParams.ENABLE_BATCHED_INSERTS).equals("1");`
	`1037`	`+ }`
`1028`	`1038`	`}`