Skip to content

Commit 04439ee

Browse files
Fix backtick quoting for column names in batched INSERT reconstruction (#1285)
## Summary
- `generateMultiRowInsert()` now wraps column names with backticks when reconstructing multi-row INSERT SQL
- This fixes `PARSE_SYNTAX_ERROR` for column names containing dots (e.g., `col.name`) when `EnableBatchedInserts` is enabled
- Added test case for dotted column names

## Root Cause
`parseColumns()` strips backticks from column names, but `generateMultiRowInsert()` joins them back without re-quoting. Unquoted column names containing dots are then interpreted as schema/table separators by the SQL parser.

## Test plan
- [x] Existing `InsertStatementParserTest` tests updated and passing (18/18)
- [x] New `testGenerateMultiRowInsertWithDottedColumnNames` test added

Fixes #1284

---------

Signed-off-by: Oleksandr Shevchenko <oleksandr.shevchenko@datarobot.com>
Co-authored-by: Vikrant Puppala <vikrant.puppala@databricks.com>
1 parent d8c326d commit 04439ee

4 files changed

Lines changed: 39 additions & 12 deletions

File tree

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
### Fixed
1010
- Fixed primitive types within complex types (ARRAY, MAP, STRUCT) not being correctly parsed when Arrow serialization uses alternate formats: TIMESTAMP/TIMESTAMP_NTZ as epoch microseconds or component arrays, and BINARY as base64-encoded strings.
11+
- Fixed `PARSE_SYNTAX_ERROR` for column names containing special characters (e.g., dots) when `EnableBatchedInserts` is enabled, by re-quoting column names with backticks in reconstructed multi-row INSERT statements.
1112

1213
---
1314
*Note: When making changes, please add your change under the appropriate section

src/main/java/com/databricks/jdbc/common/util/InsertStatementParser.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,14 @@ public static String generateMultiRowInsert(InsertInfo insertInfo, int numberOfR
194194
}
195195

196196
StringBuilder sql = new StringBuilder();
197+
String columnList =
198+
insertInfo.getColumns().stream()
199+
.map(col -> "`" + col + "`")
200+
.collect(Collectors.joining(", "));
197201
sql.append("INSERT INTO ")
198202
.append(insertInfo.getTableName())
199203
.append(" (")
200-
.append(String.join(", ", insertInfo.getColumns()))
204+
.append(columnList)
201205
.append(") VALUES ");
202206

203207
// Generate placeholders for each row

src/test/java/com/databricks/jdbc/api/impl/DatabricksPreparedStatementTest.java

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ public void testExecuteBatchStatement() throws Exception {
249249
}
250250
// Our implementation converts single INSERT to multi-row INSERT for batching
251251
String expectedMultiRowSQL =
252-
"INSERT INTO orders (user_id, shard, region_code, namespace) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)";
252+
"INSERT INTO orders (`user_id`, `shard`, `region_code`, `namespace`) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)";
253253
when(client.executeStatement(
254254
eq(expectedMultiRowSQL),
255255
eq(new Warehouse(WAREHOUSE_ID)),
@@ -308,7 +308,7 @@ public void testExecuteBatchStatementThrowsError() throws Exception {
308308

309309
// Our implementation batches all into one multi-row INSERT, so if it fails, all fail
310310
String expectedMultiRowSQL =
311-
"INSERT INTO orders (user_id, shard, region_code, namespace) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)";
311+
"INSERT INTO orders (`user_id`, `shard`, `region_code`, `namespace`) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)";
312312
when(client.executeStatement(
313313
eq(expectedMultiRowSQL),
314314
eq(new Warehouse(WAREHOUSE_ID)),
@@ -346,7 +346,7 @@ public void testExecuteLargeBatchStatement() throws Exception {
346346
}
347347
// Our implementation converts single INSERT to multi-row INSERT for batching
348348
String expectedMultiRowSQL =
349-
"INSERT INTO orders (user_id, shard, region_code, namespace) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)";
349+
"INSERT INTO orders (`user_id`, `shard`, `region_code`, `namespace`) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)";
350350
when(client.executeStatement(
351351
eq(expectedMultiRowSQL),
352352
eq(new Warehouse(WAREHOUSE_ID)),
@@ -386,7 +386,7 @@ public void testExecuteLargeBatchStatementThrowsError() throws Exception {
386386

387387
// Our implementation batches all into one multi-row INSERT, so if it fails, all fail
388388
String expectedMultiRowSQL =
389-
"INSERT INTO orders (user_id, shard, region_code, namespace) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)";
389+
"INSERT INTO orders (`user_id`, `shard`, `region_code`, `namespace`) VALUES (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?), (?, ?, ?, ?)";
390390
when(client.executeStatement(
391391
eq(expectedMultiRowSQL),
392392
eq(new Warehouse(WAREHOUSE_ID)),
@@ -962,7 +962,8 @@ public void testBatchedInsertWithManyParameters() throws Exception {
962962

963963
// With supportManyParameters=1, all 200 rows should be batched in a single INSERT
964964
// with interpolated values (not parameterized)
965-
String expectedSqlPrefix = "INSERT INTO orders (user_id, shard, region_code, namespace) VALUES";
965+
String expectedSqlPrefix =
966+
"INSERT INTO orders (`user_id`, `shard`, `region_code`, `namespace`) VALUES";
966967
when(client.executeStatement(
967968
org.mockito.ArgumentMatchers.startsWith(expectedSqlPrefix),
968969
eq(new Warehouse(WAREHOUSE_ID)),
@@ -1004,7 +1005,8 @@ public void testBatchedInsertWithVeryLargeParameterCount() throws Exception {
10041005
}
10051006

10061007
// With supportManyParameters=1, all 10,000 rows execute in a single INSERT
1007-
String expectedSqlPrefix = "INSERT INTO orders (user_id, shard, region_code, namespace) VALUES";
1008+
String expectedSqlPrefix =
1009+
"INSERT INTO orders (`user_id`, `shard`, `region_code`, `namespace`) VALUES";
10081010
when(client.executeStatement(
10091011
org.mockito.ArgumentMatchers.startsWith(expectedSqlPrefix),
10101012
eq(new Warehouse(WAREHOUSE_ID)),
@@ -1048,7 +1050,8 @@ public void testBatchedInsertWithCustomBatchInsertSize() throws Exception {
10481050
assertEquals(50, connectionContext.getBatchInsertSize());
10491051

10501052
// Mock will be called 4 times (200 rows / 50 batch size = 4 chunks)
1051-
String expectedSqlPrefix = "INSERT INTO orders (user_id, shard, region_code, namespace) VALUES";
1053+
String expectedSqlPrefix =
1054+
"INSERT INTO orders (`user_id`, `shard`, `region_code`, `namespace`) VALUES";
10521055
when(client.executeStatement(
10531056
org.mockito.ArgumentMatchers.startsWith(expectedSqlPrefix),
10541057
eq(new Warehouse(WAREHOUSE_ID)),
@@ -1108,7 +1111,8 @@ public void testBatchInsertSizeRespectsParameterLimit() throws Exception {
11081111

11091112
// Without supportManyParameters, should chunk at 256/4 = 64 rows
11101113
// even though BatchInsertSize=5000
1111-
String expectedSqlPrefix = "INSERT INTO orders (user_id, shard, region_code, namespace) VALUES";
1114+
String expectedSqlPrefix =
1115+
"INSERT INTO orders (`user_id`, `shard`, `region_code`, `namespace`) VALUES";
11121116
when(client.executeStatement(
11131117
org.mockito.ArgumentMatchers.startsWith(expectedSqlPrefix),
11141118
eq(new Warehouse(WAREHOUSE_ID)),
@@ -1169,7 +1173,7 @@ public void testBatchedInsertWithTimestampsGeneratesQuotedSQL() throws Exception
11691173
String executedSql = sqlCaptor.getValue();
11701174

11711175
String expectedSql =
1172-
"INSERT INTO events (id, name, created_at) VALUES "
1176+
"INSERT INTO events (`id`, `name`, `created_at`) VALUES "
11731177
+ "(1, 'Event One', '2024-01-01 12:30:45.123'), "
11741178
+ "(2, 'Event Two', '2024-02-15 08:15:30.456')";
11751179

src/test/java/com/databricks/jdbc/common/util/InsertStatementParserTest.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,8 @@ void testGenerateMultiRowInsert() throws Exception {
123123
assertNotNull(info);
124124

125125
String multiRowSql = InsertStatementParser.generateMultiRowInsert(info, 3);
126-
String expected = "INSERT INTO users (id, name, email) VALUES (?, ?, ?), (?, ?, ?), (?, ?, ?)";
126+
String expected =
127+
"INSERT INTO users (`id`, `name`, `email`) VALUES (?, ?, ?), (?, ?, ?), (?, ?, ?)";
127128
assertEquals(expected, multiRowSql);
128129
}
129130

@@ -134,7 +135,7 @@ void testGenerateMultiRowInsertSingleRow() throws Exception {
134135
assertNotNull(info);
135136

136137
String multiRowSql = InsertStatementParser.generateMultiRowInsert(info, 1);
137-
String expected = "INSERT INTO users (id, name) VALUES (?, ?)";
138+
String expected = "INSERT INTO users (`id`, `name`) VALUES (?, ?)";
138139
assertEquals(expected, multiRowSql);
139140
}
140141

@@ -150,6 +151,23 @@ void testGenerateMultiRowInsertInvalidInput() {
150151
assertThrows(Exception.class, () -> InsertStatementParser.generateMultiRowInsert(info, -1));
151152
}
152153

154+
@Test
155+
void testGenerateMultiRowInsertWithDottedColumnNames() throws Exception {
156+
// Column names containing dots must be backtick-quoted to avoid being interpreted
157+
// as schema.column references (see https://github.com/databricks/databricks-jdbc/issues/1284)
158+
String sql =
159+
"INSERT INTO `main`.`default`.`test_table` (`name`, `col.with.dots`, `another.dotted.col`) VALUES (?, ?, ?)";
160+
InsertInfo info = InsertStatementParser.parseInsert(sql);
161+
162+
assertNotNull(info);
163+
assertEquals(Arrays.asList("name", "col.with.dots", "another.dotted.col"), info.getColumns());
164+
165+
String multiRowSql = InsertStatementParser.generateMultiRowInsert(info, 2);
166+
String expected =
167+
"INSERT INTO `main`.`default`.`test_table` (`name`, `col.with.dots`, `another.dotted.col`) VALUES (?, ?, ?), (?, ?, ?)";
168+
assertEquals(expected, multiRowSql);
169+
}
170+
153171
@Test
154172
void testInsertInfoEqualsAndHashCode() {
155173
InsertInfo info1 =

0 commit comments

Comments
 (0)