Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
### Updated

### Fixed
- Fixed `?` characters inside SQL comments, string literals, and quoted identifiers being incorrectly counted as parameter placeholders when `supportManyParameters=1`. `SQLInterpolator` now uses `SqlCommentParser` to locate only real placeholders. Fixes #1331.

---
*Note: When making changes, please add your change under the appropriate section
Expand Down
61 changes: 30 additions & 31 deletions src/main/java/com/databricks/jdbc/common/util/SQLInterpolator.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
import com.databricks.jdbc.exception.DatabricksValidationException;
import com.databricks.jdbc.model.core.ColumnInfoTypeName;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SQLInterpolator {
protected static String escapeInputs(String input) {
Expand Down Expand Up @@ -65,22 +63,12 @@ private static String formatObject(ImmutableSqlParameter object) {
}
}

private static int countPlaceholders(String sql) {
int count = 0;
for (char c : sql.toCharArray()) {
if (c == '?') {
count++;
}
}
return count;
}

/**
* Interpolates the given SQL string by replacing placeholders with the provided parameters.
*
* <p>This method splits the SQL string by placeholders (question marks) and replaces each
* placeholder with the corresponding parameter from the provided map. The map keys are 1-based
* indexes, aligning with the SQL parameter positions.
* <p>Only '?' characters that appear outside of comments, string literals, and quoted identifiers
* are treated as placeholders. The map keys are 1-based indexes, aligning with the SQL parameter
* positions.
*
* @param sql the SQL string containing placeholders ('?') to be replaced.
* @param params a map of parameters where the key is the 1-based index of the placeholder in the
Expand All @@ -91,37 +79,48 @@ private static int countPlaceholders(String sql) {
*/
public static String interpolateSQL(String sql, Map<Integer, ImmutableSqlParameter> params)
throws DatabricksValidationException {
String[] parts = sql.split("\\?");
if (countPlaceholders(sql) != params.size()) {
int[] positions = SqlCommentParser.findPlaceholderPositions(sql);
if (positions.length != params.size()) {
throw new DatabricksValidationException(
"Parameter count does not match. Provide equal number of parameters as placeholders. SQL "
+ sql);
}
StringBuilder sb = new StringBuilder();
for (int i = 0; i < parts.length; i++) {
sb.append(parts[i]);
if (i < params.size()) {
sb.append(formatObject(params.get(i + 1))); // because we have 1 based index in params
}
StringBuilder sb = new StringBuilder(sql.length() + 16);
int last = 0;
for (int i = 0; i < positions.length; i++) {
int pos = positions[i];
sb.append(sql, last, pos);
sb.append(formatObject(params.get(i + 1))); // 1-based index in params
last = pos + 1;
}
if (last < sql.length()) {
sb.append(sql, last, sql.length());
}
return sb.toString();
}

/**
* Surrounds unquoted placeholders (?) with single quotes, preserving already quoted ones. This is
* crucial for DESCRIBE QUERY commands as unquoted placeholders will cause a parse_syntax_error.
* Surrounds real placeholders (?) with single quotes, leaving '?' characters that appear inside
* comments, string literals, or quoted identifiers untouched. This is crucial for DESCRIBE QUERY
* commands, where bare placeholders cause a parse_syntax_error.
*/
public static String surroundPlaceholdersWithQuotes(String sql) {
if (sql == null || sql.isEmpty()) {
return sql;
}
// This pattern matches any '?' that is NOT already inside single quotes
StringBuilder sb = new StringBuilder();
Matcher m = Pattern.compile("(?<!')\\?(?!')").matcher(sql);
while (m.find()) {
m.appendReplacement(sb, "'?'");
int[] positions = SqlCommentParser.findPlaceholderPositions(sql);
if (positions.length == 0) {
return sql;
}
StringBuilder sb = new StringBuilder(sql.length() + positions.length * 2);
int last = 0;
for (int pos : positions) {
sb.append(sql, last, pos).append("'?'");
last = pos + 1;
}
if (last < sql.length()) {
sb.append(sql, last, sql.length());
}
m.appendTail(sb);
return sb.toString();
}
}
70 changes: 58 additions & 12 deletions src/main/java/com/databricks/jdbc/common/util/SqlCommentParser.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package com.databricks.jdbc.common.util;

import java.util.ArrayList;
import java.util.List;

/** Utility class for parsing out comments from SQL strings */
public class SqlCommentParser {

Expand All @@ -17,6 +20,16 @@ public interface SqlCharConsumer {
void accept(State state, char c);
}

/**
* Index-aware variant of {@link SqlCharConsumer}. The third argument is the source-string index
* of the emitted character. For synthetic characters (the space emitted after a comment), the
* index is the position of the character that ended the comment.
*/
@FunctionalInterface
public interface IndexedSqlCharConsumer {
void accept(State state, char c, int sourceIndex);
}

/**
* Iterates over each character in the SQL string while keeping track of comment, string literal,
* and identifier state. Each character that is not part of a comment calls the consumer with the
Expand All @@ -26,6 +39,15 @@ public interface SqlCharConsumer {
* @param consumer called for each visible character with its parsing state
*/
public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) {
forEachNonCommentChar(sql, (state, c, sourceIndex) -> consumer.accept(state, c));
}

/**
* Same as {@link #forEachNonCommentChar(String, SqlCharConsumer)} but the consumer also receives
* the source-string index of each emitted character. Useful when callers need to relate emitted
* characters back to positions in the original SQL (e.g. parameter placeholder discovery).
*/
public static void forEachNonCommentChar(String sql, IndexedSqlCharConsumer consumer) {
if (sql == null || sql.isEmpty()) {
return;
}
Expand All @@ -48,42 +70,42 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) {
i++; // skip '*'
} else if (c == '\'') {
state = State.IN_SINGLE_QUOTE;
consumer.accept(state, c);
consumer.accept(state, c, i);
} else if (c == '"') {
state = State.IN_DOUBLE_QUOTE;
consumer.accept(state, c);
consumer.accept(state, c, i);
} else if (c == '`') {
state = State.IN_BACKTICK;
consumer.accept(state, c);
consumer.accept(state, c, i);
} else {
consumer.accept(state, c);
consumer.accept(state, c, i);
}
break;

case IN_SINGLE_QUOTE:
consumer.accept(state, c);
consumer.accept(state, c, i);
if (c == '\'' && next == '\'') {
consumer.accept(state, next);
consumer.accept(state, next, i + 1);
i++; // skip escaped quote
} else if (c == '\'') {
state = State.NORMAL;
}
break;

case IN_DOUBLE_QUOTE:
consumer.accept(state, c);
consumer.accept(state, c, i);
if (c == '"' && next == '"') {
consumer.accept(state, next);
consumer.accept(state, next, i + 1);
i++; // skip escaped quote
} else if (c == '"') {
state = State.NORMAL;
}
break;

case IN_BACKTICK:
consumer.accept(state, c);
consumer.accept(state, c, i);
if (c == '`' && next == '`') {
consumer.accept(state, next);
consumer.accept(state, next, i + 1);
i++; // skip escaped backtick
} else if (c == '`') {
state = State.NORMAL;
Expand All @@ -93,7 +115,7 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) {
case IN_LINE_COMMENT:
if (c == '\n' || c == '\r') {
state = State.NORMAL;
consumer.accept(state, ' ');
consumer.accept(state, ' ', i);
if (c == '\r' && next == '\n') {
i++; // Treat \r\n as a single line ending
}
Expand All @@ -110,7 +132,7 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) {
i++; // skip '/'
if (blockCommentDepth == 0) {
state = State.NORMAL;
consumer.accept(state, ' ');
consumer.accept(state, ' ', i);
}
}
// else: skip character (part of the comment)
Expand All @@ -119,6 +141,30 @@ public static void forEachNonCommentChar(String sql, SqlCharConsumer consumer) {
}
}

/**
* Finds the source-string indices of placeholder ('?') characters that appear in {@link
* State#NORMAL} state — i.e. not inside comments, string literals, or quoted identifiers. Used by
* parameter binding to locate the real placeholders in a SQL statement.
*
* @param sql the SQL string to scan
* @return source-string indices of placeholder '?' characters, in order
*/
public static int[] findPlaceholderPositions(String sql) {
List<Integer> positions = new ArrayList<>();
forEachNonCommentChar(
sql,
(state, c, sourceIndex) -> {
if (state == State.NORMAL && c == '?') {
positions.add(sourceIndex);
}
});
int[] out = new int[positions.size()];
for (int k = 0; k < out.length; k++) {
out[k] = positions.get(k);
}
return out;
}

/**
* Removes all comments and extra whitespace from the SQL string.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,78 @@ public void testStringWithNewline() throws DatabricksValidationException {
assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params));
}

@Test
public void testQuestionMarkInLineCommentNotTreatedAsPlaceholder()
throws DatabricksValidationException {
String sql = "-- does this work?\nselect * from mytable where id = ?";
Map<Integer, ImmutableSqlParameter> params = new HashMap<>();
params.put(1, getSqlParam(1, 42, DatabricksTypeUtil.INT));
String expected = "-- does this work?\nselect * from mytable where id = 42";
assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params));
}

@Test
public void testQuestionMarkInBlockCommentNotTreatedAsPlaceholder()
throws DatabricksValidationException {
String sql = "select /* maybe? or ? */ * from t where id = ?";
Map<Integer, ImmutableSqlParameter> params = new HashMap<>();
params.put(1, getSqlParam(1, 1, DatabricksTypeUtil.INT));
String expected = "select /* maybe? or ? */ * from t where id = 1";
assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params));
}

@Test
public void testQuestionMarkInStringLiteralNotTreatedAsPlaceholder()
throws DatabricksValidationException {
String sql = "select 'hello?', * from mytable where id = ?";
Map<Integer, ImmutableSqlParameter> params = new HashMap<>();
params.put(1, getSqlParam(1, 5, DatabricksTypeUtil.INT));
String expected = "select 'hello?', * from mytable where id = 5";
assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params));
}

@Test
public void testQuestionMarkInQuotedIdentifierNotTreatedAsPlaceholder()
throws DatabricksValidationException {
String sql = "select `col?` from t where id = ? and name = \"who?\"";
Map<Integer, ImmutableSqlParameter> params = new HashMap<>();
params.put(1, getSqlParam(1, 7, DatabricksTypeUtil.INT));
String expected = "select `col?` from t where id = 7 and name = \"who?\"";
assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params));
}

@Test
public void testCombinedCommentsAndLiteralsWithQuestionMarks()
throws DatabricksValidationException {
String sql =
"-- ?\n/* ? */ select 'a?' as x, ? as y, \"b?\" as z, `c?` as w from t where id = ?";
Map<Integer, ImmutableSqlParameter> params = new HashMap<>();
params.put(1, getSqlParam(1, "alice", DatabricksTypeUtil.STRING));
params.put(2, getSqlParam(2, 99, DatabricksTypeUtil.INT));
String expected =
"-- ?\n/* ? */ select 'a?' as x, 'alice' as y, \"b?\" as z, `c?` as w from t where id = 99";
assertEquals(expected, SQLInterpolator.interpolateSQL(sql, params));
}

@Test
public void testInterpolateAdjacentPlaceholders() throws DatabricksValidationException {
String sql = "select ?,?,? from t";
Map<Integer, ImmutableSqlParameter> params = new HashMap<>();
params.put(1, getSqlParam(1, 1, DatabricksTypeUtil.INT));
params.put(2, getSqlParam(2, 2, DatabricksTypeUtil.INT));
params.put(3, getSqlParam(3, 3, DatabricksTypeUtil.INT));
assertEquals("select 1,2,3 from t", SQLInterpolator.interpolateSQL(sql, params));
}

@Test
public void testInterpolatePlaceholderAtStartAndEnd() throws DatabricksValidationException {
Map<Integer, ImmutableSqlParameter> params = new HashMap<>();
params.put(1, getSqlParam(1, 7, DatabricksTypeUtil.INT));
assertEquals("7 ", SQLInterpolator.interpolateSQL("? ", params));
assertEquals(" 7", SQLInterpolator.interpolateSQL(" ?", params));
assertEquals("7", SQLInterpolator.interpolateSQL("?", params));
}

@Test
public void testEscapeInputs() {
// Simple apostrophe doubling
Expand Down Expand Up @@ -177,7 +249,31 @@ private static Stream<Arguments> providePlaceholderQuotingTestCases() {
Arguments.of(
"SELECT * FROM table WHERE id = ? AND (name = ? OR age = ?) AND status = ?",
"SELECT * FROM table WHERE id = '?' AND (name = '?' OR age = '?') AND status = '?'",
"Complex query with multiple conditions"));
"Complex query with multiple conditions"),

// ? inside line comment must not be quoted
Arguments.of(
"-- is this a ?\nSELECT ? FROM t",
"-- is this a ?\nSELECT '?' FROM t",
"Question mark inside line comment is preserved"),

// ? inside block comment must not be quoted
Arguments.of(
"SELECT /* ? */ ? FROM t",
"SELECT /* ? */ '?' FROM t",
"Question mark inside block comment is preserved"),

// ? inside double-quoted identifier must not be quoted
Arguments.of(
"SELECT \"col?\" FROM t WHERE id = ?",
"SELECT \"col?\" FROM t WHERE id = '?'",
"Question mark inside double-quoted identifier is preserved"),

// ? inside backtick identifier must not be quoted
Arguments.of(
"SELECT `col?` FROM t WHERE id = ?",
"SELECT `col?` FROM t WHERE id = '?'",
"Question mark inside backtick identifier is preserved"));
}

@ParameterizedTest(name = "{2}")
Expand Down
Loading
Loading