Skip to content

Commit fea50c1

Browse files
laurittrask
andauthored
Sql sanitizer should by default treat double-quoted fragments as string literals (#15582)
Co-authored-by: Trask Stalnaker <trask.stalnaker@gmail.com>
1 parent abd9e3c commit fea50c1

27 files changed

Lines changed: 401 additions & 145 deletions

File tree

conventions/src/main/kotlin/io/opentelemetry/instrumentation/gradle/StaticImportFormatter.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ class StaticImportFormatter : FormatterFunc, Serializable {
6161
"io.opentelemetry.instrumentation.api.internal.SemconvStability",
6262
"emit[a-zA-Z0-9]*"
6363
),
64+
Triple(
65+
"SqlDialect",
66+
"io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlDialect",
67+
"DOUBLE_QUOTES_ARE_[A-Z_]+"
68+
),
6469
Triple("Collectors", "java.util.stream.Collectors", "[a-z][a-zA-Z0-9]*"),
6570
)
6671

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/DbClientSpanNameExtractor.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ private SqlClientSpanNameExtractor(SqlClientAttributesGetter<REQUEST, ?> getter)
183183
@Override
184184
public String extract(REQUEST request) {
185185
String namespace = getter.getDbNamespace(request);
186+
SqlDialect dialect = getter.getSqlDialect(request);
186187
Collection<String> rawQueryTexts = getter.getRawQueryTexts(request);
187188

188189
if (rawQueryTexts.isEmpty()) {
@@ -202,8 +203,8 @@ public String extract(REQUEST request) {
202203
if (rawQueryTexts.size() > 1) { // for backcompat(?)
203204
return computeSpanName(namespace, null, null, null);
204205
}
205-
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyze(rawQueryTexts.iterator().next());
206-
206+
SqlQuery analyzedQuery =
207+
SqlQueryAnalyzerUtil.analyze(rawQueryTexts.iterator().next(), dialect);
207208
return computeSpanName(
208209
namespace,
209210
analyzedQuery.getOperationName(),
@@ -213,7 +214,7 @@ public String extract(REQUEST request) {
213214

214215
if (rawQueryTexts.size() == 1) {
215216
String rawQueryText = rawQueryTexts.iterator().next();
216-
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyzeWithSummary(rawQueryText);
217+
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyzeWithSummary(rawQueryText, dialect);
217218
boolean batch = isBatch(request);
218219
String querySummary = analyzedQuery.getQuerySummary();
219220
if (querySummary != null) {
@@ -223,7 +224,7 @@ public String extract(REQUEST request) {
223224
getter, request, batch ? "BATCH" : null, null, analyzedQuery.getStoredProcedureName());
224225
}
225226

226-
MultiQuery multiQuery = MultiQuery.analyzeWithSummary(rawQueryTexts, false);
227+
MultiQuery multiQuery = MultiQuery.analyzeWithSummary(rawQueryTexts, dialect, false);
227228
String querySummary = multiQuery.getQuerySummary();
228229
if (querySummary != null) {
229230
return querySummary;
@@ -266,7 +267,9 @@ public String extract(REQUEST request) {
266267
Collection<String> rawQueryTexts = getter.getRawQueryTexts(request);
267268
String operationName = null;
268269
if (rawQueryTexts.size() == 1) {
269-
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyze(rawQueryTexts.iterator().next());
270+
String rawQuery = rawQueryTexts.iterator().next();
271+
SqlDialect dialect = getter.getSqlDialect(request);
272+
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyze(rawQuery, dialect);
270273
operationName = analyzedQuery.getOperationName();
271274
}
272275
if (operationName == null) {

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/MultiQuery.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ private MultiQuery(
2424
}
2525

2626
static MultiQuery analyzeWithSummary(
27-
Collection<String> rawQueryTexts, boolean querySanitizationEnabled) {
27+
Collection<String> rawQueryTexts, SqlDialect dialect, boolean querySanitizationEnabled) {
2828
UniqueValue uniqueStoredProcedureName = new UniqueValue();
2929
Set<String> uniqueQueryTexts = new LinkedHashSet<>();
3030
UniqueValue uniqueQuerySummary = new UniqueValue();
3131
for (String rawQueryText : rawQueryTexts) {
32-
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyzeWithSummary(rawQueryText);
32+
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyzeWithSummary(rawQueryText, dialect);
3333
uniqueStoredProcedureName.set(analyzedQuery.getStoredProcedureName());
3434
uniqueQueryTexts.add(querySanitizationEnabled ? analyzedQuery.getQueryText() : rawQueryText);
3535
uniqueQuerySummary.set(analyzedQuery.getQuerySummary());

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlClientAttributesExtractor.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,15 @@ public static <REQUEST, RESPONSE> SqlClientAttributesExtractorBuilder<REQUEST, R
8080
@Override
8181
public void onStart(AttributesBuilder attributes, Context parentContext, REQUEST request) {
8282
Collection<String> rawQueryTexts = getter.getRawQueryTexts(request);
83+
SqlDialect dialect = getter.getSqlDialect(request);
8384

8485
Long batchSize = getter.getDbOperationBatchSize(request);
8586
boolean isBatch = batchSize != null && batchSize > 1;
8687

8788
if (emitOldDatabaseSemconv()) {
8889
if (rawQueryTexts.size() == 1) { // for backcompat(?)
8990
String rawQueryText = rawQueryTexts.iterator().next();
90-
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyze(rawQueryText);
91+
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyze(rawQueryText, dialect);
9192
String operationName = analyzedQuery.getOperationName();
9293
attributes.put(
9394
DB_STATEMENT, querySanitizationEnabled ? analyzedQuery.getQueryText() : rawQueryText);
@@ -106,7 +107,7 @@ public void onStart(AttributesBuilder attributes, Context parentContext, REQUEST
106107
boolean shouldSanitize = querySanitizationEnabled && !parameterizedQuery;
107108
if (rawQueryTexts.size() == 1) {
108109
String rawQueryText = rawQueryTexts.iterator().next();
109-
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyzeWithSummary(rawQueryText);
110+
SqlQuery analyzedQuery = SqlQueryAnalyzerUtil.analyzeWithSummary(rawQueryText, dialect);
110111
attributes.put(DB_QUERY_TEXT, shouldSanitize ? analyzedQuery.getQueryText() : rawQueryText);
111112
String querySummary = analyzedQuery.getQuerySummary();
112113
attributes.put(
@@ -115,7 +116,8 @@ public void onStart(AttributesBuilder attributes, Context parentContext, REQUEST
115116
attributes.put(DB_STORED_PROCEDURE_NAME, analyzedQuery.getStoredProcedureName());
116117
} else if (rawQueryTexts.size() > 1) {
117118
MultiQuery multiQuery =
118-
MultiQuery.analyzeWithSummary(getter.getRawQueryTexts(request), shouldSanitize);
119+
MultiQuery.analyzeWithSummary(
120+
getter.getRawQueryTexts(request), dialect, shouldSanitize);
119121
attributes.put(DB_QUERY_TEXT, join("; ", multiQuery.getQueryTexts()));
120122
attributes.put(DB_QUERY_SUMMARY, multiQuery.getQuerySummary());
121123
attributes.put(DB_STORED_PROCEDURE_NAME, multiQuery.getStoredProcedureName());

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlClientAttributesGetter.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ default String getDbQuerySummary(REQUEST request) {
5252
return null;
5353
}
5454

55+
/** Returns the SQL dialect used by the database. */
56+
SqlDialect getSqlDialect(REQUEST request);
57+
5558
/**
5659
* Get the raw SQL query texts. The values returned by this method are later sanitized by the
5760
* {@link SqlClientAttributesExtractor} before being set as span attribute.

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlDialect.java

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,51 @@
55

66
package io.opentelemetry.instrumentation.api.incubator.semconv.db;
77

8-
/** Enumeration of sql dialects that are handled differently by {@link SqlQueryAnalyzer}. */
9-
public enum SqlDialect {
10-
DEFAULT,
11-
// couchbase uses double quotes for string literals
12-
COUCHBASE
8+
import com.google.auto.value.AutoValue;
9+
10+
/** Describes SQL dialect options that affect how {@link SqlQueryAnalyzer} processes queries. */
11+
@AutoValue
12+
public abstract class SqlDialect {
13+
14+
/**
15+
* Dialect where double-quoted fragments are treated as string literals and therefore sanitized
16+
* (replaced with {@code ?}).
17+
*
18+
* <p>This is a safer choice if you don't know how the database interprets double-quoted tokens.
19+
* If the database actually uses double quotes for identifiers, the downside is that identifier
20+
* names are replaced with {@code ?}, reducing diagnostic clarity. By contrast, choosing {@link
21+
* #DOUBLE_QUOTES_ARE_IDENTIFIERS} incorrectly would fail to sanitize sensitive string-literal
22+
* values.
23+
*/
24+
public static final SqlDialect DOUBLE_QUOTES_ARE_STRING_LITERALS = builder().build();
25+
26+
/**
27+
* Dialect where double-quoted fragments are treated as identifiers (e.g. column/table names) and
28+
* therefore left intact during sanitization. Use this only for databases where double quotes
29+
* always denote identifiers.
30+
*/
31+
public static final SqlDialect DOUBLE_QUOTES_ARE_IDENTIFIERS =
32+
builder().setDoubleQuotesAreIdentifiers(true).build();
33+
34+
SqlDialect() {}
35+
36+
static Builder builder() {
37+
return new AutoValue_SqlDialect.Builder().setDoubleQuotesAreIdentifiers(false);
38+
}
39+
40+
/**
41+
* Returns {@code true} if the dialect uses double quotes for identifiers, {@code false} if double
42+
* quotes are used for string literals.
43+
*/
44+
abstract boolean doubleQuotesAreIdentifiers();
45+
46+
@AutoValue.Builder
47+
abstract static class Builder {
48+
49+
Builder() {}
50+
51+
abstract Builder setDoubleQuotesAreIdentifiers(boolean doubleQuotesAreIdentifiers);
52+
53+
abstract SqlDialect build();
54+
}
1355
}

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlQueryAnalyzer.java

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package io.opentelemetry.instrumentation.api.incubator.semconv.db;
77

8+
import static io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlDialect.DOUBLE_QUOTES_ARE_STRING_LITERALS;
89
import static io.opentelemetry.instrumentation.api.internal.SupportabilityMetrics.CounterNames.SQL_SANITIZER_CACHE_MISS;
910

1011
import com.google.auto.value.AutoValue;
@@ -33,8 +34,12 @@ private SqlQueryAnalyzer(boolean querySanitizationEnabled) {
3334
this.querySanitizationEnabled = querySanitizationEnabled;
3435
}
3536

37+
/**
38+
* @deprecated Use {@link #analyze(String, SqlDialect)} and pass an explicit dialect.
39+
*/
40+
@Deprecated
3641
public SqlQuery analyze(@Nullable String query) {
37-
return analyze(query, SqlDialect.DEFAULT);
42+
return analyze(query, DOUBLE_QUOTES_ARE_STRING_LITERALS);
3843
}
3944

4045
public SqlQuery analyze(@Nullable String query, SqlDialect dialect) {
@@ -56,9 +61,14 @@ private static SqlQuery analyzeImpl(String query, SqlDialect dialect) {
5661
return AutoSqlSanitizer.sanitize(query, dialect);
5762
}
5863

59-
/** Analyze and extract query summary. */
64+
/**
65+
* Analyze and extract query summary.
66+
*
67+
* @deprecated Use {@link #analyzeWithSummary(String, SqlDialect)} and pass an explicit dialect.
68+
*/
69+
@Deprecated
6070
public SqlQuery analyzeWithSummary(@Nullable String query) {
61-
return analyzeWithSummary(query, SqlDialect.DEFAULT);
71+
return analyzeWithSummary(query, DOUBLE_QUOTES_ARE_STRING_LITERALS);
6272
}
6373

6474
/** Analyze and extract query summary. */
@@ -82,8 +92,8 @@ private static SqlQuery analyzeWithSummaryImpl(String query, SqlDialect dialect)
8292
}
8393

8494
// visible for tests
85-
static boolean isCached(String query) {
86-
return sqlToQueryCache.get(CacheKey.create(query, SqlDialect.DEFAULT)) != null;
95+
static boolean isCached(String query, SqlDialect dialect) {
96+
return sqlToQueryCache.get(CacheKey.create(query, dialect)) != null;
8797
}
8898

8999
@AutoValue

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlQueryAnalyzerUtil.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package io.opentelemetry.instrumentation.api.incubator.semconv.db;
77

8+
import io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlQueryAnalyzer.CacheKey;
89
import io.opentelemetry.instrumentation.api.instrumenter.Instrumenter;
910
import io.opentelemetry.instrumentation.api.internal.InstrumenterContext;
1011
import java.util.HashMap;
@@ -17,17 +18,21 @@
1718
class SqlQueryAnalyzerUtil {
1819
private static final SqlQueryAnalyzer analyzer = SqlQueryAnalyzer.create(true);
1920

20-
static SqlQuery analyze(String queryText) {
21-
Map<String, SqlQuery> map =
21+
static SqlQuery analyze(String queryText, SqlDialect dialect) {
22+
Map<CacheKey, SqlQuery> map =
2223
InstrumenterContext.computeIfAbsent("sanitized-sql-map", unused -> new HashMap<>());
23-
return map.computeIfAbsent(queryText, analyzer::analyze);
24+
return map.computeIfAbsent(
25+
CacheKey.create(queryText, dialect),
26+
key -> analyzer.analyze(key.getQueryText(), key.getDialect()));
2427
}
2528

26-
static SqlQuery analyzeWithSummary(String queryText) {
27-
Map<String, SqlQuery> map =
29+
static SqlQuery analyzeWithSummary(String queryText, SqlDialect dialect) {
30+
Map<CacheKey, SqlQuery> map =
2831
InstrumenterContext.computeIfAbsent(
2932
"sanitized-sql-map-with-summary", unused -> new HashMap<>());
30-
return map.computeIfAbsent(queryText, analyzer::analyzeWithSummary);
33+
return map.computeIfAbsent(
34+
CacheKey.create(queryText, dialect),
35+
key -> analyzer.analyzeWithSummary(key.getQueryText(), key.getDialect()));
3136
}
3237

3338
private SqlQueryAnalyzerUtil() {}

instrumentation-api-incubator/src/main/jflex/SqlSanitizer.jflex

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ WHITESPACE = [ \t\r\n]+
4545
%{
4646
static SqlQuery sanitize(String statement, SqlDialect dialect) {
4747
AutoSqlSanitizer sanitizer = new AutoSqlSanitizer(new java.io.StringReader(statement));
48-
sanitizer.dialect = dialect;
48+
sanitizer.doubleQuotesAreIdentifiers = dialect.doubleQuotesAreIdentifiers();
4949
try {
5050
while (!sanitizer.yyatEOF()) {
5151
int token = sanitizer.yylex();
@@ -119,7 +119,7 @@ WHITESPACE = [ \t\r\n]+
119119
private boolean insideComment = false;
120120
private Operation operation = NoOp.INSTANCE;
121121
private boolean extractionDone = false;
122-
private SqlDialect dialect;
122+
private boolean doubleQuotesAreIdentifiers;
123123

124124
private void setOperation(Operation operation) {
125125
if (this.operation == NoOp.INSTANCE) {
@@ -560,13 +560,21 @@ WHITESPACE = [ \t\r\n]+
560560
}
561561

562562
{DOUBLE_QUOTED_STR} {
563-
if (dialect == SqlDialect.COUCHBASE) {
564-
builder.append('?');
565-
} else {
566-
if (!insideComment && !extractionDone) {
567-
extractionDone = operation.handleIdentifier();
568-
}
563+
// Always notify the operation about double-quoted tokens regardless of dialect so
564+
// that table name extraction works correctly even when the dialect treats them as
565+
// string literals. For example, SELECT * FROM "my_table" should extract the table
566+
// name "my_table" whether or not the dialect sanitizes the token.
567+
//
568+
// The extractionDone guard ensures handleIdentifier() is a no-op once extraction
569+
// is complete, so there is no risk of leaking sensitive string content into the
570+
// span name.
571+
if (!insideComment && !extractionDone) {
572+
extractionDone = operation.handleIdentifier();
573+
}
574+
if (doubleQuotesAreIdentifiers) {
569575
appendCurrentFragment();
576+
} else {
577+
builder.append('?');
570578
}
571579
if (isOverLimit()) return YYEOF;
572580
}

instrumentation-api-incubator/src/main/jflex/SqlSanitizerWithSummary.jflex

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,13 +1094,21 @@ WHITESPACE = [ \t\r\n]+
10941094
}
10951095

10961096
{DOUBLE_QUOTED_STR} {
1097-
if (dialect == SqlDialect.COUCHBASE) {
1098-
builder.append('?');
1099-
} else {
1100-
if (!insideComment) {
1101-
operation.handleIdentifier();
1102-
}
1097+
if (!insideComment) {
1098+
// Always notify the operation about double-quoted tokens regardless of dialect so
1099+
// that summarization works correctly even when the dialect treats them as string
1100+
// literals. For example, SELECT * FROM "my_table" should produce the summary
1101+
// "SELECT my_table" whether or not the dialect sanitizes the token.
1102+
//
1103+
// The operation's own state guards (e.g. identifierCaptured, captureTableList)
1104+
// ensure handleIdentifier() is a no-op when not structurally expected, so there
1105+
// is no risk of leaking sensitive string content into the summary.
1106+
operation.handleIdentifier();
1107+
}
1108+
if (dialect.doubleQuotesAreIdentifiers()) {
11031109
appendCurrentFragment();
1110+
} else {
1111+
builder.append('?');
11041112
}
11051113
if (isOverLimit()) return YYEOF;
11061114
}

0 commit comments

Comments
 (0)