Skip to content

Commit 484cf97

Browse files
saravadeomcculls
authored andcommitted
Fix platform-dependent String.getBytes() calls to use explicit UTF-8 charset
Specify StandardCharsets.UTF_8 in String.getBytes() calls used with MessageDigest and other encoding-sensitive APIs. Without an explicit charset, getBytes() uses the platform's default charset, which can vary across systems and produce inconsistent results. Files changed: - AppSecEventTracker: user ID anonymization hash now uses UTF-8, ensuring consistent hashing across all platforms. Also resolved the TODO about MessageDigest caching with a clarifying comment referencing micro-benchmark data showing negligible overhead. - Fingerprinter: exception fingerprint hashes now use UTF-8. - JsonStreamParser: JSON byte conversion now uses UTF-8 (JSON spec). - LLMObsSpanMapper: writeUTF8() now receives actual UTF-8 bytes.
1 parent c13e821 commit 484cf97

3 files changed

Lines changed: 10 additions & 6 deletions

File tree

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/Fingerprinter.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static com.datadog.debugger.util.ExceptionHelper.getInnerMostThrowable;
44

55
import datadog.trace.bootstrap.debugger.DebuggerContext.ClassNameFilter;
6+
import java.nio.charset.StandardCharsets;
67
import java.security.MessageDigest;
78
import java.security.NoSuchAlgorithmException;
89
import org.slf4j.Logger;
@@ -30,15 +31,15 @@ public static String fingerprint(Throwable t, ClassNameFilter classNameFiltering
3031
return null;
3132
}
3233
String typeName = clazz.getTypeName();
33-
digest.update(typeName.getBytes());
34+
digest.update(typeName.getBytes(StandardCharsets.UTF_8));
3435
StackTraceElement[] stackTrace = t.getStackTrace();
3536
if (stackTrace != null) {
3637
for (StackTraceElement stackTraceElement : stackTrace) {
3738
String className = stackTraceElement.getClassName();
3839
if (classNameFiltering.isExcluded(className)) {
3940
continue;
4041
}
41-
digest.update(stackTraceElement.toString().getBytes());
42+
digest.update(stackTraceElement.toString().getBytes(StandardCharsets.UTF_8));
4243
}
4344
}
4445
return bytesToHex(digest.digest());
@@ -47,7 +48,7 @@ public static String fingerprint(Throwable t, ClassNameFilter classNameFiltering
4748
public static String fingerprint(StackTraceElement element) {
4849
try {
4950
MessageDigest digest = MessageDigest.getInstance("SHA-256");
50-
digest.update(element.toString().getBytes());
51+
digest.update(element.toString().getBytes(StandardCharsets.UTF_8));
5152
return bytesToHex(digest.digest());
5253
} catch (NoSuchAlgorithmException e) {
5354
LOGGER.debug("Unable to find digest algorithm SHA-256", e);

dd-trace-core/src/main/java/datadog/trace/core/util/JsonStreamParser.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.ByteArrayInputStream;
66
import java.io.IOException;
77
import java.io.InputStream;
8+
import java.nio.charset.StandardCharsets;
89
import okio.BufferedSource;
910
import okio.Okio;
1011

@@ -60,7 +61,7 @@ public interface Visitor {
6061
*/
6162
public static boolean tryToParse(String raw, Visitor visitor, PathCursor pathCursor) {
6263
if (raw.startsWith("{") && raw.endsWith("}") || raw.startsWith("[") && raw.endsWith("]")) {
63-
try (InputStream is = new ByteArrayInputStream(raw.getBytes())) {
64+
try (InputStream is = new ByteArrayInputStream(raw.getBytes(StandardCharsets.UTF_8))) {
6465
return tryToParse(is, visitor, pathCursor.copy());
6566
} catch (Exception e) {
6667
visitor.expandValueFailed(pathCursor, e);

internal-api/src/main/java/datadog/trace/api/appsec/AppSecEventTracker.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import datadog.trace.bootstrap.instrumentation.api.AgentSpan;
3737
import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
3838
import datadog.trace.bootstrap.instrumentation.api.Tags;
39+
import java.nio.charset.StandardCharsets;
3940
import java.security.MessageDigest;
4041
import java.security.NoSuchAlgorithmException;
4142
import java.util.HashMap;
@@ -374,12 +375,13 @@ protected static String anonymize(final UserIdCollectionMode mode, final String
374375
}
375376
MessageDigest digest;
376377
try {
377-
// TODO avoid lookup a new instance every time
378+
// A new instance is needed each time for thread safety.
379+
// Per micro-benchmarks, the overhead of getInstance() is negligible.
378380
digest = MessageDigest.getInstance("SHA-256");
379381
} catch (NoSuchAlgorithmException e) {
380382
return null;
381383
}
382-
digest.update(userId.getBytes());
384+
digest.update(userId.getBytes(StandardCharsets.UTF_8));
383385
byte[] hash = digest.digest();
384386
if (hash.length > HASH_SIZE_BYTES) {
385387
byte[] temp = new byte[HASH_SIZE_BYTES];

0 commit comments

Comments
 (0)