DataDog · gh-worker-dd-mergequeue-cf854d · Apr 28, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
@@ -88,6 +88,7 @@
 /dd-trace-api/src/main/java/datadog/trace/api/aiguard/          @DataDog/asm-java
 /dd-trace-api/src/main/java/datadog/trace/api/EventTracker.java @DataDog/asm-java
 /internal-api/src/main/java/datadog/trace/api/gateway/          @DataDog/asm-java
+/internal-api/src/main/java/datadog/trace/api/http/             @DataDog/asm-java
 **/appsec/                                                      @DataDog/asm-java
 **/*CallSite*.java                                              @DataDog/asm-java
 **/*CallSite*.groovy                                            @DataDog/asm-java

@@ -1,8 +1,8 @@
 package datadog.trace.instrumentation.commons.fileupload;
 
+import datadog.trace.api.http.MultipartContentDecoder;
 import java.io.IOException;
 import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.commons.fileupload.FileItem;
@@ -35,7 +35,7 @@ public static String readContent(FileItem fileItem) {
           && (n = is.read(buf, total, MAX_CONTENT_BYTES - total)) != -1) {
         total += n;
       }
-      return new String(buf, 0, total, StandardCharsets.ISO_8859_1);
+      return MultipartContentDecoder.decodeBytes(buf, total, fileItem.getContentType());
     } catch (IOException ignored) {
       return "";
     }

@@ -39,6 +39,15 @@ class FileItemContentReaderTest extends Specification {
     FileItemContentReader.readContent(item) == ''
   }
 
+  void 'readContent uses Content-Type from file item for charset decoding'() {
+    given:
+    def text = 'héllo wörld'
+    def item = fileItemFromBytes(text.getBytes('UTF-8'), 'file.txt', 'text/plain; charset=UTF-8')
+
+    expect:
+    FileItemContentReader.readContent(item) == text
+  }
+
   void 'readContents returns content for each non-form file with a name'() {
     given:
     def items = [fileItem('content-a', 'file-a.txt'), fileItem('content-b', 'file-b.txt'),]
@@ -101,10 +110,19 @@ class FileItemContentReaderTest extends Specification {
   }
 
   private FileItem fileItem(String content, String name) {
+    fileItem(content, name, null)
+  }
+
+  private FileItem fileItem(String content, String name, String contentType) {
+    fileItemFromBytes((content ?: '').getBytes('ISO-8859-1'), name, contentType)
+  }
+
+  private FileItem fileItemFromBytes(byte[] bytes, String name, String contentType) {
     FileItem item = Stub(FileItem)
     item.isFormField() >> false
     item.getName() >> name
-    item.getInputStream() >> new ByteArrayInputStream((content ?: '').getBytes('ISO-8859-1'))
+    item.getContentType() >> contentType
+    item.getInputStream() >> new ByteArrayInputStream(bytes)
     return item
   }
 }
@@ -0,0 +1,74 @@
+package datadog.trace.api.http;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
+
+/** Decodes multipart file content bytes to String using the per-part Content-Type charset. */
+public final class MultipartContentDecoder {
+
+  public static String decodeBytes(byte[] buf, int length, String contentType) {
+    Charset charset = extractCharset(contentType);
+    if (charset == null) charset = Charset.defaultCharset();
+    try {
+      return charset
+          .newDecoder()
+          .onMalformedInput(CodingErrorAction.REPLACE)
+          .onUnmappableCharacter(CodingErrorAction.REPLACE)
+          .decode(ByteBuffer.wrap(buf, 0, length))
+          .toString();
+    } catch (CharacterCodingException e) {
+      // unreachable: CodingErrorAction.REPLACE never throws CharacterCodingException
+      throw new IllegalStateException(e);
+    }
+  }
+
+  public static Charset extractCharset(String contentType) {
+    if (contentType == null) return null;
+    int searchFrom = 0;
+    while (true) {
+      int idx = indexOfIgnoreAsciiCase(contentType, "charset=", searchFrom);
+      if (idx < 0) return null;
+      // Require a parameter boundary before "charset=" so "xcharset=..." is not matched
+      if (idx == 0 || contentType.charAt(idx - 1) == ';' || contentType.charAt(idx - 1) == ' ') {
+        int nameStart = idx + 8;
+        int end = contentType.length();
+        for (int i = nameStart; i < end; i++) {
+          char c = contentType.charAt(i);
+          if (c == ';' || c == ',' || c == ' ') {
+            end = i;
+            break;
+          }
+        }
+        String name = contentType.substring(nameStart, end).trim();
+        if (name.length() > 1 && name.charAt(0) == '"' && name.charAt(name.length() - 1) == '"') {
+          name = name.substring(1, name.length() - 1);
+        }
+        try {
+          return Charset.forName(name);
+        } catch (IllegalArgumentException e) {
+          return null;
+        }
+      }
+      searchFrom = idx + 1;
+    }
+  }
+
+  private static int indexOfIgnoreAsciiCase(String s, String needle, int fromIndex) {
+    int sLen = s.length();
+    int nLen = needle.length();
+    outer:
+    for (int i = fromIndex, max = sLen - nLen; i <= max; i++) {
+      for (int j = 0; j < nLen; j++) {
+        if (Character.toLowerCase(s.charAt(i + j)) != needle.charAt(j)) {
+          continue outer;
+        }
+      }
+      return i;
+    }
+    return -1;
+  }
+
+  private MultipartContentDecoder() {}
+}
@@ -0,0 +1,151 @@
+package datadog.trace.api.http;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import java.nio.charset.StandardCharsets;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.NullAndEmptySource;
+import org.junit.jupiter.params.provider.ValueSource;
+
+public class MultipartContentDecoderTest {
+
+  @Test
+  void decodeBytesUsesDeclaredUtf8Charset() {
+    String text = "héllo wörld";
+    byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
+    assertEquals(
+        text,
+        MultipartContentDecoder.decodeBytes(bytes, bytes.length, "text/plain; charset=UTF-8"));
+  }
+
+  @Test
+  void decodeBytesUsesDeclaredIso88591Charset() {
+    String text = "café";
+    byte[] bytes = text.getBytes(StandardCharsets.ISO_8859_1);
+    assertEquals(
+        text,
+        MultipartContentDecoder.decodeBytes(bytes, bytes.length, "text/plain; charset=ISO-8859-1"));
+  }
+
+  @Test
+  void decodeBytesDefaultsToMachineDefaultWhenNoCharset() {
+    String text = "hello world";
+    byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
+    assertEquals(text, MultipartContentDecoder.decodeBytes(bytes, bytes.length, "text/plain"));
+  }
+
+  @Test
+  void decodeBytesDefaultsToMachineDefaultWhenNullContentType() {
+    String text = "hello world";
+    byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
+    assertEquals(text, MultipartContentDecoder.decodeBytes(bytes, bytes.length, null));
+  }
+
+  @Test
+  void decodeBytesRespectsLengthParameter() {
+    byte[] bytes = "hello world".getBytes(StandardCharsets.UTF_8);
+    assertEquals("hello", MultipartContentDecoder.decodeBytes(bytes, 5, null));
+  }
+
+  @Test
+  void decodeBytesReturnsEmptyStringForZeroLength() {
+    assertEquals("", MultipartContentDecoder.decodeBytes(new byte[16], 0, null));
+  }
+
+  @Test
+  void decodeBytesReplacesMalformedBytesWithReplacementCharacterUsingDeclaredCharset() {
+    // 0xE9 (ISO-8859-1 'é') is not valid UTF-8; REPLACE substitutes U+FFFD
+    byte[] bytes = "café".getBytes(StandardCharsets.ISO_8859_1);
+    assertEquals(
+        "caf�",
+        MultipartContentDecoder.decodeBytes(bytes, bytes.length, "text/plain; charset=UTF-8"));
+  }
+
+  @Test
+  void decodeBytesHandlesTruncationAtMultibyteCharacterBoundary() {
+    // "€" encodes as 3 bytes in UTF-8: E2 82 AC
+    byte[] complete = "hello€".getBytes(StandardCharsets.UTF_8); // 8 bytes
+    // Pass only 6 bytes: "hello" + first byte of "€" (incomplete sequence)
+    String result = MultipartContentDecoder.decodeBytes(complete, 6, "text/plain; charset=UTF-8");
+    // Incomplete sequence → U+FFFD with declared charset, not fallback to JVM default
+    assertEquals("hello�", result);
+  }
+
+  @ParameterizedTest
+  @NullAndEmptySource
+  void extractCharsetReturnsNullForNullOrEmptyContentType(String contentType) {
+    assertNull(MultipartContentDecoder.extractCharset(contentType));
+  }
+
+  @ParameterizedTest
+  @ValueSource(strings = {"text/plain", "image/jpeg", "application/octet-stream"})
+  void extractCharsetReturnsNullForContentTypeWithoutCharset(String contentType) {
+    assertNull(MultipartContentDecoder.extractCharset(contentType));
+  }
+
+  @Test
+  void extractCharsetReturnsNullForInvalidCharsetName() {
+    assertNull(MultipartContentDecoder.extractCharset("text/plain; charset=NOTACHARSET"));
+  }
+
+  @ParameterizedTest
+  @ValueSource(
+      strings = {
+        "text/plain; CHARSET=UTF-8",
+        "text/plain; Charset=UTF-8",
+        "text/plain; charset=utf-8"
+      })
+  void extractCharsetIsCaseInsensitive(String contentType) {
+    assertEquals("UTF-8", MultipartContentDecoder.extractCharset(contentType).name());
+  }
+
+  @ParameterizedTest
+  @CsvSource({"text/plain; charset=UTF-8, UTF-8", "text/xml; charset=ISO-8859-1, ISO-8859-1"})
+  void extractCharsetFromStandardContentType(String contentType, String expectedCharset) {
+    assertEquals(expectedCharset, MultipartContentDecoder.extractCharset(contentType).name());
+  }
+
+  @Test
+  void extractCharsetIgnoresSubstringMatchInParameterName() {
+    // "xcharset=UTF-16" must not match; the real "charset=UTF-8" that follows must be used
+    assertEquals(
+        "UTF-8",
+        MultipartContentDecoder.extractCharset("text/plain; xcharset=UTF-16; charset=UTF-8")
+            .name());
+  }
+
+  @Test
+  void extractCharsetReturnsNullWhenOnlySubstringMatchExists() {
+    assertNull(MultipartContentDecoder.extractCharset("text/plain; xcharset=UTF-8"));
+  }
+
+  @Test
+  void extractCharsetHandlesAdditionalParameters() {
+    assertEquals(
+        "UTF-8",
+        MultipartContentDecoder.extractCharset("text/plain; charset=UTF-8; boundary=something")
+            .name());
+  }
+
+  @ParameterizedTest
+  @CsvSource({
+    "text/plain; charset=\"UTF-8\", UTF-8",
+    "text/xml; charset=\"ISO-8859-1\", ISO-8859-1"
+  })
+  void extractCharsetHandlesQuotedCharsetValue(String contentType, String expectedCharset) {
+    assertEquals(expectedCharset, MultipartContentDecoder.extractCharset(contentType).name());
+  }
+
+  @Test
+  void decodeBytesUsesQuotedDeclaredCharset() {
+    String text = "café";
+    byte[] bytes = text.getBytes(StandardCharsets.ISO_8859_1);
+    assertEquals(
+        text,
+        MultipartContentDecoder.decodeBytes(
+            bytes, bytes.length, "text/plain; charset=\"ISO-8859-1\""));
+  }
+}