Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
045ea63
fix(appsec): use per-part charset for files_content in commons-fileup…
jandro996 Apr 27, 2026
733393c
fix(appsec): use CharsetDecoder with REPORT for charset fallback in F…
jandro996 Apr 27, 2026
4738eed
refactor(appsec): extract MultipartContentDecoder to internal-api for…
jandro996 Apr 27, 2026
21b3820
test(appsec): add missing corner cases to MultipartContentDecoderTest
jandro996 Apr 27, 2026
b7a1a33
test(appsec): trim redundant decoder tests from FileItemContentReader…
jandro996 Apr 27, 2026
e74778b
fix(appsec): use machine default charset as fallback in MultipartCont…
jandro996 Apr 28, 2026
55044ff
test(appsec): migrate MultipartContentDecoderTest from Groovy/Spock t…
jandro996 Apr 28, 2026
6f0a3ec
fix(appsec): strip surrounding quotes from charset parameter in Multi…
jandro996 Apr 28, 2026
beea424
fix(appsec): replace String#split with char loop in MultipartContentD…
jandro996 Apr 28, 2026
228e1c4
Merge branch 'master' into alejandro.gonzalez/APPSEC-61875-files-cont…
jandro996 Apr 28, 2026
aa1b0b6
refactor(appsec): avoid toLowerCase allocation in MultipartContentDec…
jandro996 Apr 28, 2026
fd0ed70
chore: add CODEOWNERS entry for internal-api/datadog/trace/api/http
jandro996 Apr 28, 2026
22536b1
Use REPLACE for malformed bytes so truncation preserves declared charset
jandro996 Apr 28, 2026
bbaf10e
Remove dead catch block from MultipartContentDecoder.decodeBytes
jandro996 Apr 28, 2026
5d471a6
Fix charset parameter name matching to require exact boundary
jandro996 Apr 28, 2026
33f9eb1
Restore required catch for checked CharacterCodingException
jandro996 Apr 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package datadog.trace.instrumentation.commons.fileupload;

import datadog.trace.api.http.MultipartContentDecoder;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.fileupload.FileItem;
Expand Down Expand Up @@ -35,7 +35,7 @@ public static String readContent(FileItem fileItem) {
&& (n = is.read(buf, total, MAX_CONTENT_BYTES - total)) != -1) {
total += n;
}
return new String(buf, 0, total, StandardCharsets.ISO_8859_1);
return MultipartContentDecoder.decodeBytes(buf, total, fileItem.getContentType());
} catch (IOException ignored) {
return "";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ class FileItemContentReaderTest extends Specification {
FileItemContentReader.readContent(item) == ''
}

void 'readContent uses Content-Type from file item for charset decoding'() {
given:
def text = 'héllo wörld'
def item = fileItemFromBytes(text.getBytes('UTF-8'), 'file.txt', 'text/plain; charset=UTF-8')

expect:
FileItemContentReader.readContent(item) == text
}

void 'readContents returns content for each non-form file with a name'() {
given:
def items = [fileItem('content-a', 'file-a.txt'), fileItem('content-b', 'file-b.txt'),]
Expand Down Expand Up @@ -101,10 +110,19 @@ class FileItemContentReaderTest extends Specification {
}

private FileItem fileItem(String content, String name) {
fileItem(content, name, null)
}

private FileItem fileItem(String content, String name, String contentType) {
fileItemFromBytes((content ?: '').getBytes('ISO-8859-1'), name, contentType)
}

private FileItem fileItemFromBytes(byte[] bytes, String name, String contentType) {
FileItem item = Stub(FileItem)
item.isFormField() >> false
item.getName() >> name
item.getInputStream() >> new ByteArrayInputStream((content ?: '').getBytes('ISO-8859-1'))
item.getContentType() >> contentType
item.getInputStream() >> new ByteArrayInputStream(bytes)
return item
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package datadog.trace.api.http;

import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Locale;

/** Decodes multipart file content bytes to String using the per-part Content-Type charset. */
public final class MultipartContentDecoder {

public static String decodeBytes(byte[] buf, int length, String contentType) {
Charset charset = extractCharset(contentType);
if (charset == null) charset = StandardCharsets.UTF_8;
Comment thread
jandro996 marked this conversation as resolved.
Outdated
try {
return charset
.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT)
.decode(ByteBuffer.wrap(buf, 0, length))
Comment thread
jandro996 marked this conversation as resolved.
.toString();
} catch (CharacterCodingException e) {
return new String(buf, 0, length, StandardCharsets.ISO_8859_1);
}
}

public static Charset extractCharset(String contentType) {
if (contentType == null) return null;
int idx = contentType.toLowerCase(Locale.ROOT).indexOf("charset=");
if (idx < 0) return null;
String name = contentType.substring(idx + 8).split("[;, ]")[0].trim();
Comment thread
jandro996 marked this conversation as resolved.
Outdated
try {
return Charset.forName(name);
} catch (IllegalArgumentException e) {
return null;
}
}

private MultipartContentDecoder() {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package datadog.trace.api.http

import spock.lang.Specification

class MultipartContentDecoderTest extends Specification {

void 'decodeBytes uses declared UTF-8 charset'() {
given:
def text = 'héllo wörld'
byte[] bytes = text.getBytes('UTF-8')

expect:
MultipartContentDecoder.decodeBytes(bytes, bytes.length, 'text/plain; charset=UTF-8') == text
}

void 'decodeBytes falls back to UTF-8 when Content-Type has no charset'() {
given:
def text = 'hello world'
byte[] bytes = text.getBytes('UTF-8')

expect:
MultipartContentDecoder.decodeBytes(bytes, bytes.length, 'text/plain') == text
}

void 'decodeBytes falls back to UTF-8 when Content-Type is null'() {
given:
def text = 'hello world'
byte[] bytes = text.getBytes('UTF-8')

expect:
MultipartContentDecoder.decodeBytes(bytes, bytes.length, null) == text
}

void 'decodeBytes falls back to ISO-8859-1 when bytes are invalid for declared charset'() {
given:
// 0xE9 is 'é' in ISO-8859-1 but an invalid lone UTF-8 byte
byte[] bytes = 'café'.getBytes('ISO-8859-1')

expect:
MultipartContentDecoder.decodeBytes(bytes, bytes.length, null) == 'café'
}

void 'decodeBytes uses declared ISO-8859-1 charset'() {
given:
def text = 'café'
byte[] bytes = text.getBytes('ISO-8859-1')

expect:
MultipartContentDecoder.decodeBytes(bytes, bytes.length, 'text/plain; charset=ISO-8859-1') == text
}

void 'decodeBytes respects length parameter'() {
given:
byte[] bytes = 'hello world'.getBytes('UTF-8')

expect:
MultipartContentDecoder.decodeBytes(bytes, 5, null) == 'hello'
}

void 'decodeBytes returns empty string for zero length'() {
expect:
MultipartContentDecoder.decodeBytes(new byte[16], 0, null) == ''
}

void 'decodeBytes falls back to ISO-8859-1 when declared charset cannot decode the bytes'() {
given:
// bytes are ISO-8859-1 encoded but Content-Type explicitly declares UTF-8
byte[] bytes = 'café'.getBytes('ISO-8859-1')

expect:
MultipartContentDecoder.decodeBytes(bytes, bytes.length, 'text/plain; charset=UTF-8') == 'café'
}

void 'extractCharset returns null for null contentType'() {
expect:
MultipartContentDecoder.extractCharset(null) == null
}

void 'extractCharset returns null for empty contentType'() {
expect:
MultipartContentDecoder.extractCharset('') == null
}

void 'extractCharset returns null for contentType without charset'() {
expect:
MultipartContentDecoder.extractCharset('text/plain') == null
MultipartContentDecoder.extractCharset('image/jpeg') == null
MultipartContentDecoder.extractCharset('application/octet-stream') == null
}

void 'extractCharset returns null for invalid charset name'() {
expect:
MultipartContentDecoder.extractCharset('text/plain; charset=NOTACHARSET') == null
}

void 'extractCharset extracts charset case-insensitively'() {
expect:
MultipartContentDecoder.extractCharset('text/plain; CHARSET=UTF-8').name() == 'UTF-8'
MultipartContentDecoder.extractCharset('text/plain; Charset=UTF-8').name() == 'UTF-8'
MultipartContentDecoder.extractCharset('text/plain; charset=utf-8').name() == 'UTF-8'
}

void 'extractCharset extracts charset from standard Content-Type'() {
expect:
MultipartContentDecoder.extractCharset('text/plain; charset=UTF-8').name() == 'UTF-8'
MultipartContentDecoder.extractCharset('text/xml; charset=ISO-8859-1').name() == 'ISO-8859-1'
}

void 'extractCharset extracts charset when followed by additional parameters'() {
expect:
MultipartContentDecoder.extractCharset('text/plain; charset=UTF-8; boundary=something').name() == 'UTF-8'
}
}
Loading