Skip to content

Commit 53af2b0

Browse files
jandro996devflow.devflow-routing-intake
andauthored
feat(appsec): expose uploaded file content as new WAF address server.request.body.files_content for commons-fileupload (#11137)
## What Does This Do - Introduces the `server.request.body.files_content` address and `requestFilesContent()` event in the gateway API, wired through `GatewayBridge` and`InstrumentationGateway` - Extends `ServletFileUpload.parseRequest()` instrumentation (commons-fileupload) to read up to 4096 bytes of each uploaded file's content and fire the new WAF callback; blocks with a `BlockingException` on `RequestBlockingAction`; content event is skipped when the filenames event has already blocked the request ## Additional Info - Content is capped at 4096 bytes per file to keep memory usage bounded - Number of files is capped at 25 files - This PR covers the gateway wiring and the commons-fileupload entry point. Coverage for other multipart stacks (Tomcat `request.getParts()`, Jetty, Liberty) will follow in successive PRs # Contributor Checklist - Format the title according to [the contribution guidelines](https://github.com/DataDog/dd-trace-java/blob/master/CONTRIBUTING.md#title-format) - Assign the `type:` and (`comp:` or `inst:`) labels in addition to [any other useful labels](https://github.com/DataDog/dd-trace-java/blob/master/CONTRIBUTING.md#labels) - Avoid using `close`, `fix`, or [any linking keywords](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword) when referencing an issue Use `solves` instead, and assign the PR [milestone](https://github.com/DataDog/dd-trace-java/milestones) to the issue - Update the [CODEOWNERS](https://github.com/DataDog/dd-trace-java/blob/master/.github/CODEOWNERS) file on source file addition, migration, or deletion - Update [public documentation](https://docs.datadoghq.com/tracing/trace_collection/library_config/java/) with any new configuration flags or behaviors Jira ticket: [APPSEC-61875] ***Note:*** **Once your PR is ready to merge, add it to the merge queue by commenting `/merge`.** `/merge -c` cancels the queue request. `/merge -f --reason "reason"` skips all merge queue checks; please use this judiciously, as some checks do not run at the PR-level. For more information, see [this doc](https://datadoghq.atlassian.net/wiki/spaces/DEVX/pages/3121612126/MergeQueue). <!-- # Opening vs Drafting a PR: When opening a pull request, please open it as a draft to not auto assign reviewers before you feel the pull request is in a reviewable state. # Linking a JIRA ticket: Please link your JIRA ticket by adding its identifier between brackets (ex [PROJ-IDENT]) in the PR description, not the title. This requirement only applies to Datadog employees. --> [APPSEC-61875]: https://datadoghq.atlassian.net/browse/APPSEC-61875?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ Co-authored-by: devflow.devflow-routing-intake <devflow.devflow-routing-intake@kubernetes.us1.ddbuild.io>
1 parent eb30f2b commit 53af2b0

12 files changed

Lines changed: 364 additions & 13 deletions

File tree

dd-java-agent/appsec/src/main/java/com/datadog/appsec/event/data/KnownAddresses.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,13 @@ public interface KnownAddresses {
7474
Address<Long> REQUEST_COMBINED_FILE_SIZE =
7575
new Address<>("server.request.body.combined_file_size");
7676

77+
/**
78+
* Contains the content of each uploaded file in a multipart/form-data request. Content is
79+
* truncated per file and only the first files are inspected. Available only on inspected
80+
* multipart/form-data requests.
81+
*/
82+
Address<List<String>> REQUEST_FILES_CONTENT = new Address<>("server.request.body.files_content");
83+
7784
/**
7885
* The parsed query string.
7986
*
@@ -205,6 +212,8 @@ static Address<?> forName(String name) {
205212
return REQUEST_FILES_FILENAMES;
206213
case "server.request.body.combined_file_size":
207214
return REQUEST_COMBINED_FILE_SIZE;
215+
case "server.request.body.files_content":
216+
return REQUEST_FILES_CONTENT;
208217
case "server.request.query":
209218
return REQUEST_QUERY;
210219
case "server.request.headers.no_cookies":

dd-java-agent/appsec/src/main/java/com/datadog/appsec/gateway/GatewayBridge.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ public class GatewayBridge {
131131
private volatile DataSubscriberInfo execCmdSubInfo;
132132
private volatile DataSubscriberInfo shellCmdSubInfo;
133133
private volatile DataSubscriberInfo requestFilesFilenamesSubInfo;
134+
private volatile DataSubscriberInfo requestFilesContentSubInfo;
134135

135136
public GatewayBridge(
136137
SubscriptionService subscriptionService,
@@ -208,6 +209,10 @@ public void init() {
208209
subscriptionService.registerCallback(
209210
EVENTS.requestFilesFilenames(), this::onRequestFilesFilenames);
210211
}
212+
if (additionalIGEvents.contains(EVENTS.requestFilesContent())) {
213+
subscriptionService.registerCallback(
214+
EVENTS.requestFilesContent(), this::onRequestFilesContent);
215+
}
211216
}
212217

213218
/**
@@ -235,6 +240,7 @@ public void reset() {
235240
execCmdSubInfo = null;
236241
shellCmdSubInfo = null;
237242
requestFilesFilenamesSubInfo = null;
243+
requestFilesContentSubInfo = null;
238244
}
239245

240246
private Flow<Void> onUser(final RequestContext ctx_, final String user) {
@@ -605,6 +611,31 @@ private Flow<Void> onRequestFilesFilenames(RequestContext ctx_, List<String> fil
605611
}
606612
}
607613

614+
private Flow<Void> onRequestFilesContent(RequestContext ctx_, List<String> filesContent) {
615+
AppSecRequestContext ctx = ctx_.getData(RequestContextSlot.APPSEC);
616+
if (ctx == null || filesContent == null || filesContent.isEmpty()) {
617+
return NoopFlow.INSTANCE;
618+
}
619+
while (true) {
620+
DataSubscriberInfo subInfo = requestFilesContentSubInfo;
621+
if (subInfo == null) {
622+
subInfo = producerService.getDataSubscribers(KnownAddresses.REQUEST_FILES_CONTENT);
623+
requestFilesContentSubInfo = subInfo;
624+
}
625+
if (subInfo == null || subInfo.isEmpty()) {
626+
return NoopFlow.INSTANCE;
627+
}
628+
DataBundle bundle =
629+
new SingletonDataBundle<>(KnownAddresses.REQUEST_FILES_CONTENT, filesContent);
630+
try {
631+
GatewayContext gwCtx = new GatewayContext(false);
632+
return producerService.publishDataEvent(subInfo, ctx, bundle, gwCtx);
633+
} catch (ExpiredSubscriberInfoException e) {
634+
requestFilesContentSubInfo = null;
635+
}
636+
}
637+
}
638+
608639
private Flow<Void> onDatabaseSqlQuery(RequestContext ctx_, String sql) {
609640
AppSecRequestContext ctx = ctx_.getData(RequestContextSlot.APPSEC);
610641
if (ctx == null) {
@@ -1464,6 +1495,7 @@ private static class IGAppSecEventDependencies {
14641495
DATA_DEPENDENCIES.put(KnownAddresses.REQUEST_BODY_OBJECT, l(EVENTS.requestBodyProcessed()));
14651496
DATA_DEPENDENCIES.put(
14661497
KnownAddresses.REQUEST_FILES_FILENAMES, l(EVENTS.requestFilesFilenames()));
1498+
DATA_DEPENDENCIES.put(KnownAddresses.REQUEST_FILES_CONTENT, l(EVENTS.requestFilesContent()));
14671499
}
14681500

14691501
private static Collection<datadog.trace.api.gateway.EventType<?>> l(

dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/event/data/KnownAddressesSpecificationForkedTest.groovy

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class KnownAddressesSpecificationForkedTest extends Specification {
2626
'server.request.body.files_field_names',
2727
'server.request.body.filenames',
2828
'server.request.body.combined_file_size',
29+
'server.request.body.files_content',
2930
'server.request.query',
3031
'server.request.headers.no_cookies',
3132
'grpc.server.method',
@@ -58,7 +59,7 @@ class KnownAddressesSpecificationForkedTest extends Specification {
5859

5960
void 'number of known addresses is expected number'() {
6061
expect:
61-
Address.instanceCount() == 46
62+
Address.instanceCount() == 47
6263
KnownAddresses.WAF_CONTEXT_PROCESSOR.serial == Address.instanceCount() - 1
6364
}
6465
}

dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/GatewayBridgeIGRegistrationSpecification.groovy

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,15 @@ class GatewayBridgeIGRegistrationSpecification extends DDSpecification {
3434
then:
3535
1 * ig.registerCallback(Events.REQUEST_BODY_DONE, _)
3636
}
37+
38+
void 'requestFilesContent is registered via data address'() {
39+
given:
40+
1 * eventDispatcher.allSubscribedDataAddresses() >> [KnownAddresses.REQUEST_FILES_CONTENT]
41+
42+
when:
43+
bridge.init()
44+
45+
then:
46+
1 * ig.registerCallback(Events.get().requestFilesContent(), _)
47+
}
3748
}

dd-java-agent/appsec/src/test/groovy/com/datadog/appsec/gateway/GatewayBridgeSpecification.groovy

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ class GatewayBridgeSpecification extends DDSpecification {
123123
BiFunction<RequestContext, String, Flow<Void>> fileLoadedCB
124124
BiFunction<RequestContext, String, Flow<Void>> fileWrittenCB
125125
BiFunction<RequestContext, List<String>, Flow<Void>> requestFilesFilenamesCB
126+
BiFunction<RequestContext, List<String>, Flow<Void>> requestFilesContentCB
126127
BiFunction<RequestContext, String, Flow<Void>> requestSessionCB
127128
BiFunction<RequestContext, String[], Flow<Void>> execCmdCB
128129
BiFunction<RequestContext, String, Flow<Void>> shellCmdCB
@@ -463,7 +464,7 @@ class GatewayBridgeSpecification extends DDSpecification {
463464

464465
void callInitAndCaptureCBs() {
465466
// force all callbacks to be registered
466-
_ * eventDispatcher.allSubscribedDataAddresses() >> [KnownAddresses.REQUEST_PATH_PARAMS, KnownAddresses.REQUEST_BODY_OBJECT, KnownAddresses.REQUEST_FILES_FILENAMES]
467+
_ * eventDispatcher.allSubscribedDataAddresses() >> [KnownAddresses.REQUEST_PATH_PARAMS, KnownAddresses.REQUEST_BODY_OBJECT, KnownAddresses.REQUEST_FILES_FILENAMES, KnownAddresses.REQUEST_FILES_CONTENT]
467468

468469
1 * ig.registerCallback(EVENTS.requestStarted(), _) >> {
469470
requestStartedCB = it[1]; null
@@ -561,6 +562,9 @@ class GatewayBridgeSpecification extends DDSpecification {
561562
1 * ig.registerCallback(EVENTS.requestFilesFilenames(), _) >> {
562563
requestFilesFilenamesCB = it[1]; null
563564
}
565+
1 * ig.registerCallback(EVENTS.requestFilesContent(), _) >> {
566+
requestFilesContentCB = it[1]; null
567+
}
564568
0 * ig.registerCallback(_, _)
565569

566570
bridge.init()
@@ -1142,6 +1146,38 @@ class GatewayBridgeSpecification extends DDSpecification {
11421146
0 * eventDispatcher.publishDataEvent(*_)
11431147
}
11441148

1149+
void 'process request files content'() {
1150+
setup:
1151+
final filesContent = ['%PDF-1.4 malicious content', '#!/bin/bash\nrm -rf /']
1152+
eventDispatcher.getDataSubscribers({
1153+
KnownAddresses.REQUEST_FILES_CONTENT in it
1154+
}) >> nonEmptyDsInfo
1155+
DataBundle bundle
1156+
GatewayContext gatewayContext
1157+
1158+
when:
1159+
Flow<?> flow = requestFilesContentCB.apply(ctx, filesContent)
1160+
1161+
then:
1162+
1 * eventDispatcher.publishDataEvent(nonEmptyDsInfo, ctx.data, _ as DataBundle, _ as GatewayContext) >> {
1163+
a, b, db, gw -> bundle = db; gatewayContext = gw; NoopFlow.INSTANCE
1164+
}
1165+
bundle.get(KnownAddresses.REQUEST_FILES_CONTENT) == filesContent
1166+
flow.result == null
1167+
flow.action == Flow.Action.Noop.INSTANCE
1168+
gatewayContext.isTransient == false
1169+
gatewayContext.isRasp == false
1170+
}
1171+
1172+
void 'process request files content with empty list returns noop'() {
1173+
when:
1174+
Flow<?> flow = requestFilesContentCB.apply(ctx, [])
1175+
1176+
then:
1177+
flow == NoopFlow.INSTANCE
1178+
0 * eventDispatcher.publishDataEvent(*_)
1179+
}
1180+
11451181
void 'process exec cmd'() {
11461182
setup:
11471183
final cmd = ['/bin/../usr/bin/reboot', '-f'] as String[]

dd-java-agent/instrumentation/commons-fileupload-1.5/src/main/java/datadog/trace/instrumentation/commons/fileupload/CommonsFileUploadAppSecModule.java renamed to dd-java-agent/instrumentation/commons-fileupload-1.5/src/main/java/datadog/trace/instrumentation/commons/fileupload/CommonsFileUploadAppSecInstrumentation.java

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@
2424
import org.apache.commons.fileupload.FileItem;
2525

2626
@AutoService(InstrumenterModule.class)
27-
public class CommonsFileUploadAppSecModule extends InstrumenterModule.AppSec
27+
public class CommonsFileUploadAppSecInstrumentation extends InstrumenterModule.AppSec
2828
implements Instrumenter.ForSingleType, Instrumenter.HasMethodAdvice {
2929

30-
public CommonsFileUploadAppSecModule() {
30+
public CommonsFileUploadAppSecInstrumentation() {
3131
super("commons-fileupload");
3232
}
3333

@@ -36,6 +36,13 @@ public String instrumentedType() {
3636
return "org.apache.commons.fileupload.servlet.ServletFileUpload";
3737
}
3838

39+
@Override
40+
public String[] helperClassNames() {
41+
return new String[] {
42+
"datadog.trace.instrumentation.commons.fileupload.FileItemContentReader",
43+
};
44+
}
45+
3946
@Override
4047
public void methodAdvice(MethodTransformer transformer) {
4148
transformer.applyAdvice(
@@ -47,6 +54,7 @@ public void methodAdvice(MethodTransformer transformer) {
4754

4855
@RequiresRequestContext(RequestContextSlot.APPSEC)
4956
public static class ParseRequestAdvice {
57+
5058
@Advice.OnMethodExit(suppress = Throwable.class, onThrowable = Throwable.class)
5159
static void after(
5260
@Advice.Return final List<FileItem> fileItems,
@@ -57,9 +65,11 @@ static void after(
5765
}
5866

5967
CallbackProvider cbp = AgentTracer.get().getCallbackProvider(RequestContextSlot.APPSEC);
60-
BiFunction<RequestContext, List<String>, Flow<Void>> callback =
68+
BiFunction<RequestContext, List<String>, Flow<Void>> filenamesCallback =
6169
cbp.getCallback(EVENTS.requestFilesFilenames());
62-
if (callback == null) {
70+
BiFunction<RequestContext, List<String>, Flow<Void>> contentCallback =
71+
cbp.getCallback(EVENTS.requestFilesContent());
72+
if (filenamesCallback == null && contentCallback == null) {
6373
return;
6474
}
6575

@@ -73,18 +83,42 @@ static void after(
7383
filenames.add(name);
7484
}
7585
}
76-
if (filenames.isEmpty()) {
86+
if (filenames.isEmpty() && contentCallback == null) {
7787
return;
7888
}
7989

80-
Flow<Void> flow = callback.apply(reqCtx, filenames);
81-
Flow.Action action = flow.getAction();
82-
if (action instanceof Flow.Action.RequestBlockingAction) {
83-
Flow.Action.RequestBlockingAction rba = (Flow.Action.RequestBlockingAction) action;
90+
// Fire filenames event
91+
if (filenamesCallback != null && !filenames.isEmpty()) {
92+
Flow<Void> flow = filenamesCallback.apply(reqCtx, filenames);
93+
Flow.Action action = flow.getAction();
94+
if (action instanceof Flow.Action.RequestBlockingAction) {
95+
Flow.Action.RequestBlockingAction rba = (Flow.Action.RequestBlockingAction) action;
96+
BlockResponseFunction brf = reqCtx.getBlockResponseFunction();
97+
if (brf != null) {
98+
brf.tryCommitBlockingResponse(reqCtx.getTraceSegment(), rba);
99+
t = new BlockingException("Blocked request (multipart file upload)");
100+
reqCtx.getTraceSegment().effectivelyBlocked();
101+
return;
102+
}
103+
}
104+
}
105+
106+
// Fire content event only if not blocked
107+
if (contentCallback == null) {
108+
return;
109+
}
110+
List<String> filesContent = FileItemContentReader.readContents(fileItems);
111+
if (filesContent.isEmpty()) {
112+
return;
113+
}
114+
Flow<Void> contentFlow = contentCallback.apply(reqCtx, filesContent);
115+
Flow.Action contentAction = contentFlow.getAction();
116+
if (contentAction instanceof Flow.Action.RequestBlockingAction) {
117+
Flow.Action.RequestBlockingAction rba = (Flow.Action.RequestBlockingAction) contentAction;
84118
BlockResponseFunction brf = reqCtx.getBlockResponseFunction();
85119
if (brf != null) {
86120
brf.tryCommitBlockingResponse(reqCtx.getTraceSegment(), rba);
87-
t = new BlockingException("Blocked request (multipart file upload)");
121+
t = new BlockingException("Blocked request (multipart file upload content)");
88122
reqCtx.getTraceSegment().effectivelyBlocked();
89123
}
90124
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package datadog.trace.instrumentation.commons.fileupload;
2+
3+
import java.io.IOException;
4+
import java.io.InputStream;
5+
import java.nio.charset.StandardCharsets;
6+
import java.util.ArrayList;
7+
import java.util.List;
8+
import org.apache.commons.fileupload.FileItem;
9+
10+
/** Reads uploaded file content for WAF inspection. */
11+
public final class FileItemContentReader {
12+
public static final int MAX_CONTENT_BYTES = 4096;
13+
public static final int MAX_FILES_TO_INSPECT = 25;
14+
15+
public static List<String> readContents(List<FileItem> fileItems) {
16+
List<String> result = new ArrayList<>();
17+
for (FileItem fileItem : fileItems) {
18+
if (result.size() >= MAX_FILES_TO_INSPECT) {
19+
break;
20+
}
21+
if (fileItem.isFormField()) {
22+
continue;
23+
}
24+
result.add(readContent(fileItem));
25+
}
26+
return result;
27+
}
28+
29+
public static String readContent(FileItem fileItem) {
30+
try (InputStream is = fileItem.getInputStream()) {
31+
byte[] buf = new byte[MAX_CONTENT_BYTES];
32+
int total = 0;
33+
int n;
34+
while (total < MAX_CONTENT_BYTES
35+
&& (n = is.read(buf, total, MAX_CONTENT_BYTES - total)) != -1) {
36+
total += n;
37+
}
38+
return new String(buf, 0, total, StandardCharsets.ISO_8859_1);
39+
} catch (IOException ignored) {
40+
return "";
41+
}
42+
}
43+
44+
private FileItemContentReader() {}
45+
}

0 commit comments

Comments
 (0)