Skip to content

Commit 2f114e5

Browse files
committed
[argus] sandbox_audit: raise _MAX_COMMAND_LENGTH 10_000 → 131_072
The 10 000-char cap rejects legitimate heredocs that DeerFlow agents use to write small-to-medium files in one shot — e.g. a 20 KB self-contained HTML page produced by a research task. 128 KB is still well below the typical Linux ARG_MAX (about 2 MB with the default stack limit), and the cap remains an effective tripwire for base64 payload injection (which would otherwise produce megabytes-long lines).

Updates the existing length tests to read the constant rather than hard-coding 10_001, and adds:
- test_max_length_at_128k — pins the new value
- test_20kb_heredoc_accepted — realistic positive case from the bug that motivated the change

PR-candidate: yes
Upstream-issue: none
Reason: Easy win, well-scoped, with a believable use case in the test. Could be even more generally accepted if reframed as configurable, but a flat bump is the smallest defensible change.
1 parent 01f1863 commit 2f114e5

2 files changed

Lines changed: 25 additions & 10 deletions

File tree

backend/packages/harness/deerflow/agents/middlewares/sandbox_audit_middleware.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -271,11 +271,13 @@ def _append_warn_to_result(self, result: ToolMessage | Command, command: str) ->
271271
# Input sanitisation
272272
# ------------------------------------------------------------------
273273

274-
# Normal bash commands rarely exceed a few hundred characters. 10 000 is
275-
# well above any legitimate use case yet a tiny fraction of Linux ARG_MAX.
276-
# Anything longer is almost certainly a payload injection or base64-encoded
277-
# attack string.
278-
_MAX_COMMAND_LENGTH = 10_000
274+
# Most bash commands are tiny, but DeerFlow agents legitimately route
275+
# heredocs through bash to write small-to-medium files in one shot —
276+
# e.g. a 20 KB self-contained HTML page. The previous 10 000-char cap
277+
# rejected those. 131 072 (128 KB) is still well below the typical
278+
# Linux ARG_MAX (~2 MB) and remains an effective tripwire for base64
279+
# payload injection.
280+
_MAX_COMMAND_LENGTH = 131_072
279281

280282
def _validate_input(self, command: str) -> str | None:
281283
"""Return ``None`` if *command* is acceptable, else a rejection reason."""

backend/tests/test_sandbox_audit_middleware.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -276,13 +276,26 @@ def test_normal_command_accepted(self):
276276
assert self.mw._validate_input("ls -la") is None
277277

278278
def test_command_at_max_length_accepted(self):
279-
cmd = "a" * 10_000
279+
cmd = "a" * self.mw._MAX_COMMAND_LENGTH
280280
assert self.mw._validate_input(cmd) is None
281281

282282
def test_command_exceeding_max_length_rejected(self):
283-
cmd = "a" * 10_001
283+
cmd = "a" * (self.mw._MAX_COMMAND_LENGTH + 1)
284284
assert self.mw._validate_input(cmd) == "command too long"
285285

286+
def test_max_length_at_128k(self):
287+
"""The cap is 128 KB — large enough to allow heredocs that write
288+
small-to-medium files (e.g. a 20 KB HTML page) without rejection,
289+
small enough to remain a tripwire for base64 payload injection."""
290+
assert self.mw._MAX_COMMAND_LENGTH == 131_072
291+
292+
def test_20kb_heredoc_accepted(self):
293+
"""Realistic case: an agent writes a 20 KB self-contained HTML file
294+
via a bash heredoc. This was rejected by the old 10 000 cap."""
295+
payload = "x" * 20_000
296+
heredoc = f"cat <<'EOF' > /tmp/page.html\n{payload}\nEOF"
297+
assert self.mw._validate_input(heredoc) is None
298+
286299
def test_null_byte_rejected(self):
287300
assert self.mw._validate_input("ls\x00; rm -rf /") == "null byte detected"
288301

@@ -318,7 +331,7 @@ def test_null_byte_command_blocked_with_reason(self):
318331
assert "null byte" in result.content.lower()
319332

320333
def test_oversized_command_blocked_with_reason(self):
321-
request = _make_request("a" * 10_001)
334+
request = _make_request("a" * (self.mw._MAX_COMMAND_LENGTH + 1))
322335
handler = _make_handler()
323336
result = self.mw.wrap_tool_call(request, handler)
324337
assert not handler.called
@@ -339,7 +352,7 @@ def test_none_command_coerced_to_empty(self):
339352

340353
def test_oversized_command_audit_log_truncated(self):
341354
"""Oversized commands should be truncated in audit logs to prevent log amplification."""
342-
big_cmd = "x" * 10_001
355+
big_cmd = "x" * (self.mw._MAX_COMMAND_LENGTH + 1)
343356
request = _make_request(big_cmd)
344357
handler = _make_handler()
345358
with unittest.mock.patch.object(self.mw, "_write_audit", wraps=self.mw._write_audit) as spy:
@@ -597,7 +610,7 @@ async def test_null_byte_command_blocked_with_reason(self):
597610

598611
@pytest.mark.anyio
599612
async def test_oversized_command_blocked_with_reason(self):
600-
request = _make_request("a" * 10_001)
613+
request = _make_request("a" * (SandboxAuditMiddleware._MAX_COMMAND_LENGTH + 1))
601614
result, called = await self._call_async(request)
602615
assert not called
603616
assert isinstance(result, ToolMessage)

0 commit comments

Comments
 (0)