Skip to content

Commit 99157ff

Browse files
committed
Add json.loads on bytes & surrogatepass
1 parent 1419c92 commit 99157ff

12 files changed

Lines changed: 206 additions & 12 deletions

File tree

aikido_zen/context/__init__.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,20 @@ def set_body_internal(self, body):
119119
# Make sure that empty bodies like b"" don't get sent.
120120
self.body = None
121121
if isinstance(self.body, bytes):
122-
self.body = self.body.decode("utf-8") # Decode byte input to string.
122+
# json.loads on bytes uses surrogatepass internally, so try it first.
123+
# This handles bodies containing UTF-8-encoded surrogates (e.g. \xed\xa0\x80) that would otherwise
124+
# cause decode("utf-8") to raise and leave the JSON unparsed.
125+
try:
126+
parsed_body = json.loads(self.body)
127+
if parsed_body:
128+
self.body = parsed_body
129+
return
130+
except (JSONDecodeError, ValueError):
131+
pass
132+
# Use errors="replace" so invalid bytes become � instead of raising.
133+
# A strict decode would let attackers bypass detection by prepending a
134+
# single invalid byte to any payload.
135+
self.body = self.body.decode("utf-8", errors="replace")
123136
if not isinstance(self.body, str):
124137
return
125138
if self.body.strip()[0] in ["{", "[", '"']:

aikido_zen/context/init_test.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,10 @@ def test_set_normal_byte_string():
175175

176176

177177
def test_set_byte_string_wrong_encoding():
178-
body = "hello world! 😊".encode("utf-16") # UTF-16 unique character
178+
body = "hello world! 😊".encode("utf-16") # UTF-16 bytes are not valid UTF-8
179179
context = Context(req=basic_wsgi_req, body=body, source="flask")
180-
assert context.body == body # Body remains unchanged because utf-8 failed.
180+
# Invalid bytes are replaced with � so the body is still scannable.
181+
assert context.body == body.decode("utf-8", errors="replace")
181182

182183

183184
def test_set_none():
@@ -296,3 +297,23 @@ def test_set_protection_forced_off():
296297
assert context.protection_forced_off is False
297298
context.set_force_protection_off(None)
298299
assert context.protection_forced_off is None
300+
301+
302+
def test_set_bytes_with_invalid_utf8_prefix():
303+
# Regression: AIKIDO-5RDTZW1V — a single invalid UTF-8 byte (e.g. \xff) prepended
304+
# to a path traversal payload must not bypass detection. The body must be decoded
305+
# with errors="replace" so the traversal string remains visible to sinks.
306+
body = b"\xff/../../../../../etc/passwd"
307+
context = Context(req=basic_wsgi_req, body=body, source="flask")
308+
assert isinstance(context.body, str)
309+
assert "/../../../../../etc/passwd" in context.body
310+
311+
312+
def test_set_bytes_json_with_surrogate_bytes():
313+
# Regression: AIKIDO-B3YABOSP — surrogate bytes embedded in a JSON body must not
314+
# bypass detection. json.loads(bytes) uses surrogatepass internally, so the dict
315+
# is parsed and the attack payload (e.g. {"$regex": ".*"}) is visible.
316+
body = b'{"username": {"$regex": ".*"}, "bypass": "\xed\xa0\x80"}'
317+
context = Context(req=basic_wsgi_req, body=body, source="flask")
318+
assert isinstance(context.body, dict)
319+
assert context.body.get("username") == {"$regex": ".*"}

aikido_zen/helpers/path_to_string.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ def path_to_string(path):
1616
return path
1717

1818
if isinstance(path, bytes):
19-
try:
20-
return path.decode("utf-8")
21-
except UnicodeDecodeError:
22-
return None
19+
# Use errors="replace" so invalid bytes (e.g. \xff, surrogate sequences)
20+
# don't silently suppress path traversal detection — the replacement char
21+
# preserves the traversal components that follow.
22+
return path.decode("utf-8", errors="replace")
2323
if isinstance(path, PurePath):
2424
# Stringify PurePath. This can still allow path traversal but in extremely
2525
# limited cases so it's safe to just stringify for now.

aikido_zen/helpers/path_to_string_test.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ def test_path_to_string_with_valid_url():
1616
def test_path_to_string_with_bytes():
1717
assert path_to_string(b"test.txt") == "test.txt"
1818
assert path_to_string(b"/home/user/file.txt") == "/home/user/file.txt"
19-
assert path_to_string(b"\xff") is None # Invalid UTF-8 byte sequence
19+
# Invalid bytes are replaced with � so traversal components are preserved.
20+
assert path_to_string(b"\xff") == "�"
21+
assert path_to_string(b"\xff/../../../etc/passwd") == "�/../../../etc/passwd"
22+
# Surrogate bytes (AIKIDO-B3YABOSP pattern) also survive as replacement chars.
23+
assert path_to_string(b"\xed\xa0\x80/../etc/passwd") == "���/../etc/passwd"
2024

2125

2226
def test_path_to_string_with_empty_string():

aikido_zen/sources/flask/extract_form_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def extract_form_data_from_flask_request_and_save_data(req):
1010
if req.form:
1111
context.set_body(req.form)
1212
else:
13-
context.set_body(req.data.decode("utf-8"))
13+
context.set_body(req.data)
1414
context.set_as_current_context()
1515
except Exception as e:
1616
logger.debug("Exception occurred whilst extracting flask body data: %s", e)

aikido_zen/sources/quart.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ async def _handle_request_before(func, instance, args, kwargs):
3232
context.set_body(form)
3333
else:
3434
data = await request.data
35-
context.set_body(data.decode("utf-8"))
35+
context.set_body(data)
3636
context.cookies = request.cookies.to_dict()
3737
context.set_as_current_context()
3838

aikido_zen/vulnerabilities/path_traversal/detect_path_traversal_test.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,16 @@ def test_path_normalization():
132132
# Combined slashes and dot: ///.///etc/passwd should normalize to /etc/passwd
133133
assert detect_path_traversal("///.///etc/passwd", "///.///etc") is True
134134
assert detect_path_traversal("///.///etc/passwd", "///.///etc/passwd") is True
135+
136+
137+
def test_replacement_char_prefix_does_not_hide_traversal():
138+
# Regression: AIKIDO-5RDTZW1V / AIKIDO-B3YABOSP — an attacker prepends
139+
# invalid UTF-8 bytes (\xff or surrogate sequences) to a traversal payload.
140+
# After decode("utf-8", errors="replace") both the stored body string and the
141+
# path_to_string() output start with the replacement character �, so the
142+
# user-input substring is still found in the file path and traversal is detected.
143+
replacement = "�"
144+
traversal = "/../../../../../etc/passwd"
145+
assert detect_path_traversal(replacement + traversal, replacement + traversal) is True
146+
# Three replacement chars (from \xed\xa0\x80, three separate bad bytes)
147+
assert detect_path_traversal(replacement * 3 + traversal, replacement * 3 + traversal) is True

end2end/django_mysql_test.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,45 @@ def test_initial_heartbeat():
111111
assert req_stats["rateLimited"] == 0
112112
assert req_stats["attacksDetected"] == {"blocked": 2, "total": 2}
113113
assert req_stats["attackWaves"] == {"total": 0, "blocked": 0}
114+
115+
116+
# --- AIKIDO-5RDTZW1V regression: invalid UTF-8 bytes must not bypass detection ---
117+
118+
def test_bypass_invalid_utf8_bytes_path_traversal():
119+
# An attacker prepends \xff (invalid UTF-8) to a path traversal payload.
120+
# Before the fix, decode("utf-8") raised UnicodeDecodeError and the body was
121+
# never stored, so the firewall saw nothing. After the fix the body is decoded
122+
# with errors="replace" and the traversal is still detected.
123+
body = b"\xff/../../../../../etc/passwd"
124+
res = requests.post(base_url_fw + "/read", data=body)
125+
assert res.status_code == 500
126+
127+
time.sleep(5)
128+
events = fetch_events_from_mock("http://localhost:5000")
129+
attacks = filter_on_event_type(events, "detected_attack")
130+
131+
assert len(attacks) == 3
132+
assert attacks[2]["attack"]["kind"] == "path_traversal"
133+
assert attacks[2]["attack"]["blocked"] is True
134+
assert attacks[2]["attack"]["source"] == "body"
135+
136+
137+
# --- AIKIDO-B3YABOSP regression: surrogate bytes in JSON must not bypass detection ---
138+
139+
def test_bypass_surrogate_bytes_sql_injection():
140+
# Surrogate bytes (\xed\xa0\x80) make decode("utf-8") raise, so the old code
141+
# never parsed the body as JSON and the SQL injection payload was invisible.
142+
# After the fix, json.loads(bytes) is tried first (it uses surrogatepass internally)
143+
# so the dict is extracted and the injection is caught when the cursor executes.
144+
body = b'{"dog_name": "Dangerous bobby\\", 1); -- ", "bypass": "\xed\xa0\x80"}'
145+
res = requests.post(base_url_fw + "/json-sql", data=body)
146+
assert res.status_code == 500
147+
148+
time.sleep(5)
149+
events = fetch_events_from_mock("http://localhost:5000")
150+
attacks = filter_on_event_type(events, "detected_attack")
151+
152+
assert len(attacks) == 4
153+
assert attacks[3]["attack"]["kind"] == "sql_injection"
154+
assert attacks[3]["attack"]["blocked"] is True
155+
assert attacks[3]["attack"]["source"] == "body"

end2end/flask_mongo_test.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,45 @@ def test_dangerous_auth_fw_force():
126126
'source': "body",
127127
'user': None
128128
}
129+
130+
131+
# --- AIKIDO-5RDTZW1V regression: invalid UTF-8 bytes must not bypass detection ---
132+
133+
def test_bypass_invalid_utf8_bytes_path_traversal():
134+
# An attacker prepends \xff (invalid UTF-8) to a path traversal payload.
135+
# Before the fix, decode("utf-8") raised UnicodeDecodeError and the body was
136+
# never stored, so the firewall saw nothing. After the fix the body is decoded
137+
# with errors="replace" and the traversal is still detected.
138+
body = b"\xff/../../../../../etc/passwd"
139+
res = requests.post("http://localhost:8094/read", data=body)
140+
assert res.status_code == 500
141+
142+
time.sleep(5)
143+
events = fetch_events_from_mock("http://localhost:5000")
144+
attacks = filter_on_event_type(events, "detected_attack")
145+
146+
assert len(attacks) == 3
147+
assert attacks[2]["attack"]["kind"] == "path_traversal"
148+
assert attacks[2]["attack"]["blocked"] is True
149+
assert attacks[2]["attack"]["source"] == "body"
150+
151+
152+
# --- AIKIDO-B3YABOSP regression: surrogate bytes in JSON must not bypass detection ---
153+
154+
def test_bypass_surrogate_bytes_nosql_injection():
155+
# Surrogate bytes (\xed\xa0\x80) make decode("utf-8") raise, so the old code
156+
# never parsed the JSON and the NoSQL injection payload {"$ne":""} was invisible.
157+
# After the fix, json.loads(bytes) is tried first (it uses surrogatepass internally)
158+
# so the dict body is fully parsed and the injection is caught.
159+
body = b'{"dog_name": "bobby_tables", "pswd": {"$ne": ""}, "bypass": "\xed\xa0\x80"}'
160+
res = requests.post("http://localhost:8094/auth-raw", data=body)
161+
assert res.status_code == 500
162+
163+
time.sleep(5)
164+
events = fetch_events_from_mock("http://localhost:5000")
165+
attacks = filter_on_event_type(events, "detected_attack")
166+
167+
assert len(attacks) == 4
168+
assert attacks[3]["attack"]["kind"] == "nosql_injection"
169+
assert attacks[3]["attack"]["blocked"] is True
170+
assert attacks[3]["attack"]["source"] == "body"

sample-apps/django-mysql/sample_app/urls.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,7 @@
66
path("", views.index, name="index"),
77
path("dogpage/<int:dog_id>", views.dog_page, name="dog_page"),
88
path("shell/<str:user_command>", views.shell_url, name="shell"),
9-
path("create", views.create_dogpage, name="create")
9+
path("create", views.create_dogpage, name="create"),
10+
path("read", views.read_file, name="read"),
11+
path("json-sql", views.json_sql, name="json_sql"),
1012
]

0 commit comments

Comments
 (0)