Skip to content

Commit 9aada2e

Browse files
test: add regression test for SSE response UTF-8 encoding
Add tests for response body encoding in SSE and route.fetch consistency.
1 parent 3aecfcf commit 9aada2e

File tree

1 file changed

+154
-0
lines changed

1 file changed

+154
-0
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# Copyright (c) Microsoft Corporation.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import pytest
16+
17+
from playwright.async_api import Page
18+
from tests.server import Server, TestServerRequest
19+
20+
21+
async def test_response_body_should_return_correct_utf8_bytes_for_sse_stream(
    page: Page, server: Server
) -> None:
    """
    Test that response.body() returns correct UTF-8 bytes for SSE streaming responses.

    Regression test for: https://github.com/microsoft/playwright-python/issues/3023

    The issue: response.body() was returning double-encoded UTF-8 bytes (mojibake)
    for SSE streams, caused by CDP Network.getResponseBody returning a string
    instead of bytes.
    """

    # Test data with UTF-8 characters (Chinese + emoji)
    test_messages = [
        "你好,这是第一条消息",  # Chinese: "Hello, this is the first message"
        "测试中文:😀🎉",  # Chinese + emoji
    ]

    # Build the SSE payload once; the server writes exactly these bytes, so
    # they are also the bytes response.body() is expected to return.
    sse_body = "".join(f"data: {msg}\n\n" for msg in test_messages)
    expected_bytes = sse_body.encode("utf-8")

    # Set up SSE endpoint
    def handle_sse(request: TestServerRequest) -> None:
        request.setHeader("Content-Type", "text/event-stream; charset=utf-8")
        request.setHeader("Cache-Control", "no-cache")
        request.write(expected_bytes)
        request.finish()

    server.set_route("/sse", handle_sse)

    # Wait for the matching response event itself instead of sleeping for a
    # fixed interval: no timing race, and an error raised by response.body()
    # fails the test directly instead of being lost inside an event handler.
    async with page.expect_response(
        lambda response: "/sse" in response.url
    ) as response_info:
        await page.goto(server.PREFIX + "/sse")
    sse_response = await response_info.value
    response_body_bytes = await sse_response.body()

    # Assertion 1: Bytes should NOT be double-encoded (mojibake check)
    # Mojibake pattern: UTF-8 -> Latin-1 decode -> UTF-8 encode
    # For "你" (correct: \xe4\xbd\xa0), mojibake would be \xc3\xa4\xc2\xbd\xc2\xa0
    assert response_body_bytes == expected_bytes, (
        "Response body has incorrect encoding.\n"
        f"Expected: {expected_bytes!r}\n"
        f"Got: {response_body_bytes!r}\n"
        "This indicates double-encoding (mojibake)."
    )

    # Assertion 2: Decoded text should match original messages
    decoded_text = response_body_bytes.decode("utf-8")
    for msg in test_messages:
        assert msg in decoded_text, f"Expected message '{msg}' not found in response"
89+
90+
91+
async def test_response_body_utf8_vs_route_fetch_consistency(
    page: Page, server: Server
) -> None:
    """
    Verify that response.body() and route.fetch() return consistent UTF-8 bytes.

    Regression test for: https://github.com/microsoft/playwright-python/issues/3023

    Before fix: route.fetch() returned correct bytes, but response.body() returned mojibake.
    After fix: both should return identical bytes.
    """

    # Test data with UTF-8 characters
    test_content = "测试内容:Hello 世界! 🌍"

    def handle_endpoint(request: TestServerRequest) -> None:
        request.setHeader("Content-Type", "text/plain; charset=utf-8")
        request.write(test_content.encode("utf-8"))
        request.finish()

    server.set_route("/test", handle_endpoint)

    # Method 1: Capture via route.fetch()
    route_fetch_bytes = None

    async def handle_route(route):
        nonlocal route_fetch_bytes
        fetched = await route.fetch()
        route_fetch_bytes = await fetched.body()
        # Fulfill with the fetched response so the page still receives it and
        # a "response" event is emitted for Method 2.
        await route.fulfill(response=fetched)

    await page.route("**/test", handle_route)

    # Method 2: Capture the page-level response.
    # Wait for the matching response event itself instead of sleeping for a
    # fixed interval: no timing race, and an error raised by response.body()
    # fails the test directly instead of being lost inside an event handler.
    async with page.expect_response(
        lambda response: "/test" in response.url
    ) as response_info:
        await page.goto(server.PREFIX + "/test")
    page_response = await response_info.value
    response_body_bytes = await page_response.body()

    # Assertion 1: The route handler must have run and captured the body
    assert route_fetch_bytes is not None, "route.fetch() did not capture response"

    # Assertion 2: Both should return identical bytes
    assert route_fetch_bytes == response_body_bytes, (
        "Inconsistent encoding detected!\n"
        f"route.fetch(): {route_fetch_bytes!r}\n"
        f"response.body(): {response_body_bytes!r}\n"
        "These should be identical."
    )

    # Assertion 3: Content should decode correctly
    decoded = response_body_bytes.decode("utf-8")
    assert test_content in decoded, "Expected content not found in response"

0 commit comments

Comments
 (0)