diff --git a/aiohttp/client.py b/aiohttp/client.py
index c3e874e650d..5668b401d2f 100644
--- a/aiohttp/client.py
+++ b/aiohttp/client.py
@@ -839,6 +839,18 @@ async def _connect_and_send_request(
                         r_url = resp.headers.get(hdrs.LOCATION) or resp.headers.get(
                             hdrs.URI
                         )
+                        # Header bytes that are not valid UTF-8 appear in the
+                        # decoded value as lone surrogates in U+DC80..U+DCFF
+                        # (the "surrogateescape" form).  Undo the escaping to
+                        # get the original bytes back and decode them as
+                        # latin-1 per RFC 7230.  Working on the value already
+                        # chosen above keeps the Location-over-URI precedence.
+                        if r_url is not None and any(
+                            "\udc80" <= ch <= "\udcff" for ch in r_url
+                        ):
+                            r_url = r_url.encode("utf-8", "surrogateescape").decode(
+                                "latin-1"
+                            )
                         if r_url is None:
                             # see github.com/aio-libs/aiohttp/issues/2022
                             break
diff --git a/tests/test_client_functional.py b/tests/test_client_functional.py
index 8ee45330bb5..e0888698bdd 100644
--- a/tests/test_client_functional.py
+++ b/tests/test_client_functional.py
@@ -3142,6 +3142,52 @@ async def generate_redirecting_response(request: web.Request) -> web.Response:
         await client.get("/redirect")
 
 
+async def test_redirect_with_non_utf8_location_header() -> None:
+    """Test that redirects with non-UTF-8 bytes in Location header work correctly.
+
+    Regression test for https://github.com/aio-libs/aiohttp/issues/10047
+    """
+    server_received: list[bytes] = []
+
+    async def handler(
+        reader: asyncio.StreamReader, writer: asyncio.StreamWriter
+    ) -> None:
+        request_line = await reader.readline()
+        server_received.append(request_line)
+        if b"/redirect" in request_line:
+            writer.write(b"HTTP/1.1 301 Moved Permanently\r\n")
+            writer.write(b"Location: /synspr\xf8ve\r\n")
+            writer.write(b"Content-Length: 0\r\n")
+            writer.write(b"\r\n")
+        else:
+            writer.write(b"HTTP/1.1 200 OK\r\n")
+            writer.write(b"Content-Length: 7\r\n")
+            writer.write(b"\r\n")
+            writer.write(b"success")
+        await writer.drain()
+        writer.close()
+
+    server = await asyncio.start_server(handler, "127.0.0.1", 0)
+    try:
+        addr = server.sockets[0].getsockname()
+        url = URL(f"http://{addr[0]}:{addr[1]}/redirect")
+
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, allow_redirects=True) as resp:
+                assert resp.status == 200
+                body = await resp.read()
+                assert body == b"success"
+                assert "\xf8" not in str(resp.url)
+                assert "\udcf8" not in str(resp.url)
+
+        # The redirect must actually have been followed with a second request.
+        assert len(server_received) == 2
+    finally:
+        # Always release the listening socket, even when an assertion fails.
+        server.close()
+        await server.wait_closed()
+
+
 @pytest.mark.parametrize(
     ("status", "expected_ok"),
     (