Skip to content

Commit fb86a0e

Browse files
committed
Enhance IPv6 zone ID detection logic in adapters.py
- Updated the `_has_ipv6_zone_id` function to more accurately distinguish between zone IDs and percent-encoded characters within brackets.
- Improved regex patterns to handle both literal '%' and '%25' cases, ensuring compliance with RFC 6874.
- Added test cases in `test_adapters.py` to cover false positives and edge cases for zone ID detection.
1 parent 19fb218 commit fb86a0e

2 files changed

Lines changed: 18 additions & 5 deletions

File tree

src/requests/adapters.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,17 @@ def _has_ipv6_zone_id(url: str) -> bool:
9090
:return: True if URL contains IPv6 zone ID
9191
:rtype: bool
9292
"""
93-
# Look for pattern: [<text>%<text>] indicating IPv6 with zone ID
94-
# The % can be URL-encoded as %25 or literal %
95-
# Match brackets containing a % anywhere in the URL
96-
# This handles both literal % and %25 encoding
97-
return bool(re.search(r"\[[^\]]*%(?:25)?[^\]]*\]", url))
93+
# Distinguish zone IDs from arbitrary percent-encoded characters inside brackets.
94+
#
95+
# A percent-encoded character is exactly %XX (two hex digits), e.g. %20 (space).
96+
# A zone ID uses % as a delimiter followed by a network interface name, e.g. %eth0.
97+
#
98+
# Two forms are detected:
99+
# - Literal %: must be followed by a letter (interface names like eth0, wlan0, lo
100+
# always start with a letter on Linux/macOS), ruling out digit-led escapes such as %20 or %2F; letter-led escapes such as %AB are still matched.
101+
# - RFC 6874 encoded %25: followed by any valid interface-name chars (the %25
102+
# prefix unambiguously signals a zone-ID delimiter, not arbitrary encoding).
103+
return bool(re.search(r"\[[^\]]*(?:%25[a-zA-Z0-9_.\-]+|%[a-zA-Z][a-zA-Z0-9_.\-]*)\]", url))
98104

99105

100106
def _urllib3_request_context(

tests/test_adapters.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ class TestIPv6ZoneIDDetection:
3434
("http://localhost/", False),
3535
("http://example.com/foo%20bar", False), # % in path, not zone ID
3636
("http://[::1]/path%20with%20percent", False), # % in path, not in host
37+
# False-positive guard: percent-encoded chars inside brackets are NOT zone IDs
38+
("http://[::1%20]/", False), # %20 = space encoding, not a zone ID
39+
("http://[::1%2F]/", False), # %2F = slash encoding, not a zone ID
40+
("http://[::1%2B]/", False), # %2B = plus encoding, not a zone ID
41+
("http://[::1%41]/", False), # %41 = 'A', two hex digits, not a zone ID
42+
("http://[fe80::1%20]:8080/", False), # %20 in host with port
43+
("http://[::1%25]/", False), # bare %25 with nothing after it is not a zone ID
3744
# Edge cases with multiple percent signs
3845
("http://[fe80::1%eth0]/path%20test", True), # Zone ID + path encoding
3946
("http://[fe80::1%25eth0]/path%20test", True), # %25 zone ID + path encoding

0 commit comments

Comments
 (0)