77"""
88
99import os .path
10+ import re
1011import socket # noqa: F401
1112import typing
1213import warnings
@@ -73,6 +74,45 @@ def SOCKSProxyManager(*args, **kwargs):
7374DEFAULT_RETRIES = 0
7475DEFAULT_POOL_TIMEOUT = None
7576
77+ # Anchored to the authority section of the URL (between "://" and the first
78+ # "/", "?", or "#") so that brackets in the path or query string cannot
79+ # produce false positives.
80+ #
81+ # Inside the brackets two forms are detected:
82+ # - RFC 6874 encoded %25: the delimiter is %25 followed by one or more
83+ # ZoneID characters. Per RFC 6874 the ZoneID unreserved chars are
84+ # [A-Za-z0-9_.\-~] plus percent-encoded octets (%[0-9A-Fa-f]{2}), so
85+ # names like "Ethernet%203" (space encoded as %20) or names containing
86+ # tildes are matched correctly.
87+ # - Literal %: a negative lookahead (?![0-9A-Fa-f]{2}) rejects valid
88+ # percent-encoded bytes whose first hex digit happens to be a letter
89+ # (e.g. %AB, %aF, %CD). After that guard, one alphanumeric character
90+ # is required (covering both named interfaces like eth0 and numeric
91+ # zone indices like 1 or 3), followed by zero or more identifier chars.
92+ _IPV6_ZONE_ID_RE = re .compile (
93+ r"://[^/?#]*\[[^\]]*"
94+ r"(?:%25(?:[a-zA-Z0-9_.\-~]|%[0-9A-Fa-f]{2})+"
95+ r"|%(?![0-9A-Fa-f]{2})[0-9A-Za-z][A-Za-z0-9_.\-]*)\]"
96+ )
97+
98+
99+ def _has_ipv6_zone_id (url : str ) -> bool :
100+ """
101+ Detect if URL contains IPv6 zone identifier (scope ID).
102+
103+ IPv6 zone IDs use % character within brackets, e.g.:
104+ http://[fe80::1%eth0]:8080/
105+
106+ This is used to determine whether to use urllib3's parse_url()
107+ (which handles zone IDs correctly) or urlparse() for backward
108+ compatibility.
109+
110+ :param url: URL string to check
111+ :return: True if URL contains IPv6 zone ID
112+ :rtype: bool
113+ """
114+ return bool (_IPV6_ZONE_ID_RE .search (url ))
115+
76116
77117def _urllib3_request_context (
78118 request : "PreparedRequest" ,
@@ -82,9 +122,21 @@ def _urllib3_request_context(
82122) -> "(dict[str, typing.Any], dict[str, typing.Any])" :
83123 host_params = {}
84124 pool_kwargs = {}
85- parsed_request_url = urlparse (request .url )
86- scheme = parsed_request_url .scheme .lower ()
87- port = parsed_request_url .port
125+
126+ # Use urllib3's parse_url for IPv6 zone IDs, urlparse otherwise
127+ if _has_ipv6_zone_id (request .url ):
128+ parsed_request_url = parse_url (request .url )
129+ scheme = parsed_request_url .scheme .lower ()
130+ port = parsed_request_url .port
131+ # parse_url uses .host and includes brackets for IPv6, strip them
132+ hostname = parsed_request_url .host
133+ if hostname and hostname .startswith ("[" ) and hostname .endswith ("]" ):
134+ hostname = hostname [1 :- 1 ]
135+ else :
136+ parsed_request_url = urlparse (request .url )
137+ scheme = parsed_request_url .scheme .lower ()
138+ port = parsed_request_url .port
139+ hostname = parsed_request_url .hostname # urlparse uses .hostname
88140
89141 cert_reqs = "CERT_REQUIRED"
90142 if verify is False :
@@ -105,7 +157,7 @@ def _urllib3_request_context(
105157 pool_kwargs ["cert_file" ] = client_cert
106158 host_params = {
107159 "scheme" : scheme ,
108- "host" : parsed_request_url . hostname ,
160+ "host" : hostname ,
109161 "port" : port ,
110162 }
111163 return host_params , pool_kwargs
@@ -536,7 +588,10 @@ def request_url(self, request, proxies):
536588 :rtype: str
537589 """
538590 proxy = select_proxy (request .url , proxies )
539- scheme = urlparse (request .url ).scheme
591+ if _has_ipv6_zone_id (request .url ):
592+ scheme = parse_url (request .url ).scheme
593+ else :
594+ scheme = urlparse (request .url ).scheme
540595
541596 is_proxied_http_request = proxy and scheme != "https"
542597 using_socks_proxy = False
0 commit comments