@@ -339,12 +339,81 @@ def _prepare(self, url: Union["URL", str, bytes]) -> ParseResult:
339339 url = str (url )
340340
341341 if not isinstance (url , str ):
342- raise URLError ("Invalid URL: %s" % url )
342+ raise URLError (f"Invalid URL: { url } " )
343+
344+ url_to_parse = url .lstrip ()
345+
346+ # 0. Pre-parsing: default to http if scheme is missing
347+ if "://" not in url_to_parse and not url_to_parse .startswith ("/" ) and not url_to_parse .startswith ("./" ):
348+ # Check if it doesn't look like a potential relative URL with query/fragment
349+ if not (url_to_parse .startswith ("?" ) or url_to_parse .startswith ("#" )):
350+ url_to_parse = f"http://{ url_to_parse } "
351+
352+ # 1. Pre-parsing repair for raw IPv6 addresses
353+ if ":" in url_to_parse :
354+ # Extract authority candidate: part between scheme and path
355+ if "://" in url_to_parse :
356+ authority_candidate = url_to_parse .split ("://" , 1 )[1 ].split ("/" , 1 )[0 ].split ("?" , 1 )[0 ].split ("#" , 1 )[0 ]
357+ else :
358+ authority_candidate = url_to_parse .split ("/" , 1 )[0 ].split ("?" , 1 )[0 ].split ("#" , 1 )[0 ]
359+
360+ # Extract host part (ignoring user:pass@)
361+ host_candidate = authority_candidate .rsplit ("@" , 1 )[- 1 ]
362+
363+ # If it looks like IPv6 but lacks brackets
364+ if host_candidate .count (":" ) > 1 and not (host_candidate .startswith ("[" ) and "]" in host_candidate ):
365+ # Try to determine if it's IP:PORT or just IP
366+ # We prioritize IP:PORT if the last part is digits
367+ possible_ips = []
368+ h_p , _ , port = host_candidate .rpartition (":" )
369+ if port .isdigit () and h_p .count (":" ) >= 1 :
370+ possible_ips .append ((h_p , port ))
371+ possible_ips .append ((host_candidate , "" ))
372+
373+ for ip , p_val in possible_ips :
374+ try :
375+ ipaddress .IPv6Address (ip )
376+ repaired = f"[{ ip } ]"
377+ if p_val :
378+ repaired += f":{ p_val } "
379+ url_to_parse = url_to_parse .replace (host_candidate , repaired , 1 )
380+ break
381+ except ValueError :
382+ continue
343383
344384 for attr in self .__attrs__ :
345385 setattr (self , attr , None )
346386
347- parsed = urlparse (url .lstrip ())
387+ # 2. Parse and Validate
388+ try :
389+ # First, check for malformed brackets in the string we're about to parse
390+ # We strictly enforce one '[' and one ']' in the authority if any exist
391+ authority = ""
392+ if "://" in url_to_parse :
393+ authority = url_to_parse .split ("://" , 1 )[1 ].split ("/" , 1 )[0 ]
394+ else :
395+ authority = url_to_parse .split ("/" , 1 )[0 ]
396+
397+ if "[" in authority or "]" in authority :
398+ if authority .count ("[" ) != 1 or authority .count ("]" ) != 1 :
399+ raise ValueError ("Malformed bracketed host" )
400+
401+ start = authority .find ("[" )
402+ end = authority .find ("]" )
403+ if start > end :
404+ raise ValueError ("Invalid bracket order" )
405+
406+ # Content inside brackets MUST be a valid IPv6
407+ ip_content = authority [start + 1 : end ]
408+ try :
409+ ipaddress .IPv6Address (ip_content )
410+ except ValueError :
411+ raise ValueError (f"Invalid IPv6 in brackets: { ip_content } " )
412+
413+ parsed = urlparse (url_to_parse )
414+
415+ except (ValueError , AttributeError ) as e :
416+ raise URLError (f"Invalid URL: { url } . { str (e )} " ) from e
348417
349418 self .auth = parsed .username , parsed .password
350419 self .scheme = parsed .scheme
@@ -363,14 +432,14 @@ def _prepare(self, url: Union["URL", str, bytes]) -> ParseResult:
363432 try :
364433 self .host = idna .encode (hostname ).decode ("ascii" )
365434 except idna .IDNAError :
366- raise URLError ("Invalid IDNA hostname: %s" % hostname )
435+ raise URLError (f "Invalid IDNA hostname: { hostname } " )
367436
368437 self .port = ""
369438 try :
370439 if parsed .port :
371440 self .port = str (parsed .port )
372441 except ValueError as e :
373- raise URLError ("%s. port range must be 0 - 65535." % e . args [ 0 ] )
442+ raise URLError (f" { e . args [ 0 ] } . port range must be 0 - 65535." )
374443
375444 self .path = parsed .path
376445 self .fragment = parsed .fragment
@@ -388,25 +457,28 @@ def _build(self, secure: bool = False) -> str:
388457 Returns:
389458 The final URL string.
390459 """
391- urls = [self .scheme , "://" ]
460+ scheme = self .scheme or ""
461+ urls = [scheme , "://" ] if scheme else []
392462 authority = self .netloc
393463 if self .username or self .password :
464+ username = self .username or ""
394465 password = self .password or ""
395466 if secure :
396467 password = "[secure]"
397468
398469 authority = "@" .join (
399470 [
400- ":" .join ([self . username , password ]),
471+ ":" .join ([username , password ]),
401472 self .netloc ,
402473 ]
403474 )
404475
405476 urls .append (authority )
477+ path = self .path or ""
406478 if self .query :
407- urls .append ("?" .join ([self . path , self .query ]))
479+ urls .append ("?" .join ([path , self .query ]))
408480 else :
409- urls .append (self . path )
481+ urls .append (path )
410482
411483 if self .fragment :
412484 urls .append ("#" + self .fragment )
0 commit comments