@@ -417,51 +417,11 @@ def is_url_type(annotation):
417417 """
418418 Checks if a type annotation is a URL type (directly or nested in a container).
419419
420- This function is part of the URL transformation system that handles Pydantic models
421- with URL fields during extraction operations. When extracting data from web pages,
422- URLs are represented as numeric IDs in the accessibility tree, so we need to:
423-
424- 1. Identify which fields in Pydantic models are URL types
425- 2. Transform those fields to numeric types during extraction
426- 3. Convert the numeric IDs back to actual URLs in the final result
427-
428- Pydantic V2 Compatibility Notes:
429- --------------------------------
430- Modern Pydantic versions (V2+) can create complex type annotations that include
431- subscripted generics (e.g., typing.Annotated[...] with constraints). These
432- subscripted generics cannot be used directly with Python's issubclass() function,
433- which raises TypeError: "Subscripted generics cannot be used with class and
434- instance checks".
435-
436- To handle this, we use a try/except approach when checking for URL types, allowing
437- the function to gracefully handle both simple type annotations and complex
438- subscripted generics that Pydantic V2 may generate.
439-
440- URL Type Detection Strategy:
441- ---------------------------
442- 1. Direct URL types: AnyUrl, HttpUrl from Pydantic
443- 2. Container types: list[URL], Optional[URL], Union[URL, None]
444- 3. Nested combinations: list[Optional[AnyUrl]], etc.
445-
446420 Args:
447- annotation: Type annotation to check. Can be a simple type, generic type,
448- or complex Pydantic V2 subscripted generic.
421+ annotation: Type annotation to check
449422
450423 Returns:
451- bool: True if the annotation represents a URL type (directly or nested),
452- False otherwise.
453-
454- Examples:
455- >>> is_url_type(AnyUrl)
456- True
457- >>> is_url_type(list[HttpUrl])
458- True
459- >>> is_url_type(Optional[AnyUrl])
460- True
461- >>> is_url_type(str)
462- False
463- >>> is_url_type(typing.Annotated[pydantic_core.Url, UrlConstraints(...)])
464- False # Safely handles subscripted generics without crashing
424+ bool: True if it's a URL type, False otherwise
465425 """
466426 if annotation is None :
467427 return False
@@ -477,16 +437,16 @@ def is_url_type(annotation):
477437 # We gracefully skip these rather than crashing, as they're not simple URL types
478438 pass
479439
480- # Check for URL types nested in generic containers
440+ # Check for URL in generic containers
481441 origin = get_origin (annotation )
482442
483- # Handle list[URL], List[URL], etc.
443+ # Handle list[URL]
484444 if origin in (list , list ):
485445 args = get_args (annotation )
486446 if args :
487447 return is_url_type (args [0 ])
488448
489- # Handle Optional[URL] / Union[URL, None], etc.
449+ # Handle Optional[URL] / Union[URL, None]
490450 elif origin is Union :
491451 args = get_args (annotation )
492452 return any (is_url_type (arg ) for arg in args )
0 commit comments