375375# string literal as a type expression.
376376_MULTIPLE_WORDS_NONTYPE_RE = re .compile (r'\s*[^\s.\'"|\[]+\s+[^\s.\'"|\[]' )
377377
378+ # Matches a string shaped like a Python identifier, including Unicode identifiers.
379+ # The check is purely lexical, so keywords such as 'class' match too.
380+ # [^\d\W] = word character that is not a digit
381+ # \w = word character
382+ # \Z = match end of string; does not allow a trailing \n, unlike $
383+ _IDENTIFIER_RE = re .compile (r"^[^\d\W]\w*\Z" , re .UNICODE )
384+
385+ # Matches if the string contains at least one identifier-start character
386+ # (a word character that is not a digit, i.e. a letter or underscore).
387+ _CONTAINS_IDENTIFIER_RE = re .compile (r"[^\W\d]" , re .UNICODE )
388+
389+ # Matches a dotted identifier with at least one '.' (e.g. 'builtins.tuple', 'a.b.c').
390+ _DOTTED_IDENTIFIER_RE = re .compile (r"^[^\d\W]\w*(\.[^\d\W]\w*)+\Z" , re .UNICODE )
391+
392+ # Matches a dotted name (one or more identifier components joined by '.').
393+ # Also accepts a bare identifier with zero dots. The pattern is unanchored:
394+ # it extracts every dotted identifier from inside a stringified type expression.
395+ _CONTAINED_DOTTED_IDENTIFIER_RE = re .compile (r"[^\W\d]\w*(?:\.[^\W\d]\w*)*" , re .UNICODE )
396+
397+ # Matches several patterns that never appear in valid type expressions.
398+ # NOTE: '*' (PEP 646 Unpack) and '+' (Literal[+N]) are deliberately not rejected.
399+ _NONTYPE_PATTERN_RE = re .compile (
400+ # Characters never valid in a type expression
401+ r"[!:/<>@%$^?;&~`\\]|"
402+ # '-' not directly preceded by '[' (which can occur in Literal[-N])
403+ # NOTE: This incorrectly rejects multi-element cases such as
404+ # Literal[-1, -2] in stringified type expressions; those are expected
405+ # to be rare in practice.
406+ r"(?<!\[)-|"
407+ # Leading '.' (incomplete dotted name, file extension, etc)
408+ r"^\.|"
409+ # Trailing '.' (incomplete dotted name, etc); '$' also matches before a final \n
410+ r"\.$"
411+ )
412+
413+ # Matches if the first character of the string is invalid as the start of
414+ # a type expression
415+ _NONTYPE_FIRST_CHAR_RE = re .compile (
416+ # Any character that is not a word character, whitespace, or '*'
417+ # ('*' is allowed for PEP 646 Unpack: 'tuple[int, *Ts]')
418+ r"\A[^\s*\w]|"
419+ # A digit
420+ r"\A\d" ,
421+ re .UNICODE ,
422+ )
423+
423+
378424
379425class SemanticAnalyzer (
380426 NodeVisitor [None ], SemanticAnalyzerInterface , SemanticAnalyzerPluginInterface , SplittingVisitor
@@ -8090,6 +8136,26 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
80908136 return
80918137 elif isinstance (maybe_type_expr , StrExpr ):
80928138 str_value = maybe_type_expr .value # cache
8139+ # (TODO: Experiment with the ordering of all the following filters,
8140+ # to frontload those most efficient at rejecting early.)
8141+ # Filter out string literals with no identifier-start characters
8142+ # (pure punctuation/digits/whitespace) which cannot be type expressions
8143+ if not _CONTAINS_IDENTIFIER_RE .search (str_value ):
8144+ maybe_type_expr .as_type = None
8145+ return
8146+ # Filter out string literals whose first character cannot start a
8147+ # valid type expression (a digit, or punctuation other than '*').
8148+ # Strings that begin with whitespace are not rejected by this check.
8149+ if _NONTYPE_FIRST_CHAR_RE .match (str_value ):
8150+ maybe_type_expr .as_type = None
8151+ return
8152+ # Filter out string literals with common patterns that could not
8153+ # possibly be in a type expression
8154+ if _MULTIPLE_WORDS_NONTYPE_RE .match (str_value ):
8155+ # A common pattern in string literals containing a sentence.
8156+ # But cannot be a type expression.
8157+ maybe_type_expr .as_type = None
8158+ return
80938159 # Filter out string literals which look like an identifier but
80948160 # cannot be a type expression, for a few common reasons
80958161 if str_value .isidentifier ():
@@ -8116,7 +8182,40 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
81168182 # 2. unbound_paramspec: f'ParamSpec "{name}" is unbound' [codes.VALID_TYPE]
81178183 maybe_type_expr .as_type = None
81188184 return
8119- else : # does not look like an identifier
8185+ if (
8186+ isinstance (node , Var )
8187+ and isinstance (get_proper_type (node .type ), Instance )
8188+ and not self .var_is_typing_special_form (node )
8189+ ):
8190+ # Var whose declared type is a concrete instance: it is
8191+ # a value (local, parameter, module-level constant),
8192+ # not a type expression.
8193+ maybe_type_expr .as_type = None
8194+ return
8195+ if isinstance (node , (FuncDef , OverloadedFuncDef , MypyFile )):
8196+ # Functions and modules are never type expressions.
8197+ maybe_type_expr .as_type = None
8198+ return
8199+ elif _DOTTED_IDENTIFIER_RE .fullmatch (str_value ):
8200+ # Dotted-name string (e.g. "builtins.tuple", "typing.Mapping").
8201+ # Look up the leftmost component; if it can't possibly be a
8202+ # type prefix, bail. Mirrors the IndexExpr-with-MemberExpr-base
8203+ # filter logic below.
8204+ leftmost = str_value .split ("." , 1 )[0 ]
8205+ sym = self .lookup (leftmost , UnboundType (leftmost ), suppress_errors = True )
8206+ if sym is None :
8207+ # Leftmost component does not refer to anything in scope
8208+ maybe_type_expr .as_type = None
8209+ return
8210+ node = sym .node # cache
8211+ if isinstance (node , PlaceholderNode ) and not node .becomes_typeinfo :
8212+ maybe_type_expr .as_type = None
8213+ return
8214+ if isinstance (node , Var ) and not self .var_is_typing_special_form (node ):
8215+ # Leftmost component is a Var: cannot be a type prefix
8216+ maybe_type_expr .as_type = None
8217+ return
8218+ else : # does not look like an identifier or dotted identifier
81208219 if '"' in str_value or "'" in str_value :
81218220 # Only valid inside a Literal[...] or Annotated[..., ...] type
81228221 if "[" not in str_value :
@@ -8135,6 +8234,34 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
81358234 # But cannot be a type expression.
81368235 maybe_type_expr .as_type = None
81378236 return
8237+ # Skip some checks when a non-zero even number of single or double quotes
8238+ # signals a possible Literal[...] component, whose quoted content
8239+ # could contain anything: symbols or identifiers that would be
8240+ # incorrectly processed by some checks.
8241+ sq = str_value .count ("'" )
8242+ dq = str_value .count ('"' )
8243+ if not ((sq > 0 and sq % 2 == 0 ) or (dq > 0 and dq % 2 == 0 )):
8244+ # Filter out string literals containing characters or boundary
8245+ # patterns that never appear in valid type expressions
8246+ # (e.g. '/', ':', '<', '>', '@', leading/trailing '.').
8247+ if _NONTYPE_PATTERN_RE .search (str_value ):
8248+ maybe_type_expr .as_type = None
8249+ return
8250+ # A string that can spell a valid type must contain 1+ dotted names,
8251+ # all of whose leftmost identifiers must exist in the local scope.
8252+ found = False
8253+ for m in _CONTAINED_DOTTED_IDENTIFIER_RE .finditer (str_value ):
8254+ found = True
8255+ leftmost = m .group ().split ("." , 1 )[0 ]
8256+ if (
8257+ self .lookup (leftmost , UnboundType (leftmost ), suppress_errors = True )
8258+ is None
8259+ ):
8260+ maybe_type_expr .as_type = None
8261+ return
8262+ if not found :
8263+ maybe_type_expr .as_type = None
8264+ return
81388265 elif isinstance (maybe_type_expr , IndexExpr ):
81398266 if isinstance (maybe_type_expr .base , NameExpr ):
81408267 if isinstance (
@@ -8208,6 +8335,8 @@ def var_is_typing_special_form(var: Var) -> bool:
82088335 "typing.Literal" ,
82098336 "typing_extensions.Literal" ,
82108337 "typing.Optional" ,
8338+ "typing.Self" ,
8339+ "typing_extensions.Self" ,
82118340 "typing.TypeGuard" ,
82128341 "typing_extensions.TypeGuard" ,
82138342 "typing.TypeIs" ,
0 commit comments