@@ -1228,13 +1228,29 @@ def _extract_symbol_imports_from_file(file_path: Path, repo_root: Path) -> dict[
12281228 return symbol_imports
12291229
12301230
1231+ def _symbol_start_line (node : ast .FunctionDef | ast .AsyncFunctionDef | ast .ClassDef ) -> int :
1232+ """Return the start line of a symbol, including its decorators.
1233+
1234+ Args:
1235+ node: AST node for a function, async function, or class definition.
1236+
1237+ Returns:
1238+ Line number of the first decorator if present, otherwise the ``def``/``class`` line.
1239+ """
1240+ if node .decorator_list :
1241+ return node .decorator_list [0 ].lineno
1242+ return node .lineno
1243+
1244+
12311245def _build_line_to_symbol_map (source : str ) -> SymbolMap :
12321246 """Build a hierarchical mapping from line ranges to symbols.
12331247
12341248 Parses the AST of the given source to identify top-level definitions
12351249 (functions, async functions, classes, and module-level assignments) and
1236- their line ranges. For classes, also extracts member-level line ranges
1237- and intra-class call graphs.
1250+ their line ranges. Decorator lines are included in the range so that
1251+ changes to markers (e.g. ``@pytest.mark.s390x``) map to the decorated
1252+ symbol. For classes, also extracts member-level line ranges and
1253+ intra-class call graphs.
12381254
12391255 Args:
12401256 source: Python source code text.
@@ -1248,15 +1264,15 @@ def _build_line_to_symbol_map(source: str) -> SymbolMap:
12481264
12491265 for node in ast .iter_child_nodes (tree ):
12501266 if isinstance (node , (ast .FunctionDef , ast .AsyncFunctionDef )):
1251- symbols .append ((node . lineno , node .end_lineno or node .lineno , node .name ))
1267+ symbols .append ((_symbol_start_line ( node = node ) , node .end_lineno or node .lineno , node .name ))
12521268
12531269 elif isinstance (node , ast .ClassDef ):
1254- symbols .append ((node . lineno , node .end_lineno or node .lineno , node .name ))
1270+ symbols .append ((_symbol_start_line ( node = node ) , node .end_lineno or node .lineno , node .name ))
12551271 # Extract class members with line ranges
12561272 members : dict [str , tuple [int , int ]] = {}
12571273 for child in ast .iter_child_nodes (node ):
12581274 if isinstance (child , (ast .FunctionDef , ast .AsyncFunctionDef )):
1259- members [child .name ] = (child . lineno , child .end_lineno or child .lineno )
1275+ members [child .name ] = (_symbol_start_line ( node = child ) , child .end_lineno or child .lineno )
12601276 # Build intra-class call graph
12611277 internal_calls = _build_intra_class_call_graph (class_node = node )
12621278 class_members [node .name ] = ClassMemberInfo (
@@ -1488,6 +1504,43 @@ def _diff_has_deletions(diff_content: str) -> bool:
14881504 return any (line .startswith ("-" ) and not line .startswith ("---" ) for line in diff_content .splitlines ())
14891505
14901506
1507+ def _extract_deleted_symbols_from_diff (diff_content : str ) -> set [str ]:
1508+ """Extract names of functions and classes that were deleted in a diff.
1509+
1510+ Scans deletion lines (``-``) for ``def`` and ``class`` statements to
1511+ identify symbols that were removed. This avoids the need to fetch
1512+ and parse the old file source.
1513+
1514+ Note:
1515+ When a class with methods is deleted, both the class name *and* its member
1516+ method names are returned as separate symbols. This may cause broader
1517+ dependency matching than strictly necessary (a member name like ``setup``
1518+ could collide with an unrelated top-level function), but is acceptable
1519+ because over-reporting is safer than under-reporting for test selection.
1520+
1521+ Args:
1522+ diff_content: Unified diff text.
1523+
1524+ Returns:
1525+ Set of deleted function/class names.
1526+ """
1527+ deleted_symbols : set [str ] = set ()
1528+ for line in diff_content .splitlines ():
1529+ if not line .startswith ("-" ) or line .startswith ("---" ):
1530+ continue
1531+ stripped = line [1 :].strip ()
1532+ # Match function definitions
1533+ func_match = re .match (pattern = r"(?:async\s+)?def\s+(\w+)\s*\(" , string = stripped )
1534+ if func_match :
1535+ deleted_symbols .add (func_match .group (1 ))
1536+ continue
1537+ # Match class definitions
1538+ class_match = re .match (pattern = r"class\s+(\w+)[\s:(]" , string = stripped )
1539+ if class_match :
1540+ deleted_symbols .add (class_match .group (1 ))
1541+ return deleted_symbols
1542+
1543+
14911544def _get_diff_content (
14921545 file_path : Path ,
14931546 base_branch : str ,
@@ -1690,8 +1743,8 @@ def _extract_modified_symbols(
16901743
16911744 Returns:
16921745 ``SymbolClassification`` with modified and new symbol sets, or
1693- ``None`` when symbol-level analysis is not possible (diff failure,
1694- pure deletion, or parse errors). A ``None`` return signals the
1746+ ``None`` when symbol-level analysis is not possible (diff failure
1747+ or parse errors). A ``None`` return signals the
16951748 caller to fall back to file-level dependency tracking.
16961749
16971750 When some changed lines fall outside any named symbol (e.g. import
@@ -1721,7 +1774,10 @@ def _extract_modified_symbols(
17211774 if has_deletions :
17221775 if file_status == "renamed" :
17231776 return SymbolClassification (modified_symbols = set (), new_symbols = set ())
1724- return None # Pure deletion — cannot safely narrow impact
1777+ # Pure deletion — extract deleted symbol names from the diff
1778+ # to enable symbol-level narrowing instead of conservative fallback
1779+ deleted_symbols = _extract_deleted_symbols_from_diff (diff_content = diff_content )
1780+ return SymbolClassification (modified_symbols = deleted_symbols , new_symbols = set ())
17251781 return SymbolClassification (modified_symbols = set (), new_symbols = set ())
17261782
17271783 try :
@@ -1777,7 +1833,25 @@ def _extract_modified_symbols(
17771833 # or potentially impactful executable code.
17781834 if line_number <= len (source_lines ):
17791835 line_content = source_lines [line_number - 1 ].strip ()
1780- if not line_content or line_content .startswith (("#" , "import " , "from " , '"""' , "'''" , '"' , "'" )):
1836+ if not line_content or line_content .startswith ((
1837+ "#" ,
1838+ "import " ,
1839+ "from " ,
1840+ '"""' ,
1841+ "'''" ,
1842+ '"' ,
1843+ "'" ,
1844+ "if TYPE_CHECKING:" ,
1845+ "if typing.TYPE_CHECKING:" ,
1846+ ")" , # Closing paren of multi-line import; safe because
1847+ # the opening line (e.g. "setup(") would trigger fallback.
1848+ "__all__" ,
1849+ )):
1850+ has_unattributed = True
1851+ elif re .match (pattern = r"^[A-Za-z_]\w*(?:\s+as\s+\w+)?\s*,\s*$" , string = line_content ):
1852+ # Import continuation line (e.g. "TIMEOUT_2MIN," inside
1853+ # a multi-line "from ... import (...)" block).
1854+ # Trailing comma is required to avoid matching bare identifiers.
17811855 has_unattributed = True
17821856 else :
17831857 # Executable module-level code — conservative fallback
0 commit comments