Skip to content

Commit 6a00f8e

Browse files
committed
feat(analyzer): decorator ranges, pure-deletion narrowing, safe-line checks
- Add _symbol_start_line() to include decorator lines in symbol ranges, so marker-only changes (e.g. @pytest.mark.s390x) map to the decorated symbol instead of triggering file-level fallback
- Pure-deletion diffs now extract deleted def/class names via _extract_deleted_symbols_from_diff() for symbol-level narrowing instead of returning None (conservative fallback)
- Treat TYPE_CHECKING guards, closing parens, __all__, and import continuation lines as safe unmapped lines

Signed-off-by: rnetser <rnetser@redhat.com>
1 parent 08cf872 commit 6a00f8e

2 files changed

Lines changed: 466 additions & 13 deletions

File tree

scripts/tests_analyzer/pytest_marker_analyzer.py

Lines changed: 83 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,13 +1228,29 @@ def _extract_symbol_imports_from_file(file_path: Path, repo_root: Path) -> dict[
12281228
return symbol_imports
12291229

12301230

1231+
def _symbol_start_line(node: ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef) -> int:
1232+
"""Return the start line of a symbol, including its decorators.
1233+
1234+
Args:
1235+
node: AST node for a function, async function, or class definition.
1236+
1237+
Returns:
1238+
Line number of the first decorator if present, otherwise the ``def``/``class`` line.
1239+
"""
1240+
if node.decorator_list:
1241+
return node.decorator_list[0].lineno
1242+
return node.lineno
1243+
1244+
12311245
def _build_line_to_symbol_map(source: str) -> SymbolMap:
12321246
"""Build a hierarchical mapping from line ranges to symbols.
12331247
12341248
Parses the AST of the given source to identify top-level definitions
12351249
(functions, async functions, classes, and module-level assignments) and
1236-
their line ranges. For classes, also extracts member-level line ranges
1237-
and intra-class call graphs.
1250+
their line ranges. Decorator lines are included in the range so that
1251+
changes to markers (e.g. ``@pytest.mark.s390x``) map to the decorated
1252+
symbol. For classes, also extracts member-level line ranges and
1253+
intra-class call graphs.
12381254
12391255
Args:
12401256
source: Python source code text.
@@ -1248,15 +1264,15 @@ def _build_line_to_symbol_map(source: str) -> SymbolMap:
12481264

12491265
for node in ast.iter_child_nodes(tree):
12501266
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
1251-
symbols.append((node.lineno, node.end_lineno or node.lineno, node.name))
1267+
symbols.append((_symbol_start_line(node=node), node.end_lineno or node.lineno, node.name))
12521268

12531269
elif isinstance(node, ast.ClassDef):
1254-
symbols.append((node.lineno, node.end_lineno or node.lineno, node.name))
1270+
symbols.append((_symbol_start_line(node=node), node.end_lineno or node.lineno, node.name))
12551271
# Extract class members with line ranges
12561272
members: dict[str, tuple[int, int]] = {}
12571273
for child in ast.iter_child_nodes(node):
12581274
if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
1259-
members[child.name] = (child.lineno, child.end_lineno or child.lineno)
1275+
members[child.name] = (_symbol_start_line(node=child), child.end_lineno or child.lineno)
12601276
# Build intra-class call graph
12611277
internal_calls = _build_intra_class_call_graph(class_node=node)
12621278
class_members[node.name] = ClassMemberInfo(
@@ -1488,6 +1504,43 @@ def _diff_has_deletions(diff_content: str) -> bool:
14881504
return any(line.startswith("-") and not line.startswith("---") for line in diff_content.splitlines())
14891505

14901506

1507+
def _extract_deleted_symbols_from_diff(diff_content: str) -> set[str]:
1508+
"""Extract names of functions and classes that were deleted in a diff.
1509+
1510+
Scans deletion lines (``-``) for ``def`` and ``class`` statements to
1511+
identify symbols that were removed. This avoids the need to fetch
1512+
and parse the old file source.
1513+
1514+
Note:
1515+
When a class with methods is deleted, both the class name *and* its member
1516+
method names are returned as separate symbols. This may cause broader
1517+
dependency matching than strictly necessary (a member name like ``setup``
1518+
could collide with an unrelated top-level function), but is acceptable
1519+
because over-reporting is safer than under-reporting for test selection.
1520+
1521+
Args:
1522+
diff_content: Unified diff text.
1523+
1524+
Returns:
1525+
Set of deleted function/class names.
1526+
"""
1527+
deleted_symbols: set[str] = set()
1528+
for line in diff_content.splitlines():
1529+
if not line.startswith("-") or line.startswith("---"):
1530+
continue
1531+
stripped = line[1:].strip()
1532+
# Match function definitions
1533+
func_match = re.match(pattern=r"(?:async\s+)?def\s+(\w+)\s*\(", string=stripped)
1534+
if func_match:
1535+
deleted_symbols.add(func_match.group(1))
1536+
continue
1537+
# Match class definitions
1538+
class_match = re.match(pattern=r"class\s+(\w+)[\s:(]", string=stripped)
1539+
if class_match:
1540+
deleted_symbols.add(class_match.group(1))
1541+
return deleted_symbols
1542+
1543+
14911544
def _get_diff_content(
14921545
file_path: Path,
14931546
base_branch: str,
@@ -1690,8 +1743,8 @@ def _extract_modified_symbols(
16901743
16911744
Returns:
16921745
``SymbolClassification`` with modified and new symbol sets, or
1693-
``None`` when symbol-level analysis is not possible (diff failure,
1694-
pure deletion, or parse errors). A ``None`` return signals the
1746+
``None`` when symbol-level analysis is not possible (diff failure
1747+
or parse errors). A ``None`` return signals the
16951748
caller to fall back to file-level dependency tracking.
16961749
16971750
When some changed lines fall outside any named symbol (e.g. import
@@ -1721,7 +1774,10 @@ def _extract_modified_symbols(
17211774
if has_deletions:
17221775
if file_status == "renamed":
17231776
return SymbolClassification(modified_symbols=set(), new_symbols=set())
1724-
return None # Pure deletion — cannot safely narrow impact
1777+
# Pure deletion — extract deleted symbol names from the diff
1778+
# to enable symbol-level narrowing instead of conservative fallback
1779+
deleted_symbols = _extract_deleted_symbols_from_diff(diff_content=diff_content)
1780+
return SymbolClassification(modified_symbols=deleted_symbols, new_symbols=set())
17251781
return SymbolClassification(modified_symbols=set(), new_symbols=set())
17261782

17271783
try:
@@ -1777,7 +1833,25 @@ def _extract_modified_symbols(
17771833
# or potentially impactful executable code.
17781834
if line_number <= len(source_lines):
17791835
line_content = source_lines[line_number - 1].strip()
1780-
if not line_content or line_content.startswith(("#", "import ", "from ", '"""', "'''", '"', "'")):
1836+
if not line_content or line_content.startswith((
1837+
"#",
1838+
"import ",
1839+
"from ",
1840+
'"""',
1841+
"'''",
1842+
'"',
1843+
"'",
1844+
"if TYPE_CHECKING:",
1845+
"if typing.TYPE_CHECKING:",
1846+
")", # Closing paren of multi-line import; safe because
1847+
# the opening line (e.g. "setup(") would trigger fallback.
1848+
"__all__",
1849+
)):
1850+
has_unattributed = True
1851+
elif re.match(pattern=r"^[A-Za-z_]\w*(?:\s+as\s+\w+)?\s*,\s*$", string=line_content):
1852+
# Import continuation line (e.g. "TIMEOUT_2MIN," inside
1853+
# a multi-line "from ... import (...)" block).
1854+
# Trailing comma is required to avoid matching bare identifiers.
17811855
has_unattributed = True
17821856
else:
17831857
# Executable module-level code — conservative fallback

0 commit comments

Comments
 (0)