Skip to content

Commit d337bff

Browse files
committed
refactor: reduce cognitive complexity in ppr, python_semantics, git
Extract helper functions to reduce SonarCloud cognitive complexity:
- ppr.py: extract PPR iteration and normalization helpers
- python_semantics.py: extract AST collection helpers
- git.py: extract diff parsing helpers
1 parent b3000d2 commit d337bff

3 files changed

Lines changed: 129 additions & 91 deletions

File tree

src/treemapper/diffctx/git.py

Lines changed: 36 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -41,54 +41,55 @@ def get_diff_text(repo_root: Path, diff_range: str) -> str:
4141
return run_git(repo_root, ["diff", diff_range])
4242

4343

44+
def _parse_hunk_header(match: re.Match[str], path: Path) -> DiffHunk:
    """Build a ``DiffHunk`` for ``path`` from a matched hunk-header line.

    Groups 1 and 3 of ``match`` carry the old and new start lines; groups 2
    and 4 carry the optional old and new line counts. A count that is missing
    or empty is treated as 1.
    """

    def _count(text: str | None) -> int:
        # An omitted count in the header means the range covers one line.
        return int(text) if text else 1

    old_start_text, old_len_text, new_start_text, new_len_text = match.group(1, 2, 3, 4)
    old_start = int(old_start_text)
    old_len = _count(old_len_text)
    new_start = int(new_start_text)
    new_len = _count(new_len_text)

    return DiffHunk(
        path=path,
        new_start=new_start,
        new_len=new_len,
        old_start=old_start,
        old_len=old_len,
    )
59+
60+
61+
def _parse_path_line(line: str, repo_root: Path) -> tuple[str, Path | None]:
    """Classify ``line`` as an old/new file header and resolve its path.

    Returns ``("old", path)`` for ``--- a/<path>`` and ``("new", path)`` for
    ``+++ b/<path>``, where ``path`` is ``<path>`` with surrounding whitespace
    stripped and joined onto ``repo_root``. A ``/dev/null`` header yields the
    matching side with ``None`` as the path. Any other line yields
    ``("", None)``.
    """
    # (line prefix, side label, whether a path follows the prefix)
    header_rules = (
        ("--- a/", "old", True),
        ("--- /dev/null", "old", False),
        ("+++ b/", "new", True),
        ("+++ /dev/null", "new", False),
    )
    for prefix, side, carries_path in header_rules:
        if not line.startswith(prefix):
            continue
        if carries_path:
            return side, repo_root / line[len(prefix):].strip()
        return side, None
    return "", None
71+
72+
4473
def parse_diff(repo_root: Path, diff_range: str) -> list[DiffHunk]:
    """Return one ``DiffHunk`` per hunk of ``git diff --unified=0 <diff_range>``.

    Each hunk is attributed to the file's new path when there is one and to
    its old path otherwise (the new side is ``/dev/null`` for deletions).
    Hunk headers seen while neither path is known are skipped.

    Args:
        repo_root: Repository root; passed to ``run_git`` and used as the
            base for the paths stored in the returned hunks.
        diff_range: Revision range handed to ``git diff``.

    Returns:
        The parsed hunks, in the order they appear in the diff output.
    """
    output = run_git(repo_root, ["diff", "--unified=0", diff_range])
    hunks: list[DiffHunk] = []
    old_path: Path | None = None
    new_path: Path | None = None
    # ``---``/``+++`` file headers only occur between a ``diff`` line and the
    # first hunk of that file. Past that point, a removed line whose text is
    # ``-- a/x`` renders as ``--- a/x`` (and an added ``++ b/x`` as
    # ``+++ b/x``); treating those as headers would corrupt the current paths
    # and misattribute the file's remaining hunks.
    in_file_header = True

    for line in output.splitlines():
        if line.startswith("diff "):
            in_file_header = True

        if in_file_header:
            path_type, path = _parse_path_line(line, repo_root)
            if path_type == "old":
                old_path = path
                continue
            if path_type == "new":
                new_path = path
                continue

        match = _HUNK_RE.match(line)
        if not match:
            continue

        # From the first hunk on, everything up to the next ``diff`` line is
        # hunk content, never a file header.
        in_file_header = False

        # For deletions, use old_path; for additions/modifications, use new_path.
        current_path = new_path if new_path else old_path
        if current_path:
            hunks.append(_parse_hunk_header(match, current_path))

    return hunks
9495

src/treemapper/diffctx/ppr.py

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,50 @@
44
from .types import FragmentId
55

66

7+
def _initialize_ppr_scores(
    nodes: list[FragmentId], valid_seeds: set[FragmentId]
) -> tuple[dict[FragmentId, float], dict[FragmentId, float]]:
    """Build the personalization vector and the starting score vector.

    Every node found in ``valid_seeds`` gets ``1 / len(valid_seeds)``; every
    other node gets ``0.0``.

    Returns:
        ``(p, scores)``: the personalization vector and a separate dict
        holding the same values, so the caller can replace or update one
        without touching the other.
    """
    personalization: dict[FragmentId, float] = {}
    for node in nodes:
        if node in valid_seeds:
            personalization[node] = 1.0 / len(valid_seeds)
        else:
            personalization[node] = 0.0
    return personalization, personalization.copy()
12+
13+
14+
def _ppr_iteration(
    nodes: list[FragmentId],
    graph: Graph,
    scores: dict[FragmentId, float],
    out_sum: dict[FragmentId, float],
    base: dict[FragmentId, float],
    p: dict[FragmentId, float],
    alpha: float,
) -> dict[FragmentId, float]:
    """Run one personalized-PageRank update and return the new score vector.

    The result starts as a copy of ``base``. Each node with a positive
    ``out_sum`` and at least one neighbor sends ``alpha * scores[node]`` to
    its neighbors in proportion to edge weight. The scores of all remaining
    (dangling) nodes are pooled and handed back as ``alpha * pool * p[n]``
    for every node ``n``. None of the input mappings is modified.
    """
    updated: dict[FragmentId, float] = dict(base)
    dangling_pool = 0.0

    for node in nodes:
        edges = graph.neighbors(node)
        weight_sum = out_sum[node]
        if weight_sum <= 0 or not edges:
            # Nothing to push along: hold this node's score for redistribution.
            dangling_pool += scores[node]
            continue
        outgoing = alpha * scores[node]
        for target, weight in edges.items():
            updated[target] += outgoing * (weight / weight_sum)

    if dangling_pool > 0:
        redistributed = alpha * dangling_pool
        for node in nodes:
            updated[node] += redistributed * p[node]

    return updated
42+
43+
44+
def _normalize_scores(scores: dict[FragmentId, float]) -> dict[FragmentId, float]:
    """Scale ``scores`` so that its values sum to 1.

    Returns a new dict when the total is positive. Otherwise the input dict
    itself is returned unchanged, since there is nothing meaningful to divide
    by. The input is never modified.
    """
    mass = sum(scores.values())
    if not mass > 0:
        return scores
    return {node: value / mass for node, value in scores.items()}
49+
50+
751
def personalized_pagerank(
852
graph: Graph,
953
seeds: set[FragmentId],
@@ -15,46 +59,19 @@ def personalized_pagerank(
1559
return {}
1660

1761
nodes = list(graph.nodes)
18-
19-
# Filter seeds to only include nodes that exist in the graph
2062
valid_seeds = seeds & graph.nodes
2163
if not valid_seeds:
2264
return {n: 1.0 / len(nodes) for n in nodes}
2365

24-
p = {n: (1.0 / len(valid_seeds) if n in valid_seeds else 0.0) for n in nodes}
25-
scores = dict(p)
26-
66+
p, scores = _initialize_ppr_scores(nodes, valid_seeds)
2767
out_sum = {n: sum(graph.neighbors(n).values()) for n in nodes}
28-
2968
base = {n: (1.0 - alpha) * p[n] for n in nodes}
3069

3170
for _ in range(max_iter):
32-
new_scores: dict[FragmentId, float] = dict(base)
33-
34-
dangling_mass = 0.0
35-
for src in nodes:
36-
nbrs = graph.neighbors(src)
37-
total = out_sum[src]
38-
if total <= 0 or not nbrs:
39-
dangling_mass += scores[src]
40-
continue
41-
contrib = alpha * scores[src]
42-
for dst, w in nbrs.items():
43-
new_scores[dst] += contrib * (w / total)
44-
45-
if dangling_mass > 0:
46-
add = alpha * dangling_mass
47-
for n in nodes:
48-
new_scores[n] += add * p[n]
49-
71+
new_scores = _ppr_iteration(nodes, graph, scores, out_sum, base, p, alpha)
5072
delta = sum(abs(new_scores[n] - scores[n]) for n in nodes)
5173
scores = new_scores
5274
if delta < tol:
5375
break
5476

55-
total = sum(scores.values())
56-
if total > 0:
57-
for n in scores:
58-
scores[n] /= total
59-
60-
return scores
77+
return _normalize_scores(scores)

src/treemapper/diffctx/python_semantics.py

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -25,31 +25,22 @@ def _names_from_expr(expr: ast.AST | None) -> set[str]:
2525
return out
2626

2727

28-
def analyze_python_fragment(code: str) -> PyFragmentInfo:
29-
if not code.strip():
30-
return PyFragmentInfo(frozenset(), frozenset(), frozenset(), frozenset())
31-
32-
dedented = textwrap.dedent(code)
33-
try:
34-
tree = ast.parse(dedented)
35-
except SyntaxError:
36-
return PyFragmentInfo(frozenset(), frozenset(), frozenset(), frozenset())
37-
28+
def _collect_defines(tree: ast.Module) -> set[str]:
    """Return the names of functions and classes defined at module top level.

    Only the direct statements of ``tree.body`` are inspected, so nested
    functions, methods and nested classes are not reported.
    """
    definition_nodes = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    return {stmt.name for stmt in tree.body if isinstance(stmt, definition_nodes)}
34+
35+
36+
def _collect_refs_and_calls(tree: ast.Module) -> tuple[set[str], set[str]]:
37+
refs: set[str] = set()
38+
calls: set[str] = set()
4739

4840
for node in ast.walk(tree):
4941
if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load):
5042
refs.add(node.id)
51-
52-
if isinstance(node, ast.Attribute):
43+
elif isinstance(node, ast.Attribute):
5344
refs.add(node.attr)
5445

5546
if isinstance(node, ast.Call):
@@ -59,17 +50,46 @@ def analyze_python_fragment(code: str) -> PyFragmentInfo:
5950
elif isinstance(func, ast.Attribute):
6051
calls.add(func.attr)
6152

53+
return refs, calls
54+
55+
56+
def _extract_func_type_refs(node: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
    """Collect the names used in the annotations of a function signature.

    Covers the return annotation and the annotations of positional-only,
    regular and keyword-only parameters, plus ``*args`` and ``**kwargs`` when
    present. Names are extracted by ``_names_from_expr``.

    Args:
        node: The function definition whose signature is inspected.

    Returns:
        The union of the names found across all inspected annotations.
    """
    signature = node.args
    type_refs = _names_from_expr(node.returns)

    # Parameters declared before "/" live in ``posonlyargs``, not ``args``;
    # leaving them out would drop the annotations of positional-only params.
    for param in signature.posonlyargs + signature.args + signature.kwonlyargs:
        type_refs |= _names_from_expr(param.annotation)

    # ``*args`` / ``**kwargs`` are single optional entries rather than lists.
    for variadic in (signature.vararg, signature.kwarg):
        if variadic is not None:
            type_refs |= _names_from_expr(variadic.annotation)

    return type_refs
65+
66+
67+
def _collect_type_refs(tree: ast.Module) -> set[str]:
    """Gather every name used in a type annotation anywhere in ``tree``.

    Walks the whole tree (nested scopes included). Function definitions
    contribute the names from their signature annotations; annotated
    assignments contribute the names from their annotation.
    """
    type_refs: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.AnnAssign):
            type_refs.update(_names_from_expr(node.annotation))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            type_refs.update(_extract_func_type_refs(node))
    return type_refs
75+
76+
77+
# Shared all-empty result that analyze_python_fragment returns for blank
# input and for code that fails to parse.
_EMPTY_INFO = PyFragmentInfo(frozenset(), frozenset(), frozenset(), frozenset())
78+
79+
80+
def analyze_python_fragment(code: str) -> PyFragmentInfo:
81+
if not code.strip():
82+
return _EMPTY_INFO
83+
84+
dedented = textwrap.dedent(code)
85+
try:
86+
tree = ast.parse(dedented)
87+
except SyntaxError:
88+
return _EMPTY_INFO
89+
90+
defines = _collect_defines(tree)
91+
refs, calls = _collect_refs_and_calls(tree)
92+
type_refs = _collect_type_refs(tree)
7393

7494
return PyFragmentInfo(
7595
defines=frozenset(defines),

0 commit comments

Comments
 (0)