Skip to content

Commit a1a3c1b

Browse files
cdeust and claude committed
fix(graph_quality_scorer): BUG #6 — score AST-derived SYMBOL nodes
Previously every node with type="symbol" (from ADR-0046's AST extractor) fell through to _score_default and received a uniform 0.5 quality with label "unscored node type". The new _score_symbol heuristic ranks symbols by connectivity (central / connected / present / isolated), visibility (public vs private by name convention), symbol-type anchor bonus (class / struct / trait), and signature presence.

Mirror change applied to agentic-ai's graph-quality-scorer.ts in the same shape so the TS-side consumer and Python-side server stay 1:1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 80c5106 commit a1a3c1b

1 file changed

Lines changed: 56 additions & 0 deletions

File tree

mcp_server/core/graph_quality_scorer.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,12 @@ def _score_node(node: dict[str, Any], conns: int, total: int) -> tuple[float, st
6363
"memory": _score_memory,
6464
"entity": _score_entity,
6565
"discussion": _score_discussion,
66+
# source: Spike B' BUG #6 fix — AST-derived SYMBOL nodes (ADR-0046)
67+
# previously fell through to _score_default ("unscored node type"),
68+
# giving every symbol a uniform 0.5 quality. Now scored by connection
69+
# count + visibility heuristic: well-connected public symbols rank
70+
# higher than isolated private helpers.
71+
"symbol": _score_symbol,
6672
}
6773
scorer = scorers.get(ntype, _score_default)
6874
return scorer(node, conns, total)
@@ -258,3 +264,53 @@ def _score_discussion(n: dict, conns: int, total: int) -> tuple[float, str]:
258264

259265
def _score_default(n: dict, conns: int, total: int) -> tuple[float, str]:
260266
return 0.5, "unscored node type"
267+
268+
269+
def _score_symbol(n: dict, conns: int, total: int) -> tuple[float, str]:
270+
"""Score AST-derived SYMBOL nodes by connectivity + visibility.
271+
272+
source: Spike B' BUG #6 — previously no scorer existed; all AST symbols
273+
received the default 0.5. We rank symbols by how integrated they are
274+
into the graph (`conns`) and whether they're externally callable
275+
(public vs private by name convention) — well-connected public symbols
276+
are the "important" surface of a codebase.
277+
"""
278+
name = n.get("name", "") or n.get("label", "")
279+
sym_type = n.get("symbol_type", "")
280+
is_private = isinstance(name, str) and name.startswith("_") and not name.startswith("__")
281+
282+
parts: list[str] = []
283+
q = 0.0
284+
285+
# Connectivity: highly-connected symbols are central
286+
if conns >= 10:
287+
q += 0.5
288+
parts.append(f"central ({conns} edges)")
289+
elif conns >= 3:
290+
q += 0.3
291+
parts.append(f"connected ({conns} edges)")
292+
elif conns >= 1:
293+
q += 0.15
294+
parts.append(f"{conns} edges")
295+
else:
296+
parts.append("isolated")
297+
298+
# Visibility: public symbols are the codebase's external surface
299+
if not is_private:
300+
q += 0.2
301+
parts.append("public")
302+
else:
303+
parts.append("private")
304+
305+
# Symbol type bonus: classes/traits anchor inheritance trees
306+
if sym_type in ("class", "struct", "trait"):
307+
q += 0.15
308+
parts.append(f"{sym_type}")
309+
elif sym_type:
310+
parts.append(sym_type)
311+
312+
# Has signature — indicates parser captured full surface
313+
if n.get("signature"):
314+
q += 0.05
315+
316+
return min(max(q, 0.0), 1.0), " | ".join(parts)

0 commit comments

Comments
 (0)