33These metrics help users understand their codebase structure at a glance.
44
55Public API:
6- god_nodes(G, top_n, min_degree) → list[GodNode]
7- surprising_connections(G, communities) → list[SurprisingConnection]
8- hub_files(G, top_n) → list[HubFile]
9- analyze(G, communities, cohesion_scores) → GraphReport
10- report_to_markdown(report) → str
6+ god_nodes(G, top_n, min_degree, project_paths) → list[GodNode]
7+ surprising_connections(G, communities) → list[SurprisingConnection]
8+ hub_files(G, top_n) → list[HubFile]
9+ analyze(G, communities, cohesion_scores,
10+ project_paths) → GraphReport
11+ report_to_markdown(report) → str
1112"""
1213
1314from __future__ import annotations
1415
16+ import os
17+ from collections import defaultdict
1518from dataclasses import dataclass , field
1619from typing import Optional
1720
2225
@dataclass
class GodNode:
    """A directory with unusually high cross-boundary coupling.

    One record describes one folder. The degree fields count only edges whose
    two endpoints were mapped to different folders; edges that stay inside a
    folder are not counted.
    """

    # Path relative to the owning project's root ("lib/utils", "src-tauri/src").
    # Holds the absolute directory instead when no project root applies or the
    # folder lies outside that root.
    folder_path: str
    # Last path component of the folder ("utils"); "(root)" when the folder is
    # the project root itself.
    label: str
    # Owning project name ("desktop"); empty when the nodes carry no project.
    project: str
    # Number of graph nodes whose source file lives in this folder.
    child_count: int
    # Edges arriving from other folders (external → folder).
    in_degree: int
    # Edges leaving to other folders (folder → external).
    out_degree: int
    # Total cross-boundary edges: in_degree + out_degree.
    degree: int
    # degree / max(1, total_nodes - child_count); the denominator is clamped
    # so a folder containing every node does not divide by zero.
    centrality: float
3437
3538
3639@dataclass
@@ -70,37 +73,123 @@ class GraphReport:
7073
7174# ── Analysis functions ────────────────────────────────────────────────────────
7275
def _infer_project_paths(G: nx.DiGraph) -> dict[str, str]:
    """Infer project root paths from ``source_file`` attributes in the graph.

    Nodes are grouped by their ``project`` attribute. The root of a project is
    the longest common path of the absolute directories that contain its
    source files. Nodes missing either ``source_file`` or ``project`` are
    ignored.

    Args:
        G: graph whose node data may carry ``source_file`` and ``project``.

    Returns:
        Mapping of project name → absolute common directory. A project whose
        directories have no common path is left out of the mapping instead of
        raising.
    """
    # A set keeps one entry per directory: many nodes share a file or folder,
    # and duplicates add nothing to the common-path computation.
    project_dirs: dict[str, set[str]] = defaultdict(set)
    for _node_id, data in G.nodes(data=True):
        sf = data.get("source_file", "")
        proj = data.get("project", "")
        if sf and proj:
            project_dirs[proj].add(os.path.dirname(os.path.abspath(sf)))

    result: dict[str, str] = {}
    for proj, dirs in project_dirs.items():
        try:
            # sorted() hands commonpath a real sequence in a stable order.
            result[proj] = os.path.commonpath(sorted(dirs))
        except ValueError:
            # commonpath rejects paths with nothing in common (e.g. different
            # drives on Windows). Without an inferable root, omit the project
            # rather than abort the whole analysis.
            continue
    return result
94+
95+
7396def god_nodes (
7497 G : nx .DiGraph ,
7598 top_n : int = 20 ,
7699 min_degree : int = 5 ,
100+ project_paths : Optional [dict [str , str ]] = None ,
77101) -> list [GodNode ]:
78- """Find nodes with the highest degree (potential god classes / bottlenecks).
102+ """Find directories with the highest cross-boundary coupling.
103+
104+ Counts only edges that cross folder boundaries (cross-boundary edges).
105+ Intra-folder edges are ignored, so a single large wrapper file can no
106+ longer dominate solely because of its high node-level degree.
79107
80108 Args:
81109 G: the knowledge graph
82- top_n: return at most this many nodes
83- min_degree: minimum total degree to qualify
110+ top_n: return at most this many folders
111+ min_degree: minimum cross-boundary edge count to qualify
112+ project_paths: optional dict mapping project name → absolute project
113+ root path. When None, paths are inferred automatically
114+ from source_file attributes via ``_infer_project_paths``.
84115
85116 Returns:
86- List of GodNode sorted by degree descending.
117+ List of GodNode (folder-level) sorted by degree descending.
87118 """
88- centrality = nx .degree_centrality (G )
119+ if project_paths is None :
120+ project_paths = _infer_project_paths (G )
121+
122+ total_nodes = G .number_of_nodes ()
123+
124+ # Step 1: build node → (folder_key, folder_path, project) mapping.
125+ # folder_key uses "{project}/{rel}" for cross-project uniqueness.
126+ # folder_path stores only the relative portion shown in the report.
127+ node_folder_key : dict [str , str ] = {}
128+ key_to_rel : dict [str , str ] = {}
129+ key_to_project : dict [str , str ] = {}
89130
90- results : list [GodNode ] = []
91131 for node_id , data in G .nodes (data = True ):
92- deg = G .degree (node_id )
93- if deg < min_degree :
132+ sf = data .get ("source_file" , "" )
133+ proj = data .get ("project" , "" )
134+ if not sf :
135+ continue
136+ dirname = os .path .dirname (os .path .abspath (sf ))
137+ if proj and proj in project_paths :
138+ try :
139+ rel = os .path .relpath (dirname , project_paths [proj ])
140+ except ValueError :
141+ rel = dirname
142+ # Source lives outside the project root: fall back to the absolute directory
143+ if rel .startswith (".." ):
144+ rel = dirname
145+ else :
146+ rel = dirname
147+ key = f"{ proj } /{ rel } " if proj else rel
148+ node_folder_key [node_id ] = key
149+ key_to_rel [key ] = rel
150+ key_to_project [key ] = proj
151+
152+ # Step 2: count cross-boundary edges in a single pass.
153+ folder_in : dict [str , int ] = defaultdict (int )
154+ folder_out : dict [str , int ] = defaultdict (int )
155+ folder_children : dict [str , set ] = defaultdict (set )
156+
157+ for node_id in G .nodes ():
158+ fk = node_folder_key .get (node_id )
159+ if fk :
160+ folder_children [fk ].add (node_id )
161+
162+ for src , tgt in G .edges ():
163+ src_key = node_folder_key .get (src )
164+ tgt_key = node_folder_key .get (tgt )
165+ if src_key is None or tgt_key is None :
94166 continue
167+ if src_key != tgt_key :
168+ folder_out [src_key ] += 1
169+ folder_in [tgt_key ] += 1
170+
171+ # Step 3: filter, build GodNode list, sort.
172+ results : list [GodNode ] = []
173+ for folder_key in folder_children :
174+ in_d = folder_in .get (folder_key , 0 )
175+ out_d = folder_out .get (folder_key , 0 )
176+ degree = in_d + out_d
177+ if degree < min_degree :
178+ continue
179+ child_count = len (folder_children [folder_key ])
180+ centrality = degree / max (1 , total_nodes - child_count )
181+ rel = key_to_rel .get (folder_key , folder_key )
182+ proj = key_to_project .get (folder_key , "" )
183+ label = os .path .basename (rel ) if rel not in ("." , "" ) else "(root)"
95184 results .append (GodNode (
96- node_id = node_id ,
97- label = data . get ( " label" , node_id ) ,
98- type = data . get ( "type" , "unknown" ) ,
99- in_degree = G . in_degree ( node_id ) ,
100- out_degree = G . out_degree ( node_id ) ,
101- degree = deg ,
102- centrality = centrality . get ( node_id , 0.0 ) ,
103- source_file = data . get ( "source_file" , "" ) ,
185+ folder_path = rel ,
186+ label = label ,
187+ project = proj ,
188+ child_count = child_count ,
189+ in_degree = in_d ,
190+ out_degree = out_d ,
191+ degree = degree ,
192+ centrality = centrality ,
104193 ))
105194
106195 results .sort (key = lambda n : n .degree , reverse = True )
@@ -207,13 +296,16 @@ def analyze(
207296 G : nx .DiGraph ,
208297 communities : Optional [dict [str , int ]] = None ,
209298 cohesion_scores : Optional [dict [int , float ]] = None ,
299+ project_paths : Optional [dict [str , str ]] = None ,
210300) -> GraphReport :
211301 """Run all analyses and return a unified GraphReport.
212302
213303 Args:
214304 G: built knowledge graph (output of build.py + optional enrich.py)
215305 communities: optional community mapping from cluster.py
216306 cohesion_scores: optional per-community cohesion scores from cluster.score_all()
307+ project_paths: optional dict mapping project name → absolute project root path.
308+ When None, paths are inferred automatically from the graph.
217309 """
218310 report = GraphReport (
219311 node_count = G .number_of_nodes (),
@@ -224,7 +316,7 @@ def analyze(
224316 isolated_nodes = sum (1 for n in G .nodes () if G .degree (n ) == 0 ),
225317 )
226318
227- report .god_nodes = god_nodes (G )
319+ report .god_nodes = god_nodes (G , project_paths = project_paths )
228320 report .hub_files = hub_files (G )
229321
230322 if communities :
@@ -248,12 +340,14 @@ def report_to_markdown(report: GraphReport) -> str:
248340 ]
249341
250342 if report .god_nodes :
251- lines += ["## God Nodes (High Coupling)" , "" ]
252- lines .append (f"{ 'Node' :<40} { 'Type' :<12} { 'Degree' :>6} { 'Centrality' :>10} " )
253- lines .append ("-" * 72 )
343+ lines += ["## God Nodes (High-Coupling Directories)" , "" ]
344+ lines .append (
345+ f"{ 'Folder' :<44} { 'Project' :<12} { 'Cross-Edges' :>11} { 'Children' :>8} { 'Centrality' :>10} "
346+ )
347+ lines .append ("-" * 89 )
254348 for gn in report .god_nodes [:10 ]:
255349 lines .append (
256- f"{ gn .label :<40 } { gn .type :<12} { gn .degree :>6 } { gn .centrality :>10.4f} "
350+ f"{ gn .folder_path :<44 } { gn .project :<12} { gn .degree :>11 } { gn . child_count :>8 } { gn .centrality :>10.4f} "
257351 )
258352 lines .append ("" )
259353
0 commit comments