@@ -45,83 +45,99 @@ func (p *Pipeline) passCommunities() {
4545 slog .Info ("pass.communities.done" , "communities" , communityCount , "member_of" , memberOfCount )
4646}
4747
48- // louvainCommunities implements a simplified Louvain algorithm for community detection.
48+ // louvainCommunities implements the Louvain algorithm for community detection.
49+ // Uses per-community degree accumulators for O(m) per iteration instead of O(N^2).
4950// Returns a map of community_id → []node_id.
5051func louvainCommunities (adj map [int64 ]map [int64 ]bool , allNodes map [int64 ]bool ) map [int ][]int64 {
5152 nodeCommunity := make (map [int64 ]int , len (allNodes ))
52- communityID := 0
53+ commID := 0
5354 for nodeID := range allNodes {
54- nodeCommunity [nodeID ] = communityID
55- communityID ++
55+ nodeCommunity [nodeID ] = commID
56+ commID ++
5657 }
5758
59+ // Pre-compute node degrees
60+ nodeDegree := make (map [int64 ]float64 , len (allNodes ))
5861 totalEdges := 0
59- for _ , neighbors := range adj {
62+ for nodeID , neighbors := range adj {
63+ nodeDegree [nodeID ] = float64 (len (neighbors ))
6064 totalEdges += len (neighbors )
6165 }
6266 m := float64 (totalEdges ) / 2.0
6367 if m == 0 {
6468 m = 1
6569 }
6670
71+ // Per-community accumulator: sum of degrees of all members.
72+ // Updated incrementally when nodes move between communities.
73+ commSumTot := make (map [int ]float64 , len (allNodes ))
74+ for nodeID , comm := range nodeCommunity {
75+ commSumTot [comm ] = nodeDegree [nodeID ]
76+ }
77+
6778 improved := true
6879 for iteration := 0 ; improved && iteration < 50 ; iteration ++ {
69- improved = louvainIteration (adj , allNodes , nodeCommunity , m )
80+ improved = louvainIteration (adj , nodeCommunity , nodeDegree , commSumTot , m )
7081 }
7182
7283 return groupAndFilter (nodeCommunity )
7384}
7485
// louvainIteration runs one pass of greedy modularity optimization and
// reports whether any node changed community.
//
// Parameters:
//   - adj: node → set of neighbor IDs.
//   - nodeCommunity: node → community ID; updated in place when a node moves.
//   - nodeDegree: node → degree, as computed by the caller.
//   - commSumTot: community ID → sum of the degrees of its members; updated
//     in place so it stays consistent with nodeCommunity.
//   - m: edge count used by the modularity formula; must be non-zero because
//     it is used as a divisor.
//
// For each node the gain of joining every neighboring community is evaluated
// in time proportional to the node's degree, using the commSumTot
// accumulators instead of rescanning community members.
//
// Nodes that have no entry in nodeCommunity are ignored, both as movers and
// as neighbors: a plain map lookup would yield the zero value and silently
// treat them as members of community 0. Self-loops are ignored when counting
// links to a community, because a self-loop travels with the node and must
// not favor any particular community.
//
// Go does not specify map iteration order, so the order in which nodes are
// visited — and therefore the resulting partition — may differ between runs.
func louvainIteration(
	adj map[int64]map[int64]bool,
	nodeCommunity map[int64]int,
	nodeDegree map[int64]float64,
	commSumTot map[int]float64,
	m float64,
) bool {
	// Gains below this threshold are treated as floating-point noise.
	const minGain = 1e-10

	improved := false
	twoMSquared := 2.0 * m * m

	for nodeID, neighbors := range adj {
		currentComm, known := nodeCommunity[nodeID]
		if !known {
			// Not part of the partition; touching the accumulators for it
			// would corrupt community 0.
			continue
		}
		ki := nodeDegree[nodeID]

		// Number of links from this node into each neighboring community.
		edgesToComm := make(map[int]float64, len(neighbors))
		for neighborID := range neighbors {
			if neighborID == nodeID {
				continue
			}
			comm, ok := nodeCommunity[neighborID]
			if !ok {
				continue
			}
			edgesToComm[comm]++
		}

		// Take the node out of its community so that staying and moving are
		// scored against the same baseline.
		commSumTot[currentComm] -= ki
		removeCost := edgesToComm[currentComm]/m - ki*commSumTot[currentComm]/twoMSquared

		bestComm, bestGain := currentComm, 0.0
		for comm, kiIn := range edgesToComm {
			if comm == currentComm {
				continue
			}
			gain := kiIn/m - ki*commSumTot[comm]/twoMSquared - removeCost
			if gain > bestGain {
				bestGain = gain
				bestComm = comm
			}
		}

		if bestComm == currentComm || bestGain <= minGain {
			// No worthwhile move: put the node back where it was.
			commSumTot[currentComm] += ki
			continue
		}

		// Commit the move; currentComm already had ki subtracted above.
		nodeCommunity[nodeID] = bestComm
		commSumTot[bestComm] += ki
		improved = true
	}
	return improved
}
108140
// modularityGain scores moving nodeID into targetComm.
//
// It walks every entry of nodeCommunity, so one call costs time linear in the
// number of assigned nodes. Among the members of targetComm it counts those
// adjacent to nodeID (the links into the community) and sums their degrees as
// given by adj. With ki the degree of nodeID and m the edge count, the result
// is linksIn/m - ki*degreeTotal/(2*m*m).
func modularityGain(nodeID int64, targetComm int, adj map[int64]map[int64]bool, nodeCommunity map[int64]int, ki, m float64) float64 {
	own := adj[nodeID] // reading from a nil neighbor set is safe and yields false

	var linksIn, degreeTotal float64
	for member, comm := range nodeCommunity {
		if comm == targetComm {
			if own[member] {
				linksIn++
			}
			degreeTotal += float64(len(adj[member]))
		}
	}

	attraction := linksIn / m
	penalty := ki * degreeTotal / (2 * m * m)
	return attraction - penalty
}
125141// groupAndFilter groups nodes by community and filters out singletons.
126142func groupAndFilter (nodeCommunity map [int64 ]int ) map [int ][]int64 {
127143 communities := make (map [int ][]int64 )
0 commit comments