Skip to content

Commit bb8456e

Browse files
ECG Community Detection implementation (#502)
1 parent e4194c0 commit bb8456e

6 files changed

Lines changed: 286 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
We follow SemVer as most of the Julia ecosystem. Below you might see the "breaking" label even for minor version bumps -- we use it a bit more loosely to denote things that are not breaking by SemVer's definition but might cause breakage to people using internal or experimental APIs or undocumented implementation details.
44

55
## unreleased
6-
76
- `is_articulation(g, v)` for checking whether a single vertex is an articulation point
87
- The iFUB algorithm is used for faster diameter calculation and now supports weighted graph diameter calculation
8+
- ECG (ensemble clustering for graphs) community detection algorithm, available as `ecg` and `ecg_weights`
99

1010
## v1.14.0 - 2026-02-26
1111
- **(breaking)** `neighbors`, `inneighbors`, and `outneighbors` now return an immutable `FrozenVector` instead of `Vector`

docs/src/algorithms/community.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Pages = [
2020
"community/core-periphery.jl",
2121
"community/label_propagation.jl",
2222
"community/louvain.jl",
23+
"community/ecg.jl",
2324
"community/modularity.jl",
2425
"community/rich_club.jl",
2526
]

src/Graphs.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,8 @@ export
327327
triangles,
328328
label_propagation,
329329
louvain,
330+
ecg,
331+
ecg_weights,
330332
maximal_cliques,
331333
maximum_clique,
332334
clique_number,
@@ -553,6 +555,7 @@ include("centrality/radiality.jl")
553555
include("community/modularity.jl")
554556
include("community/label_propagation.jl")
555557
include("community/louvain.jl")
558+
include("community/ecg.jl")
556559
include("community/core-periphery.jl")
557560
include("community/clustering.jl")
558561
include("community/cliques.jl")

src/community/ecg.jl

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
"""
    ecg(g; γ=1.0, ensemble_size=16, min_edge_weight=0.05, min_weight_outside_2core=true, distmx=weights(g), max_moves=1000, max_merges=1000, move_tol=1e-9, merge_tol=1e-9, rng=nothing, seed=nothing)

Community detection using ensemble clustering for graphs (ECG). Weights the edges based on the
proportion of time the endpoints are in the same cluster of a Louvain without merges before running
a final Louvain to detect communities.

The weight handed to the final Louvain for an edge is
`(1 - min_edge_weight) * w + min_edge_weight * a`, where `w` is the ensemble weight returned by
[`ecg_weights`](@ref) and `a` is the corresponding entry of `adjacency_matrix(g, Float64)`.
An empty vector is returned when `g` has no vertices.

### Optional Arguments
- `distmx=weights(g)`: distance matrix for weighted graphs
- `ensemble_size=16`: the number of no merge Louvains in the ensemble
- `min_edge_weight=0.05`: the minimum edge weight passed to the final Louvain (to retain the original topology).
- `min_weight_outside_2core=true`: a flag to set the weight of edges outside the 2-core to the minimum value. If the graph is directed, the coreness is computed only using out degrees. Must be false if the graph has loops or parallel edges; an `ArgumentError` is thrown when it is `true` and the graph has self-loops.
- `γ=1.0`: where `γ > 0` is a resolution parameter. Higher resolutions lead to more
    communities, while lower resolutions lead to fewer communities. Where `γ=1.0` it
    leads to the traditional definition of the modularity.
- `max_moves=1000`: maximum number of rounds moving vertices before merging for each Louvain.
- `max_merges=1000`: maximum number of merges in the final Louvain.
- `move_tol=1e-9`: necessary increase of modularity to move a vertex in each Louvain.
- `merge_tol=1e-9`: necessary increase of modularity in the move stage to merge in the final Louvain.
- `rng=nothing`: rng to use for reproducibility. May only pass one of rng or seed.
- `seed=nothing`: seed to use for reproducibility. May only pass one of rng or seed.

### References
- [Valérie Poulin and François Théberge. Ensemble Clustering for Graphs: Comparisons and Applications. Applied Network Science, 4:4 (2019)](https://doi.org/10.1007/s41109-019-0162-z)

# Examples
```jldoctest
julia> using Graphs

julia> barbell = blockdiag(complete_graph(3), complete_graph(3));

julia> add_edge!(barbell, 1, 4);

julia> ecg(barbell)
6-element Vector{Int64}:
 1
 1
 1
 2
 2
 2

julia> ecg(barbell, γ=0.01)
6-element Vector{Int64}:
 1
 1
 1
 1
 1
 1
```
"""
function ecg(
    g::AbstractGraph{T};
    γ=1.0,
    ensemble_size::Integer=16,
    min_edge_weight::Real=0.05,
    min_weight_outside_2core::Bool=true,
    distmx::AbstractArray{<:Number}=weights(g),
    max_moves::Integer=1000,
    max_merges::Integer=1000,
    move_tol::Real=1e-9,
    merge_tol::Real=1e-9,
    rng::Union{Nothing,AbstractRNG}=nothing,
    seed::Union{Nothing,Integer}=nothing,
) where {T}
    min_weight_outside_2core &&
        has_self_loops(g) &&
        throw(
            ArgumentError("min_weight_outside_2core must be false if the graph has loops.")
        )
    rng = rng_from_rng_or_seed(rng, seed)
    if nv(g) == 0
        return T[]
    end
    ensemble_weights = ecg_weights(
        g;
        γ=γ,
        ensemble_size=ensemble_size,
        distmx=distmx,
        max_moves=max_moves,
        move_tol=move_tol,
        rng=rng,
    )
    if min_weight_outside_2core
        corenum = core_number(g)
        # Only entries that correspond to edges can be nonzero, so walk the edges instead
        # of scanning (and collecting indices for) the whole nv(g) × nv(g) matrix.
        for e in edges(g)
            u, v = src(e), dst(e)
            if corenum[u] < 2 || corenum[v] < 2
                ensemble_weights[u, v] = 0.0
                if !is_directed(g)
                    # Undirected edges are stored in both triangles of the matrix.
                    ensemble_weights[v, u] = 0.0
                end
            end
        end
    end
    # Blend the ensemble weights with the plain adjacency so that every original edge keeps
    # at least `min_edge_weight`. Named `final_weights` so the `weights` function used in
    # the `distmx` default is not shadowed.
    final_weights =
        (1 - min_edge_weight) * ensemble_weights +
        min_edge_weight * adjacency_matrix(g, Float64)
    return louvain(
        g;
        γ=γ,
        distmx=final_weights,
        max_moves=max_moves,
        max_merges=max_merges,
        move_tol=move_tol,
        merge_tol=merge_tol,
        rng=rng,
    )
end
108+
"""
    ecg_weights(g; γ=1.0, ensemble_size=16, distmx=weights(g), max_moves=1000, move_tol=1e-9, rng=nothing, seed=nothing)

Compute edge weights via an ensemble of no merge Louvains. The weight of each edge is
the proportion of time the endpoints are in the same community.

Return a sparse `Float64` matrix built from `adjacency_matrix(g, Float64)`: every edge keeps a
stored entry (possibly an explicit zero) holding its weight. For undirected graphs both
`[u, v]` and `[v, u]` are filled; an undirected self-loop is therefore counted twice on the
diagonal.

### Optional Arguments
- `γ=1.0`: resolution parameter forwarded to each Louvain run.
- `ensemble_size=16`: the number of no merge Louvains in the ensemble. If it is not positive,
    no Louvain is run and all weights are zero.
- `distmx=weights(g)`: distance matrix for weighted graphs, forwarded to each Louvain run.
- `max_moves=1000`: maximum number of rounds moving vertices for each Louvain.
- `move_tol=1e-9`: necessary increase of modularity to move a vertex in each Louvain.
- `rng=nothing`: rng to use for reproducibility. May only pass one of rng or seed.
- `seed=nothing`: seed to use for reproducibility. May only pass one of rng or seed.
"""
function ecg_weights(
    g::AbstractGraph{T};
    γ=1.0,
    ensemble_size::Integer=16,
    distmx::AbstractArray{<:Number}=weights(g),
    max_moves::Integer=1000,
    move_tol::Real=1e-9,
    rng::Union{Nothing,AbstractRNG}=nothing,
    seed::Union{Nothing,Integer}=nothing,
) where {T}
    rng = rng_from_rng_or_seed(rng, seed)
    # Create sparse adjacency matrix full of explicit zeros, so every edge already owns a
    # stored slot that can be updated in place.
    ensemble_weights = adjacency_matrix(g, Float64)
    ensemble_weights.nzval .= 0

    # First accumulate how many ensemble members put both endpoints in the same community.
    # Counting and dividing once avoids the rounding drift of adding `1 / ensemble_size`
    # repeatedly (e.g. ten additions of 0.1 do not sum to exactly 1.0).
    for _ in 1:ensemble_size
        ensemble_communities = louvain(
            g;
            γ=γ,
            distmx=distmx,
            max_moves=max_moves,
            max_merges=0,  # "no merge" Louvain: only the vertex-moving phase is run
            move_tol=move_tol,
            rng=rng,
        )
        for e in edges(g)
            u, v = src(e), dst(e)
            ensemble_communities[u] == ensemble_communities[v] || continue
            ensemble_weights[u, v] += 1
            if !is_directed(g)
                ensemble_weights[v, u] += 1
            end
        end
    end

    # Turn counts into proportions; skipped for a non-positive ensemble size so the result
    # stays all-zero instead of becoming 0 / 0.
    if ensemble_size > 0
        ensemble_weights.nzval ./= ensemble_size
    end

    return ensemble_weights
end

test/community/ecg.jl

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
# Tests for ECG community detection: `ecg_weights` (ensemble edge weights) and `ecg`
# (final community assignment). With the default ensemble size of 16 every expected weight
# below is an exact multiple of 1/16, so exact `==` comparisons are safe.
@testset "ECG" begin
    # Shared fixtures.
    # Two undirected triangles joined by a single bridge edge.
    undirected_barbell = barbell_graph(3, 3)

    # Two directed 3-cycles joined by the single edge 1 -> 4.
    triangle = SimpleDiGraph(3)
    add_edge!(triangle, 1, 2)
    add_edge!(triangle, 2, 3)
    add_edge!(triangle, 3, 1)
    directed_barbell = blockdiag(triangle, triangle)
    add_edge!(directed_barbell, 1, 4)

    # Graphs without edges / without vertices. Not named `empty`, to avoid shadowing
    # `Base.empty`.
    edgeless = SimpleGraph(10)
    nullgraph = SimpleGraph()

    # Undirected pair of vertices, each with a self-loop.
    undirected_loops = complete_graph(2)
    add_edge!(undirected_loops, 1, 1)
    add_edge!(undirected_loops, 2, 2)

    @testset "ecg_weights" begin
        # Undirected barbell: edges inside each triangle get weight 1, the bridge gets 0.
        c = sparse(
            [
                0.0 1.0 1.0 0.0 0.0 0.0;
                1.0 0.0 1.0 0.0 0.0 0.0;
                1.0 1.0 0.0 0.0 0.0 0.0;
                0.0 0.0 0.0 0.0 1.0 1.0;
                0.0 0.0 0.0 1.0 0.0 1.0;
                0.0 0.0 0.0 1.0 1.0 0.0
            ],
        )
        for g in test_generic_graphs(undirected_barbell)
            r = ecg_weights(g)
            dropzeros!(r)
            @test c == r
        end

        # No edges
        c = spzeros(10, 10)
        for g in test_generic_graphs(edgeless)
            r = @inferred ecg_weights(g)
            dropzeros!(r)
            @test c == r
        end

        # No vertices
        c = spzeros(0, 0)
        for g in test_generic_graphs(nullgraph)
            r = @inferred ecg_weights(g)
            @test c == r
        end

        # Undirected self-loops: both symmetric updates hit the same diagonal entry,
        # hence the expected weight of 2.
        c = sparse([
            2.0 0.0;
            0.0 2.0
        ])
        for g in test_generic_graphs(undirected_loops)
            r = ecg_weights(g)
            dropzeros!(r)
            @test c == r
        end

        # Directed barbell: only the cycle edges keep weight, the connecting edge gets 0.
        c = sparse(
            [
                0.0 1.0 0.0 0.0 0.0 0.0;
                0.0 0.0 1.0 0.0 0.0 0.0;
                1.0 0.0 0.0 0.0 0.0 0.0;
                0.0 0.0 0.0 0.0 1.0 0.0;
                0.0 0.0 0.0 0.0 0.0 1.0;
                0.0 0.0 0.0 1.0 0.0 0.0
            ],
        )
        for g in test_generic_graphs(directed_barbell)
            r = ecg_weights(g)
            dropzeros!(r)
            @test r == c
        end

        # Directed self-loops joined by the edge 1 -> 2
        directed_loops = SimpleDiGraph(2)
        add_edge!(directed_loops, 1, 1)
        add_edge!(directed_loops, 2, 2)
        add_edge!(directed_loops, 1, 2)
        c = sparse([
            1.0 0.0;
            0.0 1.0
        ])
        for g in test_generic_graphs(directed_loops)
            r = ecg_weights(g)
            dropzeros!(r)
            @test r == c
        end
    end

    @testset "ecg" begin
        # Undirected
        c = [1, 1, 1, 2, 2, 2]
        for g in test_generic_graphs(undirected_barbell)
            r = ecg(g)
            @test c == r
        end

        # Directed
        c = [1, 1, 1, 2, 2, 2]
        for g in test_generic_graphs(directed_barbell)
            r = ecg(g)
            @test r == c
        end

        # No edges: every vertex is its own community
        c = collect(1:10)
        for g in test_generic_graphs(edgeless)
            r = ecg(g)
            @test c == r
        end

        # No vertices
        for g in test_generic_graphs(nullgraph)
            r = ecg(g)
            @test length(r) == 0
        end

        # Self-loops are rejected while `min_weight_outside_2core` keeps its default (true)
        @test_throws ArgumentError ecg(undirected_loops)
    end
end

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ tests = [
118118
"traversals/all_simple_paths",
119119
"community/cliques",
120120
"community/core-periphery",
121+
"community/ecg",
121122
"community/independent_sets",
122123
"community/label_propagation",
123124
"community/louvain",

0 commit comments

Comments
 (0)