From c08790bd839e0f3969dbb89d35afcbf53f62793f Mon Sep 17 00:00:00 2001 From: dferre97 Date: Mon, 28 Jul 2025 15:24:59 +0200 Subject: [PATCH 1/6] Add coalesce_graph function for COO graphs and is_coalesced flag --- GNNGraphs/src/GNNGraphs.jl | 1 + GNNGraphs/src/gnngraph.jl | 14 ++++++++++++++ GNNGraphs/src/transform.jl | 25 +++++++++++++++++++++++++ GNNGraphs/src/utils.jl | 2 +- 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/GNNGraphs/src/GNNGraphs.jl b/GNNGraphs/src/GNNGraphs.jl index 1c4947d45..28ac7a4e3 100644 --- a/GNNGraphs/src/GNNGraphs.jl +++ b/GNNGraphs/src/GNNGraphs.jl @@ -76,6 +76,7 @@ export add_nodes, remove_self_loops, remove_edges, remove_multi_edges, + coalesce_graph, set_edge_weight, to_bidirected, to_unidirected, diff --git a/GNNGraphs/src/gnngraph.jl b/GNNGraphs/src/gnngraph.jl index 0a818b4d2..b90cfc032 100644 --- a/GNNGraphs/src/gnngraph.jl +++ b/GNNGraphs/src/gnngraph.jl @@ -113,6 +113,20 @@ struct GNNGraph{T <: Union{COO_T, ADJMAT_T}} <: AbstractGNNGraph{T} ndata::DataStore edata::DataStore gdata::DataStore + is_coalesced::Bool # only for :coo, true if the graph is coalesced, i.e., indices ordered by row and no multi edges +end + +# GNNGraph constructor setting the is_coalesced field to false +function GNNGraph(graph::T, + num_nodes::Int, + num_edges::Int, + num_graphs::Int, + graph_indicator::Union{Nothing, AVecI}, + ndata::DataStore, + edata::DataStore, + gdata::DataStore) where {T <: Union{COO_T, ADJMAT_T}} + return GNNGraph{T}(graph, num_nodes, num_edges, num_graphs, + graph_indicator, ndata, edata, gdata, false) end function GNNGraph(data::D; diff --git a/GNNGraphs/src/transform.jl b/GNNGraphs/src/transform.jl index afc94b517..4a8ea3480 100644 --- a/GNNGraphs/src/transform.jl +++ b/GNNGraphs/src/transform.jl @@ -184,6 +184,31 @@ function remove_multi_edges(g::GNNGraph{<:COO_T}; aggr = +) g.ndata, edata, g.gdata) end +""" + coalesce_graph(g::GNNGraph) + +Return a new GNNGraph where all multiple edges between the same pair of nodes are merged (using aggr for edge weights and features), and the edge indices are sorted lexicographically (by source, then target). +This method is only applicable to graphs of type `:coo`. +""" +function coalesce_graph(g::GNNGraph{<:COO_T}, aggr = +) + # remove multi-edges first + g = remove_multi_edges(g, aggr = aggr) + # Order indices using sort_edge_index + s, t = edge_index(g) + w = get_edge_weight(g) + edata = g.edata + s, t, perm = sort_edge_index(s, t) + + w = isnothing(w) ? nothing : getobs(w, perm) + edata = getobs(edata, perm) + + # Create a new GNNGraph with the sorted indices and no multi edges + return GNNGraph((s, t, w), + g.num_nodes, length(s), g.num_graphs, + g.graph_indicator, + g.ndata, edata, g.gdata, true) +end + """ remove_nodes(g::GNNGraph, nodes_to_remove::AbstractVector) diff --git a/GNNGraphs/src/utils.jl b/GNNGraphs/src/utils.jl index 9b12f43f1..fb5c87712 100644 --- a/GNNGraphs/src/utils.jl +++ b/GNNGraphs/src/utils.jl @@ -41,7 +41,7 @@ are sorted first according to the `ui` and then according to `vi`. function sort_edge_index(u, v) uv = collect(zip(u, v)) p = sortperm(uv) # isless lexicographically defined for tuples - return u[p], v[p] + return (u = u[p], v = v[p], perm = p) end From f02209e4b515ce3cdcd718b8c5fe9bab64db2f13 Mon Sep 17 00:00:00 2001 From: dferre97 Date: Mon, 28 Jul 2025 18:44:22 +0200 Subject: [PATCH 2/6] Simplify coalesce_graph function --- GNNGraphs/src/transform.jl | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/GNNGraphs/src/transform.jl b/GNNGraphs/src/transform.jl index 4a8ea3480..aaf1ed3ae 100644 --- a/GNNGraphs/src/transform.jl +++ b/GNNGraphs/src/transform.jl @@ -181,7 +181,7 @@ function remove_multi_edges(g::GNNGraph{<:COO_T}; aggr = +) return GNNGraph((s, t, w), g.num_nodes, num_edges, g.num_graphs, g.graph_indicator, - g.ndata, edata, g.gdata) + g.ndata, edata, g.gdata, true) end """ @@ -191,22 +191,7 @@ Return a new GNNGraph where all multiple edges between the same pair of nodes ar This method is only applicable to graphs of type `:coo`. """ function coalesce_graph(g::GNNGraph{<:COO_T}, aggr = +) - # remove multi-edges first - g = remove_multi_edges(g, aggr = aggr) - # Order indices using sort_edge_index - s, t = edge_index(g) - w = get_edge_weight(g) - edata = g.edata - s, t, perm = sort_edge_index(s, t) - - w = isnothing(w) ? nothing : getobs(w, perm) - edata = getobs(edata, perm) - - # Create a new GNNGraph with the sorted indices and no multi edges - return GNNGraph((s, t, w), - g.num_nodes, length(s), g.num_graphs, - g.graph_indicator, - g.ndata, edata, g.gdata, true) + return remove_multi_edges(g, aggr = aggr) end """ From 2dec8086c86123cf543542a277e8b1151c9c514f Mon Sep 17 00:00:00 2001 From: dferre97 Date: Mon, 28 Jul 2025 18:44:54 +0200 Subject: [PATCH 3/6] Revert back to old sort_edge_index function --- GNNGraphs/src/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GNNGraphs/src/utils.jl b/GNNGraphs/src/utils.jl index fb5c87712..9b12f43f1 100644 --- a/GNNGraphs/src/utils.jl +++ b/GNNGraphs/src/utils.jl @@ -41,7 +41,7 @@ are sorted first according to the `ui` and then according to `vi`. function sort_edge_index(u, v) uv = collect(zip(u, v)) p = sortperm(uv) # isless lexicographically defined for tuples - return (u = u[p], v = v[p], perm = p) + return u[p], v[p] end From 54d2fd1da6f7463be4ffb2421120a69ca6c5bc51 Mon Sep 17 00:00:00 2001 From: dferre97 Date: Wed, 6 Aug 2025 11:19:09 +0200 Subject: [PATCH 4/6] Rename remove_multi_edges to coalesce, update references, add deprecation --- GNNGraphs/src/GNNGraphs.jl | 3 +-- GNNGraphs/src/deprecations.jl | 2 ++ GNNGraphs/src/transform.jl | 29 +++++++++-------------------- GNNGraphs/test/transform.jl | 6 +++--- 4 files changed, 15 insertions(+), 25 deletions(-) diff --git a/GNNGraphs/src/GNNGraphs.jl b/GNNGraphs/src/GNNGraphs.jl index 28ac7a4e3..783d98b15 100644 --- a/GNNGraphs/src/GNNGraphs.jl +++ b/GNNGraphs/src/GNNGraphs.jl @@ -75,8 +75,7 @@ export add_nodes, rand_edge_split, remove_self_loops, remove_edges, - remove_multi_edges, - coalesce_graph, + coalesce, set_edge_weight, to_bidirected, to_unidirected, diff --git a/GNNGraphs/src/deprecations.jl b/GNNGraphs/src/deprecations.jl index d6a8e1797..04273cf4d 100644 --- a/GNNGraphs/src/deprecations.jl +++ b/GNNGraphs/src/deprecations.jl @@ -11,3 +11,5 @@ function Base.getproperty(vds::Vector{DataStore}, s::Symbol) return [getdata(ds)[s] for ds in vds] end end + +@deprecate remove_multi_edges(g::GNNGraph{<:COO_T}; aggr = +) Base.coalesce(g; aggr = aggr) \ No newline at end of file diff --git a/GNNGraphs/src/transform.jl b/GNNGraphs/src/transform.jl index aaf1ed3ae..ce8d90b6a 100644 --- a/GNNGraphs/src/transform.jl +++ b/GNNGraphs/src/transform.jl @@ -44,7 +44,7 @@ end Return a graph constructed from `g` where self-loops (edges from a node to itself) are removed. -See also [`add_self_loops`](@ref) and [`remove_multi_edges`](@ref). +See also [`add_self_loops`](@ref) and [`coalesce`](@ref). """ function remove_self_loops(g::GNNGraph{<:COO_T}) s, t = edge_index(g) @@ -146,15 +146,14 @@ function remove_edges(g::GNNGraph{<:COO_T}, p = 0.5) end """ - remove_multi_edges(g::GNNGraph; aggr=+) + coalesce(g::GNNGraph; aggr=+) -Remove multiple edges (also called parallel edges or repeated edges) from graph `g`. -Possible edge features are aggregated according to `aggr`, that can take value -`+`,`min`, `max` or `mean`. +Return a new GNNGraph where all multiple edges between the same pair of nodes are merged (using aggr for edge weights and features), and the edge indices are sorted lexicographically (by source, then target). +This method is only applicable to graphs of type `:coo`. -See also [`remove_self_loops`](@ref), [`has_multi_edges`](@ref), and [`to_bidirected`](@ref). +`aggr` can take value `+`,`min`, `max` or `mean`. """ -function remove_multi_edges(g::GNNGraph{<:COO_T}; aggr = +) +function Base.coalesce(g::GNNGraph{<:COO_T}; aggr = +) s, t = edge_index(g) w = get_edge_weight(g) edata = g.edata @@ -184,16 +183,6 @@ function remove_multi_edges(g::GNNGraph{<:COO_T}; aggr = +) g.ndata, edata, g.gdata, true) end -""" - coalesce_graph(g::GNNGraph) - -Return a new GNNGraph where all multiple edges between the same pair of nodes are merged (using aggr for edge weights and features), and the edge indices are sorted lexicographically (by source, then target). -This method is only applicable to graphs of type `:coo`. -""" -function coalesce_graph(g::GNNGraph{<:COO_T}, aggr = +) - return remove_multi_edges(g, aggr = aggr) -end - """ remove_nodes(g::GNNGraph, nodes_to_remove::AbstractVector) @@ -451,7 +440,7 @@ end to_bidirected(g) Adds a reverse edge for each edge in the graph, then calls -[`remove_multi_edges`](@ref) with `mean` aggregation to simplify the graph. +[`coalesce`](@ref) with `mean` aggregation to simplify the graph. See also [`is_bidirected`](@ref). @@ -515,7 +504,7 @@ function to_bidirected(g::GNNGraph{<:COO_T}) g.graph_indicator, g.ndata, edata, g.gdata) - return remove_multi_edges(g; aggr = mean) + return coalesce(g; aggr = mean) end """ @@ -535,7 +524,7 @@ function to_unidirected(g::GNNGraph{<:COO_T}) g.graph_indicator, g.ndata, g.edata, g.gdata) - return remove_multi_edges(g; aggr = mean) + return coalesce(g; aggr = mean) end function Graphs.SimpleGraph(g::GNNGraph) diff --git a/GNNGraphs/test/transform.jl b/GNNGraphs/test/transform.jl index 275efe006..7606ffb1f 100644 --- a/GNNGraphs/test/transform.jl +++ b/GNNGraphs/test/transform.jl @@ -345,7 +345,7 @@ end end end -@testitem "remove_multi_edges" setup=[GraphsTestModule] begin +@testitem "coalesce" setup=[GraphsTestModule] begin using .GraphsTestModule for GRAPH_T in GRAPH_TYPES if GRAPH_T == :coo @@ -353,14 +353,14 @@ end s, t = edge_index(g) g1 = add_edges(g, s[1:5], t[1:5]) @test g1.num_edges == g.num_edges + 5 - g2 = remove_multi_edges(g1, aggr = +) + g2 = coalesce(g1, aggr = +) @test g2.num_edges == g.num_edges @test sort_edge_index(edge_index(g2)) == sort_edge_index(edge_index(g)) # Default aggregation is + g1 = GNNGraph(g1, edata = (e1 = ones(3, g1.num_edges), e2 = 2 * ones(g1.num_edges))) g1 = set_edge_weight(g1, 3 * ones(g1.num_edges)) - g2 = remove_multi_edges(g1) + g2 = coalesce(g1) @test g2.num_edges == g.num_edges @test sort_edge_index(edge_index(g2)) == sort_edge_index(edge_index(g)) @test count(g2.edata.e1[:, i] == 2 * ones(3) for i in 1:(g2.num_edges)) == 5 From 773ec4d76c49a1e7201cafdfe85c2a51d10618c1 Mon Sep 17 00:00:00 2001 From: dferre97 Date: Mon, 11 Aug 2025 08:59:45 +0200 Subject: [PATCH 5/6] Add is_coalesced function to check if graph is coalesced --- GNNGraphs/src/query.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/GNNGraphs/src/query.jl b/GNNGraphs/src/query.jl index b427c5916..e2e5f9bdb 100644 --- a/GNNGraphs/src/query.jl +++ b/GNNGraphs/src/query.jl @@ -103,6 +103,18 @@ Graphs.ne(g::GNNGraph) = g.num_edges Graphs.has_vertex(g::GNNGraph, i::Int) = 1 <= i <= g.num_nodes Graphs.vertices(g::GNNGraph) = 1:(g.num_nodes) +""" + is_coalesced(g::GNNGraph) -> Bool + +Check whether the given `GNNGraph` `g` is coalesced (see [`coalesce`](@ref)). Only meaningful for COO graphs. + +# Arguments +- `g::GNNGraph`: The graph to check. + +# Returns +- `Bool`: Whether the graph is coalesced. If the graph is not of type COO, this function will always return `false`. +""" +is_coalesced(g::GNNGraph) = g.is_coalesced """ neighbors(g::GNNGraph, i::Integer; dir=:out) From 262373b07d855e803ed61901e743a053c6d0af6c Mon Sep 17 00:00:00 2001 From: dferre97 Date: Tue, 12 Aug 2025 11:12:20 +0200 Subject: [PATCH 6/6] Add tests to verify is_coalesced for various graph transformations --- GNNGraphs/test/transform.jl | 48 +++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/GNNGraphs/test/transform.jl b/GNNGraphs/test/transform.jl index 7606ffb1f..256d851bf 100644 --- a/GNNGraphs/test/transform.jl +++ b/GNNGraphs/test/transform.jl @@ -714,3 +714,51 @@ end end end end + +@testitem "graph transform ops set is_coalesced=false" setup=[GraphsTestModule] begin + using .GraphsTestModule + g = rand_graph(5, 10, graph_type=:coo) + g = coalesce(g) # ensure the graph is coalesced to start with + + # add_self_loops + g1 = add_self_loops(g) + @test g1.is_coalesced == false + + # remove_self_loops + g2 = add_self_loops(g) # ensure there are self-loops to remove + g2 = remove_self_loops(g2) + @test g2.is_coalesced == false + + # remove_edges + g3 = remove_edges(g, [1]) + @test g3.is_coalesced == false + + # add_edges + g4 = add_edges(g, [1], [2]) + @test g4.is_coalesced == false + + # perturb_edges + g5 = perturb_edges(g, 0.5) + @test g5.is_coalesced == false + + # remove_nodes + g6 = remove_nodes(g, [1]) + @test g6.is_coalesced == false + + # add_nodes + g7 = add_nodes(g, 2) + @test g7.is_coalesced == false + + # rand_edge_split returns two graphs + g8a, g8b = rand_edge_split(g, 0.5) + @test g8a.is_coalesced == false + @test g8b.is_coalesced == false + + # negative_sample + g9 = negative_sample(g, num_neg_edges=3) + @test g9.is_coalesced == false + + # ppr_diffusion + g11 = ppr_diffusion(g) + @test g11.is_coalesced == false +end