GraphNeuralNetworks.jl/src/GNNGraphs/gnngraph.jl at 277f3ce1c09bd31d556561a02a276b67cbabaeb6 · JuliaGraphs/GraphNeuralNetworks.jl · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#===================================
Define GNNGraph type as a subtype of Graphs' AbstractGraph.
For the core methods to be implemented by any AbstractGraph, see
https://juliagraphs.org/Graphs.jl/latest/types/#AbstractGraph-Type
https://juliagraphs.org/Graphs.jl/latest/developing/#Developing-Alternate-Graph-Types
=============================================#

# Type aliases naming the graph representations handled in this file.

# COO representation: `(source, target, weights)`. Both index vectors share the
# same vector type `T`; the third entry is unconstrained (the convenience
# constructors in this file pass `nothing` when no weights are given).
const COO_T = Tuple{T, T, V} where {T <: AbstractVector, V}

# Adjacency list: a vector whose elements are themselves vectors.
const ADJLIST_T = AbstractVector{<:AbstractVector}

# Adjacency matrix, dense or sparse.
const ADJMAT_T = AbstractMatrix

# Sparse adjacency matrix; a subset of ADJMAT_T.
const SPARSE_T = AbstractSparseMatrix

# CUDA.jl matrix types, dense (`AnyCuMatrix`) or sparse (`CUSPARSE`).
const CUMAT_T = Union{CUDA.AnyCuMatrix, CUDA.CUSPARSE.CuSparseMatrix}


"""
    GNNGraph(data; [graph_type, ndata, edata, gdata, num_nodes, graph_indicator, dir])
    GNNGraph(g::GNNGraph; [ndata, edata, gdata])

A type representing a graph structure that also stores
feature arrays associated to nodes, edges, and the graph itself.

A `GNNGraph` can be constructed out of different `data` objects
expressing the connections inside the graph. The internal representation type
is determined by `graph_type`.

When constructed from another `GNNGraph`, the internal graph representation
is preserved and shared. The node/edge/graph features are retained
as well, unless explicitly set by the keyword arguments
`ndata`, `edata`, and `gdata`.

A `GNNGraph` can also represent multiple graphs batched together
(see [`Flux.batch`](@ref) or [`SparseArrays.blockdiag`](@ref)).
The field `g.graph_indicator` contains the graph membership
of each node.

`GNNGraph`s are always directed graphs, therefore each edge is defined
by a source node and a target node (see [`edge_index`](@ref)).
Self loops (edges connecting a node to itself) and multiple edges
(more than one edge between the same pair of nodes) are supported.

A `GNNGraph` is a Graphs.jl's `AbstractGraph`, therefore it supports most
functionality from that library.

# Arguments

- `data`: Some data representing the graph topology. Possible types are
    - An adjacency matrix
    - An adjacency list.
    - A tuple containing the source and target vectors (COO representation)
    - A Graphs' graph.
- `graph_type`: A keyword argument that specifies
                the underlying representation used by the GNNGraph.
                Currently supported values are
    - `:coo`. Graph represented as a tuple `(source, target)`, such that the `k`-th edge
              connects the node `source[k]` to node `target[k]`.
              Optionally, also edge weights can be given: `(source, target, weights)`.
    - `:sparse`. A sparse adjacency matrix representation.
    - `:dense`. A dense adjacency matrix representation.
    Defaults to `:coo`, currently the most supported type.
- `dir`: The assumed edge direction when `data` is an adjacency matrix or an adjacency list.
        Possible values are `:out` and `:in`. Default `:out`.
- `num_nodes`: The number of nodes. If not specified, inferred from `data`. Default `nothing`.
- `graph_indicator`: For batched graphs, a vector containing the graph assignment of each node. Default `nothing`.
- `ndata`: Node features. A named tuple of arrays whose last dimension has size `num_nodes`.
- `edata`: Edge features. A named tuple of arrays whose last dimension has size `num_edges`.
- `gdata`: Graph features. A named tuple of arrays whose last dimension has size `num_graphs`.

# Usage.

```julia
using Flux, GraphNeuralNetworks

# Construct from adjacency list representation
data = [[2,3], [1,4,5], [1], [2,5], [2,4]]
g = GNNGraph(data)

# Number of nodes, edges, and batched graphs
g.num_nodes  # 5
g.num_edges  # 10
g.num_graphs # 1

# Same graph in COO representation
s = [1,1,2,2,2,3,4,4,5,5]
t = [2,3,1,4,5,3,2,5,2,4]
g = GNNGraph(s, t)

# From a Graphs' graph
g = GNNGraph(erdos_renyi(100, 20))

# Add 2 node feature arrays
g = GNNGraph(g, ndata = (x=rand(100, g.num_nodes), y=rand(g.num_nodes)))

# Add node features and edge features with default names `x` and `e`
g = GNNGraph(g, ndata = rand(100, g.num_nodes), edata = rand(16, g.num_edges))

g.ndata.x
g.edata.e

# Send to gpu
g = g |> gpu

# Collect edges' source and target nodes.
# Both source and target are vectors of length num_edges
source, target = edge_index(g)
```
"""
struct GNNGraph{T<:Union{COO_T,ADJMAT_T}} <: AbstractGraph{Int}
    # Graph topology in its internal representation: either a COO tuple or an
    # adjacency matrix, as fixed by the type parameter `T`.
    graph::T
    # Total number of nodes.
    num_nodes::Int
    # Total number of edges.
    num_edges::Int
    # Number of graphs batched together in this object.
    num_graphs::Int
    # Graph membership of each node for batched graphs.
    graph_indicator       # vector of ints or nothing
    # Node features.
    ndata::NamedTuple
    # Edge features.
    edata::NamedTuple
    # Graph-level features.
    gdata::NamedTuple
end

# Declare `GNNGraph` as a functor (Functors.jl) so that its fields can be
# traversed and rebuilt, e.g. when moving a graph with `g |> gpu`.
@functor GNNGraph

# Primary constructor: build a `GNNGraph` from raw topology `data`.
#
# The topology is converted to the representation selected by `graph_type`
# (`:coo`, `:dense` or `:sparse`), which also yields the node and edge counts.
# The number of graphs is `maximum(graph_indicator)`, or 1 when no indicator is
# given. Node, edge and graph features go through `normalize_graphdata` with the
# default names `:x`, `:e` and `:u` respectively.
function GNNGraph(data;
                  num_nodes = nothing,
                  graph_indicator = nothing,
                  graph_type = :coo,
                  dir = :out,
                  ndata = (;),
                  edata = (;),
                  gdata = (;))
    @assert graph_type ∈ [:coo, :dense, :sparse] "Invalid graph_type $graph_type requested"
    @assert dir ∈ [:in, :out]

    # Only the COO conversion is handed the user-supplied `num_nodes`.
    graph, num_nodes, num_edges =
        if graph_type == :coo
            to_coo(data; num_nodes, dir)
        elseif graph_type == :dense
            to_dense(data; dir)
        else
            # The assertion above leaves `:sparse` as the only remaining option.
            to_sparse(data; dir)
        end

    num_graphs = isnothing(graph_indicator) ? 1 : maximum(graph_indicator)

    node_feats = normalize_graphdata(ndata, default_name=:x, n=num_nodes)
    edge_feats = normalize_graphdata(edata, default_name=:e, n=num_edges,
                                     duplicate_if_needed=true)
    graph_feats = normalize_graphdata(gdata, default_name=:u, n=num_graphs)

    return GNNGraph(graph,
                    num_nodes, num_edges, num_graphs,
                    graph_indicator,
                    node_feats, edge_feats, graph_feats)
end

# COO convenience constructors
# Source and target vectors (plus optional third entry `v`) given as separate
# positional arguments: pack them into a COO tuple.
function GNNGraph(s::AbstractVector, t::AbstractVector, v = nothing; kws...)
    return GNNGraph((s, t, v); kws...)
end

# A two-element `(source, target)` tuple: complete it with `nothing` as the
# third entry.
function GNNGraph(st::NTuple{2}; kws...)
    s, t = st
    return GNNGraph((s, t, nothing); kws...)
end

# GNNGraph(g::AbstractGraph; kws...) = GNNGraph(adjacency_matrix(g, dir=:out); kws...)

# Build a `GNNGraph` from any Graphs.jl graph by reading off its edge list.
# The node count is taken from `Graphs.nv(g)`; `kws` are forwarded and, being
# splatted last, take precedence over it.
function GNNGraph(g::AbstractGraph; kws...)
    edge_list = collect(Graphs.edges(g))
    sources = Graphs.src.(edge_list)
    targets = Graphs.dst.(edge_list)

    coo = if Graphs.is_directed(g)
        (sources, targets)
    else
        # GNNGraph is directed: append every edge again in the reverse direction.
        (vcat(sources, targets), vcat(targets, sources))
    end

    return GNNGraph(coo; num_nodes=Graphs.nv(g), kws...)
end


# Build a new `GNNGraph` from an existing one, optionally replacing its features
# and/or converting its internal representation.
#
# Keyword arguments:
# - `ndata`, `edata`, `gdata`: node/edge/graph features. Each defaults to the
#   corresponding field of `g` and is passed through `normalize_graphdata`.
# - `graph_type`: `nothing` (default) reuses `g.graph` as is; `:coo`, `:dense`
#   or `:sparse` converts the topology with `to_coo`, `to_dense` or `to_sparse`.
#
# Node/edge/graph counts and `graph_indicator` are always taken from `g`.
function GNNGraph(g::GNNGraph; ndata=g.ndata, edata=g.edata, gdata=g.gdata, graph_type=nothing)
    # Same check as in the primary constructor. Without it an unsupported value
    # would skip every conversion branch below and surface as an `UndefVarError`.
    @assert isnothing(graph_type) || graph_type ∈ [:coo, :dense, :sparse] "Invalid graph_type $graph_type requested"

    ndata = normalize_graphdata(ndata, default_name=:x, n=g.num_nodes)
    edata = normalize_graphdata(edata, default_name=:e, n=g.num_edges, duplicate_if_needed=true)
    gdata = normalize_graphdata(gdata, default_name=:u, n=g.num_graphs)

    if isnothing(graph_type)
        # Keep (and share) the existing internal representation.
        graph = g.graph
    else
        if graph_type == :coo
            graph, num_nodes, num_edges = to_coo(g.graph; g.num_nodes)
        elseif graph_type == :dense
            graph, num_nodes, num_edges = to_dense(g.graph)
        else
            # Only `:sparse` is left after the validation above.
            graph, num_nodes, num_edges = to_sparse(g.graph)
        end
        # Changing the representation must not change the size of the graph.
        @assert num_nodes == g.num_nodes
        @assert num_edges == g.num_edges
    end

    return GNNGraph(graph,
                    g.num_nodes, g.num_edges, g.num_graphs,
                    g.graph_indicator,
                    ndata, edata, gdata)
end

# Multi-line summary of a `GNNGraph`: its concrete graph type, the node, edge
# and graph counts, then the name and array size of every node, edge and graph
# feature.
function Base.show(io::IO, g::GNNGraph{T}) where T
    println(io, "GNNGraph{$T}:")
    println(io, "    num_nodes = $(g.num_nodes)")
    println(io, "    num_edges = $(g.num_edges)")
    println(io, "    num_graphs = $(g.num_graphs)")

    # The three feature containers are printed with the same layout.
    for (label, feats) in (("ndata", g.ndata), ("edata", g.edata), ("gdata", g.gdata))
        println(io, "    $(label):")
        for (name, arr) in pairs(feats)
            println(io, "        $name => $(size(arr))")
        end
    end
end

### StatsBase/LearnBase compatibility
# The number of observations is the number of batched graphs, and observation
# `i` is obtained through `getgraph`.
function StatsBase.nobs(g::GNNGraph)
    return g.num_graphs
end

function LearnBase.getobs(g::GNNGraph, i)
    return getgraph(g, i)
end

# Flux's Dataloader compatibility. Related PR https://github.com/FluxML/Flux.jl/pull/1683
function Flux.Data._nobs(g::GNNGraph)
    return g.num_graphs
end

function Flux.Data._getobs(g::GNNGraph, i)
    return getgraph(g, i)
end

#########################
# Two graphs are equal when all of their fields compare equal with `==`.
function Base.:(==)(g1::GNNGraph, g2::GNNGraph)
    return all(k -> getfield(g1, k) == getfield(g2, k), fieldnames(typeof(g1)))
end

# Hash consistent with the field-wise `==` above, so that equal graphs land in
# the same `Dict`/`Set` bucket. The seed is the unparameterized `GNNGraph` type
# because graphs with different type parameters can still compare equal.
# NOTE(review): this hashes every field, feature arrays included — confirm that
# is acceptable for large or GPU-resident graphs.
function Base.hash(g::GNNGraph, h::UInt)
    return foldl((acc, k) -> hash(getfield(g, k), acc), fieldnames(typeof(g));
                 init = hash(GNNGraph, h))
end