fix egnn + equivariance/invariance tests

GiovanniCanali · dario-coscia · GiovanniCanali · commit d1f2094209b6 · 2025-06-01T14:18:02.000+02:00
Co-authored-by: Dario Coscia <dariocos99@gmail.com>
diff --git a/pina/model/block/message_passing/en_equivariant_network_block.py b/pina/model/block/message_passing/en_equivariant_network_block.py
@@ -10,26 +10,21 @@
 class EnEquivariantNetworkBlock(MessagePassing):
     """
     Implementation of the E(n) Equivariant Graph Neural Network block.
-
     This block is used to perform message-passing between nodes and edges in a
     graph neural network, following the scheme proposed by Satorras et al. in
     2021. It serves as an inner block in a larger graph neural network
     architecture.
-
     The message between two nodes connected by an edge is computed by applying a
     linear transformation to the sender node features and the edge features,
     together with the squared euclidean distance between the sender and
     recipient node positions, followed by a non-linear activation function.
     Messages are then aggregated using an aggregation scheme (e.g., sum, mean,
     min, max, or product).
-
     The update step is performed by applying another MLP to the concatenation of
     the incoming messages and the node features. Here, also the node
     positions are updated by adding the incoming messages divided by the
     degree of the recipient node.
-
     .. seealso::
-
         **Original reference** Satorras, V. G., Hoogeboom, E., Welling, M.
         (2021). *E(n) Equivariant Graph Neural Networks.*
         In International Conference on Machine Learning.
@@ -51,7 +46,6 @@ def __init__(
     ):
         """
         Initialization of the :class:`EnEquivariantNetworkBlock` class.
-
         :param int node_feature_dim: The dimension of the node features.
         :param int edge_feature_dim: The dimension of the edge features.
         :param int pos_dim: The dimension of the position features.
@@ -102,14 +96,24 @@ def __init__(
         )
 
         # Layer for updating the node features
-        self.update_net = FeedForward(
+        self.update_feat_net = FeedForward(
             input_dimensions=node_feature_dim + pos_dim,
             output_dimensions=node_feature_dim,
             inner_size=hidden_dim,
             n_layers=n_update_layers,
             func=activation,
         )
 
+        # Layer for updating the node positions
+        # The output dimension is set to 1 for equivariant updates
+        self.update_pos_net = FeedForward(
+            input_dimensions=pos_dim,
+            output_dimensions=1,
+            inner_size=hidden_dim,
+            n_layers=n_update_layers,
+            func=activation,
+        )
+
     def forward(self, x, pos, edge_index, edge_attr=None):
         """
         Forward pass of the block, triggering the message-passing routine.
@@ -143,22 +147,62 @@ def message(self, x_i, x_j, pos_i, pos_j, edge_attr):
         :param edge_attr: The edge attributes.
         :type edge_attr: torch.Tensor | LabelTensor
         :return: The message to be passed.
-        :rtype: torch.Tensor
+        :rtype: tuple(torch.Tensor, torch.Tensor)
         """
-        dist = torch.norm(pos_i - pos_j, dim=-1, keepdim=True) ** 2
+        # Compute the squared euclidean distance between sender and recipient
+        diff = pos_i - pos_j
+        dist = torch.norm(diff, dim=-1, keepdim=True) ** 2
+
+        # Compute the message input
         if edge_attr is None:
             input_ = torch.cat((x_i, x_j, dist), dim=-1)
         else:
             input_ = torch.cat((x_i, x_j, dist, edge_attr), dim=-1)
 
-        return self.message_net(input_)
+        # Compute the messages and their equivariant counterpart
+        m_ij = self.message_net(input_)
+        message = diff * self.update_pos_net(m_ij)
+
+        return message, m_ij
 
-    def update(self, message, x, pos, edge_index):
+    def aggregate(self, inputs, index, ptr=None, dim_size=None):
+        """
+        Aggregate the messages at the nodes during message passing.
+
+        This method receives a tuple of tensors corresponding to the messages
+        to be aggregated. Both messages are aggregated separately according to
+        the specified aggregation scheme.
+
+        :param tuple(torch.Tensor) inputs: Tuple containing two messages to
+            aggregate.
+        :param index: The indices of target nodes for each message. This tensor
+            specifies which node each message is aggregated into.
+        :type index: torch.Tensor | LabelTensor
+        :param ptr: Optional tensor to specify the slices of messages for each
+            node (used in some aggregation strategies). Default is None.
+        :type ptr: torch.Tensor | LabelTensor
+        :param int dim_size: Optional size of the output dimension, i.e.,
+            number of nodes. Default is None.
+        :return: Tuple of aggregated tensors corresponding to (aggregated
+            messages for position updates, aggregated messages for feature
+            updates).
+        :rtype: tuple(torch.Tensor, torch.Tensor)
+        """
+        # Unpack the messages from the inputs
+        message, m_ij = inputs
+
+        # Aggregate messages as usual using self.aggr method
+        agg_message = super().aggregate(message, index, ptr, dim_size)
+        agg_m_ij = super().aggregate(m_ij, index, ptr, dim_size)
+
+        return agg_message, agg_m_ij
+
+    def update(self, aggregated_inputs, x, pos, edge_index):
         """
         Update the node features and the node coordinates with the received
         messages.
 
-        :param torch.Tensor message: The message to be passed.
+        :param tuple(torch.Tensor) aggregated_inputs: The aggregated messages.
         :param x: The node features.
         :type x: torch.Tensor | LabelTensor
         :param pos: The euclidean coordinates of the nodes.
@@ -167,10 +211,14 @@ def update(self, message, x, pos, edge_index):
         :return: The updated node features and node positions.
         :rtype: tuple(torch.Tensor, torch.Tensor)
         """
-        # Update the node features
-        x = self.update_net(torch.cat((x, message), dim=-1))
+        # aggregated_inputs is the tuple (agg_message, agg_m_ij)
+        agg_message, agg_m_ij = aggregated_inputs
+
+        # Update node features with aggregated m_ij
+        x = self.update_feat_net(torch.cat((x, agg_m_ij), dim=-1))
+
+        # Degree for normalization of position updates
+        c = degree(edge_index[1], pos.shape[0]).unsqueeze(-1).clamp(min=1)
+        pos = pos + agg_message / c
 
-        # Update the node positions
-        c = degree(edge_index[0], pos.shape[0]).unsqueeze(-1)
-        pos = pos + message / c
         return x, pos
diff --git a/tests/test_messagepassing/test_equivariant_network_block.py b/tests/test_messagepassing/test_equivariant_network_block.py
@@ -128,3 +128,38 @@ def test_backward(edge_feature_dim):
     loss.backward()
     assert x.grad.shape == x.shape
     assert pos.grad.shape == pos.shape
+
+
+def test_equivariance():
+
+    # Build a fully connected, undirected graph
+    edge_index = torch.combinations(torch.arange(x.shape[0]), r=2).T
+    edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
+
+    # Random rotation (det(rotation) should be 1)
+    rotation = torch.linalg.qr(torch.rand(pos.shape[-1], pos.shape[-1])).Q
+    if torch.det(rotation) < 0:
+        rotation[:, 0] *= -1
+
+    # Random translation
+    translation = torch.rand(1, pos.shape[-1])
+
+    model = EnEquivariantNetworkBlock(
+        node_feature_dim=x.shape[1],
+        edge_feature_dim=0,
+        pos_dim=pos.shape[1],
+        hidden_dim=64,
+        n_message_layers=2,
+        n_update_layers=2,
+    ).eval()
+
+    h1, pos1 = model(edge_index=edge_index, x=x, pos=pos)
+    h2, pos2 = model(
+        edge_index=edge_index, x=x, pos=pos @ rotation.T + translation
+    )
+
+    # Transform model output
+    pos1_transformed = (pos1 @ rotation.T) + translation
+
+    assert torch.allclose(pos2, pos1_transformed, atol=1e-5)
+    assert torch.allclose(h1, h2, atol=1e-5)
diff --git a/tests/test_messagepassing/test_radial_field_network_block.py b/tests/test_messagepassing/test_radial_field_network_block.py
@@ -65,3 +65,28 @@ def test_backward():
     loss = torch.mean(output_)
     loss.backward()
     assert x.grad.shape == x.shape
+
+
+def test_equivariance():
+
+    # Build a fully connected, undirected graph
+    edge_index = torch.combinations(torch.arange(x.shape[0]), r=2).T
+    edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
+
+    # Random rotation (det(rotation) should be 1)
+    rotation = torch.linalg.qr(torch.rand(x.shape[-1], x.shape[-1])).Q
+    if torch.det(rotation) < 0:
+        rotation[:, 0] *= -1
+
+    # Random translation
+    translation = torch.rand(1, x.shape[-1])
+
+    model = RadialFieldNetworkBlock(node_feature_dim=x.shape[1]).eval()
+
+    pos1 = model(edge_index=edge_index, x=x)
+    pos2 = model(edge_index=edge_index, x=x @ rotation.T + translation)
+
+    # Transform model output
+    pos1_transformed = (pos1 @ rotation.T) + translation
+
+    assert torch.allclose(pos2, pos1_transformed, atol=1e-5)
diff --git a/tests/test_messagepassing/test_schnet_block.py b/tests/test_messagepassing/test_schnet_block.py
@@ -71,3 +71,25 @@ def test_backward():
     loss = torch.mean(output_)
     loss.backward()
     assert x.grad.shape == x.shape
+
+
+def test_invariance():
+
+    # Build a fully connected, undirected graph
+    edge_index = torch.combinations(torch.arange(x.shape[0]), r=2).T
+    edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
+
+    # Random rotation (det(rotation) should be 1)
+    rotation = torch.linalg.qr(torch.rand(pos.shape[-1], pos.shape[-1])).Q
+    if torch.det(rotation) < 0:
+        rotation[:, 0] *= -1
+
+    # Random translation
+    translation = torch.rand(1, pos.shape[-1])
+
+    model = SchnetBlock(node_feature_dim=x.shape[1]).eval()
+
+    out1 = model(edge_index=edge_index, x=x, pos=pos)
+    out2 = model(edge_index=edge_index, x=x, pos=pos @ rotation.T + translation)
+
+    assert torch.allclose(out1, out2, atol=1e-5)