Skip to content

Commit 4aa3d6b

Browse files
authored
feat(aggregation): Add ConFIG (#223)
* Add ConFIG
* Add ConFIG unit tests
* Add ConFIG to the list of supported Aggregators in README.md
* Add ConFIG to the interactive plotter
* Add documentation entry for ConFIG
* Add doc test for ConFIG
* Add changelog entry
1 parent bf6a38a commit 4aa3d6b

File tree

9 files changed

+148
-0
lines changed

9 files changed

+148
-0
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ changes that do not affect the user.
1212

1313
- Added Python 3.13 classifier in pyproject.toml (we now also run tests on Python 3.13 in the CI).
1414

15+
### Added
16+
17+
- New aggregator `ConFIG` from [ConFIG: Towards Conflict-free Training of Physics Informed Neural Networks](https://arxiv.org/pdf/2408.11104).
19+
1520
## [0.4.1] - 2025-01-02
1621

1722
### Fixed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ TorchJD provides many existing aggregators from the literature, listed in the fo
113113
| [UPGrad](https://torchjd.org/docs/aggregation/upgrad/) (recommended) | [Jacobian Descent For Multi-Objective Optimization](https://arxiv.org/pdf/2406.16232) |
114114
| [AlignedMTL](https://torchjd.org/docs/aggregation/aligned_mtl/) | [Independent Component Alignment for Multi-Task Learning](https://arxiv.org/pdf/2305.19000) |
115115
| [CAGrad](https://torchjd.org/docs/aggregation/cagrad/) | [Conflict-Averse Gradient Descent for Multi-task Learning](https://arxiv.org/pdf/2110.14048) |
116+
| [ConFIG](https://torchjd.org/docs/aggregation/config/) | [ConFIG: Towards Conflict-free Training of Physics Informed Neural Networks](https://arxiv.org/pdf/2408.11104) |
116117
| [Constant](https://torchjd.org/docs/aggregation/constant/) | - |
117118
| [DualProj](https://torchjd.org/docs/aggregation/dualproj/) | [Gradient Episodic Memory for Continual Learning](https://arxiv.org/pdf/1706.08840) |
118119
| [GradDrop](https://torchjd.org/docs/aggregation/graddrop/) | [Just Pick a Sign: Optimizing Deep Multitask Models with Gradient Sign Dropout](https://arxiv.org/pdf/2010.06808) |
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
:hide-toc:
2+
3+
ConFIG
4+
======
5+
6+
.. automodule:: torchjd.aggregation.config
7+
:members:
8+
:undoc-members:
9+
:exclude-members: forward

docs/source/docs/aggregation/index.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ In TorchJD, an aggregator is a class that inherits from the abstract class
3838
- |no|
3939
- |no|
4040
- |yes|
41+
* - :doc:`ConFIG <config>`
42+
- |no|
43+
- |yes|
44+
- |yes|
4145
* - :doc:`Constant <constant>`
4246
- |no|
4347
- |yes|
@@ -140,6 +144,7 @@ In TorchJD, an aggregator is a class that inherits from the abstract class
140144
upgrad.rst
141145
aligned_mtl.rst
142146
cagrad.rst
147+
config.rst
143148
constant.rst
144149
dualproj.rst
145150
graddrop.rst

src/torchjd/aggregation/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .aligned_mtl import AlignedMTL
22
from .bases import Aggregator
33
from .cagrad import CAGrad
4+
from .config import ConFIG
45
from .constant import Constant
56
from .dualproj import DualProj
67
from .graddrop import GradDrop

src/torchjd/aggregation/config.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# The code of this file was partly adapted from
2+
# https://github.com/tum-pbs/ConFIG/tree/main/conflictfree.
3+
# It is therefore also subject to the following license.
4+
#
5+
# MIT License
6+
#
7+
# Copyright (c) 2024 TUM Physics-based Simulation
8+
#
9+
# Permission is hereby granted, free of charge, to any person obtaining a copy
10+
# of this software and associated documentation files (the "Software"), to deal
11+
# in the Software without restriction, including without limitation the rights
12+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13+
# copies of the Software, and to permit persons to whom the Software is
14+
# furnished to do so, subject to the following conditions:
15+
#
16+
# The above copyright notice and this permission notice shall be included in all
17+
# copies or substantial portions of the Software.
18+
#
19+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25+
# SOFTWARE.
26+
27+
28+
import torch
29+
from torch import Tensor
30+
31+
from torchjd.aggregation._pref_vector_utils import (
32+
_check_pref_vector,
33+
_pref_vector_to_str_suffix,
34+
_pref_vector_to_weighting,
35+
)
36+
from torchjd.aggregation.bases import Aggregator
37+
from torchjd.aggregation.sum import _SumWeighting
38+
39+
40+
class ConFIG(Aggregator):
    """
    :class:`~torchjd.aggregation.bases.Aggregator` as defined in Equation 2 of `ConFIG: Towards
    Conflict-free Training of Physics Informed Neural Networks <https://arxiv.org/pdf/2408.11104>`_.

    :param pref_vector: The preference vector used to weight the rows. If not provided, defaults to
        equal weights of 1.

    .. admonition::
        Example

        Use ConFIG to aggregate a matrix.

        >>> from torch import tensor
        >>> from torchjd.aggregation import ConFIG
        >>>
        >>> A = ConFIG()
        >>> J = tensor([[-4., 1., 1.], [6., 1., 1.]])
        >>>
        >>> A(J)
        tensor([0.1588, 2.0706, 2.0706])

    .. note::
        This implementation was adapted from the `official implementation
        <https://github.com/tum-pbs/ConFIG/tree/main/conflictfree>`_.
    """

    def __init__(self, pref_vector: Tensor | None = None):
        super().__init__()
        # Validate before storing: invalid preference vectors are rejected here.
        _check_pref_vector(pref_vector)
        self.weighting = _pref_vector_to_weighting(pref_vector, default=_SumWeighting())
        self._pref_vector = pref_vector

    def forward(self, matrix: Tensor) -> Tensor:
        weights = self.weighting(matrix)

        # Normalize each row of the matrix. A zero row yields 0/0 = NaN, which is
        # mapped back to zero so it contributes nothing to the pseudo-inverse.
        row_norms = matrix.norm(dim=1).unsqueeze(1)
        units = torch.nan_to_num(matrix / row_norms, 0.0)

        # Equation 2 of the paper: the conflict-free direction solves the
        # least-squares system formed by the unit gradients and the weights.
        best_direction = torch.linalg.pinv(units) @ weights

        direction_norm = best_direction.norm()
        if direction_norm == 0:
            # Degenerate case (e.g. all-zero matrix): return a zero vector.
            unit_target_vector = torch.zeros_like(best_direction)
        else:
            unit_target_vector = best_direction / direction_norm

        # Rescale by the total projection of all gradients onto the target direction.
        projections = [torch.dot(row, unit_target_vector) for row in matrix]
        length = torch.sum(torch.stack(projections))

        return length * unit_target_vector

    def __repr__(self) -> str:
        return f"{type(self).__name__}(pref_vector={self._pref_vector!r})"

    def __str__(self) -> str:
        return "ConFIG" + _pref_vector_to_str_suffix(self._pref_vector)

tests/doc/test_aggregation.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,17 @@ def test_cagrad():
3737
assert_close(A(J), tensor([0.1835, 1.2041, 1.2041]), rtol=0, atol=1e-4)
3838

3939

40+
def test_config():
    from torch import tensor

    from torchjd.aggregation import ConFIG

    # Mirrors the doctest in the ConFIG docstring.
    aggregator = ConFIG()
    jacobian = tensor([[-4.0, 1.0, 1.0], [6.0, 1.0, 1.0]])
    expected = tensor([0.1588, 2.0706, 2.0706])

    assert_close(aggregator(jacobian), expected, rtol=0, atol=1e-4)
49+
50+
4051
def test_constant():
4152
from torch import tensor
4253

tests/plots/interactive_plotter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
MGDA,
1515
AlignedMTL,
1616
CAGrad,
17+
ConFIG,
1718
DualProj,
1819
GradDrop,
1920
Mean,
@@ -44,6 +45,7 @@ def main() -> None:
4445
aggregators = [
4546
AlignedMTL(),
4647
CAGrad(c=0.5),
48+
ConFIG(),
4749
DualProj(),
4850
GradDrop(),
4951
IMTLG(),
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import torch
2+
from pytest import mark
3+
4+
from torchjd.aggregation import ConFIG
5+
6+
from ._property_testers import ExpectedStructureProperty
7+
8+
9+
# NOTE(review): only the expected-structure property is checked here. Some
# permutation-invariance property tests were observed to fail with the pinv-based
# implementation — presumably a numerical sensitivity of torch.linalg.pinv;
# confirm the cause before adding a permutation-invariance property test.
@mark.parametrize("aggregator", [ConFIG()])
class TestConfig(ExpectedStructureProperty):
    """Check that ConFIG outputs have the structure expected of an Aggregator."""

    pass
14+
15+
16+
def test_representations():
    """Check repr and str of ConFIG, both with and without a preference vector."""
    without_pref = ConFIG()
    assert repr(without_pref) == "ConFIG(pref_vector=None)"
    assert str(without_pref) == "ConFIG"

    with_pref = ConFIG(pref_vector=torch.tensor([1.0, 2.0, 3.0], device="cpu"))
    assert repr(with_pref) == "ConFIG(pref_vector=tensor([1., 2., 3.]))"
    assert str(with_pref) == "ConFIG([1., 2., 3.])"

0 commit comments

Comments
 (0)