several things:

PierreQuinton · PierreQuinton · commit 9ee68f88fd9c · 2026-01-15T11:15:00.000+01:00
- Move Matrix and PSDMatrix to _utils/compute_gramian.py (probably not the best location, but they should live in _utils)
- Change return type of compute_gramian to PSDMatrix
- Add compute_gramian_sum (note that the responsibility of casting to PSDMatrix is given to _utils now).
- Add a _gramian_based version of jac_to_grad. Note that we could move the tensordot(weights, jacobian, dims=1) call into _utils as a weight_generalize_matrix method.
diff --git a/src/torchjd/_utils/__init__.py b/src/torchjd/_utils/__init__.py
@@ -1,3 +1,3 @@
-from .compute_gramian import compute_gramian
+from .compute_gramian import Matrix, PSDMatrix, compute_gramian, compute_gramian_sum
 
-__all__ = ["compute_gramian"]
+__all__ = ["compute_gramian", "compute_gramian_sum", "Matrix", "PSDMatrix"]
diff --git a/src/torchjd/_utils/compute_gramian.py b/src/torchjd/_utils/compute_gramian.py
@@ -1,8 +1,13 @@
+from typing import Annotated, cast
+
 import torch
 from torch import Tensor
 
+Matrix = Annotated[Tensor, "ndim=2"]
+PSDMatrix = Annotated[Matrix, "Positive semi-definite"]
+
 
-def compute_gramian(generalized_matrix: Tensor) -> Tensor:
+def compute_gramian(generalized_matrix: Tensor) -> PSDMatrix:
     """
     Computes the `Gramian matrix <https://en.wikipedia.org/wiki/Gram_matrix>`_ of a given
     generalized matrix. Specifically, this is equivalent to
@@ -12,4 +17,9 @@ def compute_gramian(generalized_matrix: Tensor) -> Tensor:
     """
     dims = list(range(1, generalized_matrix.ndim))
     gramian = torch.tensordot(generalized_matrix, generalized_matrix, dims=(dims, dims))
-    return gramian
+    return cast(PSDMatrix, gramian)
+
+
+def compute_gramian_sum(generalized_matrices: list[Tensor]) -> PSDMatrix:
+    gramian = sum([compute_gramian(matrix) for matrix in generalized_matrices])
+    return cast(PSDMatrix, gramian)
diff --git a/src/torchjd/aggregation/_aggregator_bases.py b/src/torchjd/aggregation/_aggregator_bases.py
@@ -4,7 +4,8 @@
 
 from torchjd._utils import compute_gramian
 
-from ._weighting_bases import Matrix, PSDMatrix, Weighting
+from .._utils.compute_gramian import Matrix, PSDMatrix
+from ._weighting_bases import Weighting
 
 
 class Aggregator(nn.Module, ABC):
@@ -80,3 +81,4 @@ class GramianWeightedAggregator(WeightedAggregator):
 
     def __init__(self, weighting: Weighting[PSDMatrix]):
         super().__init__(weighting << compute_gramian)
+        self.psd_weighting = weighting
diff --git a/src/torchjd/aggregation/_aligned_mtl.py b/src/torchjd/aggregation/_aligned_mtl.py
@@ -28,10 +28,11 @@
 import torch
 from torch import Tensor
 
+from .._utils.compute_gramian import PSDMatrix
 from ._aggregator_bases import GramianWeightedAggregator
 from ._mean import MeanWeighting
 from ._utils.pref_vector import pref_vector_to_str_suffix, pref_vector_to_weighting
-from ._weighting_bases import PSDMatrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class AlignedMTL(GramianWeightedAggregator):
diff --git a/src/torchjd/aggregation/_cagrad.py b/src/torchjd/aggregation/_cagrad.py
@@ -1,7 +1,8 @@
 from typing import cast
 
+from .._utils.compute_gramian import PSDMatrix
 from ._utils.check_dependencies import check_dependencies_are_installed
-from ._weighting_bases import PSDMatrix, Weighting
+from ._weighting_bases import Weighting
 
 check_dependencies_are_installed(["cvxpy", "clarabel"])
 
diff --git a/src/torchjd/aggregation/_constant.py b/src/torchjd/aggregation/_constant.py
@@ -1,8 +1,9 @@
 from torch import Tensor
 
+from .._utils.compute_gramian import Matrix
 from ._aggregator_bases import WeightedAggregator
 from ._utils.str import vector_to_str
-from ._weighting_bases import Matrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class Constant(WeightedAggregator):
diff --git a/src/torchjd/aggregation/_dualproj.py b/src/torchjd/aggregation/_dualproj.py
@@ -2,13 +2,14 @@
 
 from torch import Tensor
 
+from .._utils.compute_gramian import PSDMatrix
 from ._aggregator_bases import GramianWeightedAggregator
 from ._mean import MeanWeighting
 from ._utils.dual_cone import project_weights
 from ._utils.gramian import normalize, regularize
 from ._utils.non_differentiable import raise_non_differentiable_error
 from ._utils.pref_vector import pref_vector_to_str_suffix, pref_vector_to_weighting
-from ._weighting_bases import PSDMatrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class DualProj(GramianWeightedAggregator):
diff --git a/src/torchjd/aggregation/_flattening.py b/src/torchjd/aggregation/_flattening.py
@@ -2,7 +2,8 @@
 
 from torch import Tensor
 
-from torchjd.aggregation._weighting_bases import GeneralizedWeighting, PSDMatrix, Weighting
+from torchjd._utils.compute_gramian import PSDMatrix
+from torchjd.aggregation._weighting_bases import GeneralizedWeighting, Weighting
 from torchjd.autogram._gramian_utils import reshape_gramian
 
 
diff --git a/src/torchjd/aggregation/_imtl_g.py b/src/torchjd/aggregation/_imtl_g.py
@@ -1,9 +1,10 @@
 import torch
 from torch import Tensor
 
+from .._utils.compute_gramian import PSDMatrix
 from ._aggregator_bases import GramianWeightedAggregator
 from ._utils.non_differentiable import raise_non_differentiable_error
-from ._weighting_bases import PSDMatrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class IMTLG(GramianWeightedAggregator):
diff --git a/src/torchjd/aggregation/_krum.py b/src/torchjd/aggregation/_krum.py
@@ -2,8 +2,9 @@
 from torch import Tensor
 from torch.nn import functional as F
 
+from .._utils.compute_gramian import PSDMatrix
 from ._aggregator_bases import GramianWeightedAggregator
-from ._weighting_bases import PSDMatrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class Krum(GramianWeightedAggregator):
diff --git a/src/torchjd/aggregation/_mean.py b/src/torchjd/aggregation/_mean.py
@@ -1,8 +1,9 @@
 import torch
 from torch import Tensor
 
+from .._utils.compute_gramian import Matrix
 from ._aggregator_bases import WeightedAggregator
-from ._weighting_bases import Matrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class Mean(WeightedAggregator):
diff --git a/src/torchjd/aggregation/_mgda.py b/src/torchjd/aggregation/_mgda.py
@@ -1,8 +1,9 @@
 import torch
 from torch import Tensor
 
+from .._utils.compute_gramian import PSDMatrix
 from ._aggregator_bases import GramianWeightedAggregator
-from ._weighting_bases import PSDMatrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class MGDA(GramianWeightedAggregator):
diff --git a/src/torchjd/aggregation/_nash_mtl.py b/src/torchjd/aggregation/_nash_mtl.py
@@ -25,8 +25,9 @@
 
 # mypy: ignore-errors
 
+from .._utils.compute_gramian import Matrix
 from ._utils.check_dependencies import check_dependencies_are_installed
-from ._weighting_bases import Matrix, Weighting
+from ._weighting_bases import Weighting
 
 check_dependencies_are_installed(["cvxpy", "ecos"])
 
diff --git a/src/torchjd/aggregation/_pcgrad.py b/src/torchjd/aggregation/_pcgrad.py
@@ -1,9 +1,10 @@
 import torch
 from torch import Tensor
 
+from .._utils.compute_gramian import PSDMatrix
 from ._aggregator_bases import GramianWeightedAggregator
 from ._utils.non_differentiable import raise_non_differentiable_error
-from ._weighting_bases import PSDMatrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class PCGrad(GramianWeightedAggregator):
diff --git a/src/torchjd/aggregation/_random.py b/src/torchjd/aggregation/_random.py
@@ -2,8 +2,9 @@
 from torch import Tensor
 from torch.nn import functional as F
 
+from .._utils.compute_gramian import Matrix
 from ._aggregator_bases import WeightedAggregator
-from ._weighting_bases import Matrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class Random(WeightedAggregator):
diff --git a/src/torchjd/aggregation/_sum.py b/src/torchjd/aggregation/_sum.py
@@ -1,8 +1,9 @@
 import torch
 from torch import Tensor
 
+from .._utils.compute_gramian import Matrix
 from ._aggregator_bases import WeightedAggregator
-from ._weighting_bases import Matrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class Sum(WeightedAggregator):
diff --git a/src/torchjd/aggregation/_upgrad.py b/src/torchjd/aggregation/_upgrad.py
@@ -3,13 +3,14 @@
 import torch
 from torch import Tensor
 
+from .._utils.compute_gramian import PSDMatrix
 from ._aggregator_bases import GramianWeightedAggregator
 from ._mean import MeanWeighting
 from ._utils.dual_cone import project_weights
 from ._utils.gramian import normalize, regularize
 from ._utils.non_differentiable import raise_non_differentiable_error
 from ._utils.pref_vector import pref_vector_to_str_suffix, pref_vector_to_weighting
-from ._weighting_bases import PSDMatrix, Weighting
+from ._weighting_bases import Weighting
 
 
 class UPGrad(GramianWeightedAggregator):
diff --git a/src/torchjd/aggregation/_utils/pref_vector.py b/src/torchjd/aggregation/_utils/pref_vector.py
@@ -1,7 +1,8 @@
 from torch import Tensor
 
+from torchjd._utils.compute_gramian import Matrix
 from torchjd.aggregation._constant import ConstantWeighting
-from torchjd.aggregation._weighting_bases import Matrix, Weighting
+from torchjd.aggregation._weighting_bases import Weighting
 
 from .str import vector_to_str
 
diff --git a/src/torchjd/aggregation/_weighting_bases.py b/src/torchjd/aggregation/_weighting_bases.py
@@ -2,15 +2,13 @@
 
 from abc import ABC, abstractmethod
 from collections.abc import Callable
-from typing import Annotated, Generic, TypeVar
+from typing import Generic, TypeVar
 
 from torch import Tensor, nn
 
 _T = TypeVar("_T", contravariant=True)
 _FnInputT = TypeVar("_FnInputT")
 _FnOutputT = TypeVar("_FnOutputT")
-Matrix = Annotated[Tensor, "ndim=2"]
-PSDMatrix = Annotated[Matrix, "Positive semi-definite"]
 
 
 class Weighting(Generic[_T], nn.Module, ABC):
diff --git a/src/torchjd/autojac/_jac_to_grad.py b/src/torchjd/autojac/_jac_to_grad.py
@@ -2,9 +2,12 @@
 from typing import cast
 
 import torch
-from torch import Tensor
+from torch import Tensor, tensordot
 
+from torchjd._utils import PSDMatrix, compute_gramian_sum
 from torchjd.aggregation import Aggregator
+from torchjd.aggregation._aggregator_bases import GramianWeightedAggregator
+from torchjd.aggregation._weighting_bases import Weighting
 
 from ._accumulation import TensorWithJac, accumulate_grads
 
@@ -73,11 +76,23 @@ def jac_to_grad(
     if not retain_jac:
         _free_jacs(tensors_)
 
-    gradients = _jacobian_based(aggregator, jacobians, tensors_)
+    if isinstance(aggregator, GramianWeightedAggregator):
+        gradients = _gramian_based(aggregator.psd_weighting, jacobians, tensors_)
+    else:
+        gradients = _jacobian_based(aggregator, jacobians, tensors_)
 
     accumulate_grads(tensors_, gradients)
 
 
+def _gramian_based(
+    weighting: Weighting[PSDMatrix], jacobians: list[Tensor], tensors: list[TensorWithJac]
+) -> list[Tensor]:
+    gramian = compute_gramian_sum(jacobians)
+    weights = weighting(gramian)
+    gradients = [tensordot(weights, jacobian, dims=1) for jacobian in jacobians]
+    return gradients
+
+
 def _jacobian_based(
     aggregator: Aggregator, jacobians: list[Tensor], tensors: list[TensorWithJac]
 ) -> list[Tensor]: