SimplexLab · ValerianRey · Feb 23, 2026 · Jan 21, 2026 · Jan 21, 2026 · Jan 23, 2026
@@ -40,10 +40,8 @@ changelog does not include internal changes that do not affect the user.
   jac_to_grad(shared_module.parameters(), aggregator)
   ```
 
-- Removed an unnecessary memory duplication. This should significantly improve the memory efficiency
-  of `autojac`.
-- Removed an unnecessary internal cloning of gradient. This should slightly improve the memory
-  efficiency of `autojac`.
+- Removed several unnecessary memory duplications. This should significantly improve the memory
+  efficiency and speed of `autojac`.
 
 ## [0.8.1] - 2026-01-07
 

@@ -1,9 +1,13 @@
+from collections import deque
 from collections.abc import Iterable
+from typing import cast
 
 import torch
 from torch import Tensor
 
-from torchjd.aggregation import Aggregator
+from torchjd._linalg import PSDMatrix, compute_gramian
+from torchjd.aggregation import Aggregator, Weighting
+from torchjd.aggregation._aggregator_bases import GramianWeightedAggregator
 
 from ._accumulation import TensorWithJac, accumulate_grads, is_tensor_with_jac
 
@@ -63,29 +67,61 @@ def jac_to_grad(
     if len(tensors_) == 0:
         return
 
-    jacobians = [t.jac for t in tensors_]
+    jacobians = deque(t.jac for t in tensors_)
 
-    if not all([jacobian.shape[0] == jacobians[0].shape[0] for jacobian in jacobians[1:]]):
+    if not all([jacobian.shape[0] == jacobians[0].shape[0] for jacobian in jacobians]):
         raise ValueError("All Jacobians should have the same number of rows.")
 
     if not retain_jac:
         _free_jacs(tensors_)
 
+    if isinstance(aggregator, GramianWeightedAggregator):
+        # When it's possible, avoid the concatenation of the jacobians that can be very costly in
+        # memory.
+        gradients = _gramian_based(aggregator.weighting.weighting, jacobians, tensors_)
+    else:
+        gradients = _jacobian_based(aggregator, jacobians, tensors_)
+    accumulate_grads(tensors_, gradients)
+
+
+def _jacobian_based(
+    aggregator: Aggregator, jacobians: deque[Tensor], tensors: list[TensorWithJac]
+) -> list[Tensor]:
     jacobian_matrix = _unite_jacobians(jacobians)
     gradient_vector = aggregator(jacobian_matrix)
-    gradients = _disunite_gradient(gradient_vector, jacobians, tensors_)
-    accumulate_grads(tensors_, gradients)
+    gradients = _disunite_gradient(gradient_vector, tensors)
+    return gradients
 
 
-def _unite_jacobians(jacobians: list[Tensor]) -> Tensor:
-    jacobian_matrices = [jacobian.reshape(jacobian.shape[0], -1) for jacobian in jacobians]
+def _gramian_based(
+    weighting: Weighting[PSDMatrix], jacobians: deque[Tensor], tensors: list[TensorWithJac]
+) -> list[Tensor]:
+    gramian = _compute_gramian_sum(jacobians)
+    weights = weighting(gramian)
+
+    gradients = list[Tensor]()
+    while jacobians:
+        jacobian = jacobians.popleft()  # get jacobian + dereference it to free memory asap
+        gradients.append(torch.tensordot(weights, jacobian, dims=1))
+
+    return gradients
+
+
+def _compute_gramian_sum(jacobians: deque[Tensor]) -> PSDMatrix:
+    gramian = sum([compute_gramian(matrix) for matrix in jacobians])
+    return cast(PSDMatrix, gramian)
+
+
+def _unite_jacobians(jacobians: deque[Tensor]) -> Tensor:
+    jacobian_matrices = list[Tensor]()
+    while jacobians:
+        jacobian = jacobians.popleft()  # get jacobian + dereference it to free memory asap
+        jacobian_matrices.append(jacobian.reshape(jacobian.shape[0], -1))
     jacobian_matrix = torch.concat(jacobian_matrices, dim=1)
     return jacobian_matrix
 
 
-def _disunite_gradient(
-    gradient_vector: Tensor, jacobians: list[Tensor], tensors: list[TensorWithJac]
-) -> list[Tensor]:
+def _disunite_gradient(gradient_vector: Tensor, tensors: list[TensorWithJac]) -> list[Tensor]:
     gradient_vectors = gradient_vector.split([t.numel() for t in tensors])
     gradients = [g.view(t.shape) for g, t in zip(gradient_vectors, tensors)]
     return gradients