@@ -13,12 +13,16 @@ def _accumulate_jacs(params: Iterable[Tensor], jacobians: Iterable[Tensor]) -> None:
             param_ = cast(TensorWithJac, param)
             param_.jac += jac
         else:
-            # TODO: this could be a serious memory issue
-            # We clone the value because we do not want subsequent accumulations to also affect
-            # this value (in case it is still used outside). We do not detach from the
-            # computation graph because the value can have grad_fn that we want to keep track of
-            # (in case it was obtained via create_graph=True and a differentiable aggregator).
-            param.__setattr__("jac", jac.clone())
+            # We do not clone the value, in order to save memory and time. As a result,
+            # subsequent in-place modifications of param.jac (e.g. later accumulations) will
+            # also affect the corresponding element of jacobians, and outside changes to that
+            # element will also affect param.jac. To be safe, the values of jacobians should
+            # therefore not be used anymore after being passed to this function.
+            #
+            # We do not detach from the computation graph because the value can have a grad_fn
+            # that we want to keep track of (in case it was obtained via create_graph=True and
+            # a differentiable aggregator).
+            param.__setattr__("jac", jac)
 
 
 def _accumulate_grads(params: Iterable[Tensor], gradients: Iterable[Tensor]) -> None:
@@ -27,7 +31,7 @@ def _accumulate_grads(params: Iterable[Tensor], gradients: Iterable[Tensor]) -> None:
         if hasattr(param, "grad") and param.grad is not None:
             param.grad += grad
         else:
-            param.grad = grad.clone()
+            param.grad = grad
 
 
 def _check_expects_grad(tensor: Tensor) -> None:
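
The caveat in the new comment is easy to see concretely. Below is a minimal sketch of the aliasing behavior, assuming standard PyTorch tensor semantics; param and grad here are hypothetical stand-ins for illustration, not objects from this module:

import torch

param = torch.zeros(3, requires_grad=True)
grad = torch.ones(3)

# First accumulation: plain assignment, as in the new code (no clone), so
# param.grad and the caller's grad tensor now share the same storage.
param.grad = grad

# An outside in-place change to the caller's tensor is visible on param.grad...
grad.add_(1.0)
print(param.grad)  # tensor([2., 2., 2.])

# ...and a subsequent in-place accumulation writes through the other way.
param.grad += torch.ones(3)
print(grad)  # tensor([3., 3., 3.])

This is the trade-off the new comments describe: skipping the clone avoids one allocation and copy per tensor, at the cost that callers must treat the passed-in gradients and jacobians as consumed after the call.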