Fix mistakes when resolving merge conflict

ValerianRey · ValerianRey · commit df450a78c93c · 2026-04-17T13:12:14.000+02:00
diff --git a/src/torchjd/aggregation/_gradvac.py b/src/torchjd/aggregation/_gradvac.py
@@ -137,9 +137,9 @@ def _ensure_state(self, m: int, dtype: torch.dtype) -> None:
             self._state_key = key
 
 
-class GradVac(GramianWeightedAggregator, Stateful):
+class GradVac(GramianWeightedAggregator, Stochastic):
     r"""
-    :class:`~torchjd.aggregation._mixins.Stateful`
+    :class:`~torchjd.aggregation._mixins.Stochastic`
     :class:`~torchjd.aggregation._aggregator_bases.Aggregator` implementing the aggregation step of
     Gradient Vaccine (GradVac) from `Gradient Vaccine: Investigating and Improving Multi-task
     Optimization in Massively Multilingual Models (ICLR 2021 Spotlight)
@@ -159,22 +159,14 @@ class GradVac(GramianWeightedAggregator, Stateful):
 
     :param beta: EMA decay for :math:`\hat{\phi}`.
     :param eps: Small non-negative constant added to denominators.
-
-    .. note::
-        For each task :math:`i`, the order of other tasks :math:`j` is shuffled independently
-        using the global PyTorch RNG (``torch.randperm``). Seed it with ``torch.manual_seed`` if
-        you need reproducibility.
-
-    .. note::
-        To apply GradVac with the `whole_model`, `enc_dec`, `all_layer` or `all_matrix` grouping
-        strategy, please refer to the :doc:`Grouping </examples/grouping>` examples.
+    :param seed: Seed for the internal random number generator. If ``None``, a seed is drawn from
+        the global PyTorch RNG to fork an independent stream.
     """
 
-    gramian_weighting: GradVacWeighting
-
-    def __init__(self, beta: float = 0.5, eps: float = 1e-8) -> None:
-        weighting = GradVacWeighting(beta=beta, eps=eps)
-        super().__init__(weighting)
+    def __init__(self, beta: float = 0.5, eps: float = 1e-8, seed: int | None = None) -> None:
+        weighting = GradVacWeighting(beta=beta, eps=eps, seed=seed)
+        GramianWeightedAggregator.__init__(self, weighting)
+        Stochastic.__init__(self, generator=weighting.generator)
         self._gradvac_weighting = weighting
         self.register_full_backward_pre_hook(raise_non_differentiable_error)
 
@@ -195,8 +187,9 @@ def eps(self, value: float) -> None:
         self._gradvac_weighting.eps = value
 
     def reset(self) -> None:
-        """Clears EMA state so the next forward starts from zero targets."""
+        """Resets the random number generator and clears the EMA state."""
 
+        Stochastic.reset(self)
         self._gradvac_weighting.reset()
 
     def __repr__(self) -> str:
diff --git a/src/torchjd/aggregation/_pcgrad.py b/src/torchjd/aggregation/_pcgrad.py
@@ -54,16 +54,19 @@ def forward(self, gramian: PSDMatrix, /) -> Tensor:
         return weights.to(device)
 
 
-class PCGrad(GramianWeightedAggregator):
+class PCGrad(GramianWeightedAggregator, Stochastic):
     """
     :class:`~torchjd.aggregation._aggregator_bases.Aggregator` as defined in algorithm 1 of
     `Gradient Surgery for Multi-Task Learning <https://arxiv.org/pdf/2001.06782.pdf>`_.
-    """
 
-    gramian_weighting: PCGradWeighting
+    :param seed: Seed for the internal random number generator. If ``None``, a seed is drawn from
+        the global PyTorch RNG to fork an independent stream.
+    """
 
-    def __init__(self) -> None:
-        super().__init__(PCGradWeighting())
+    def __init__(self, seed: int | None = None) -> None:
+        weighting = PCGradWeighting(seed=seed)
+        GramianWeightedAggregator.__init__(self, weighting)
+        Stochastic.__init__(self, generator=weighting.generator)
 
         # This prevents running into a RuntimeError due to modifying stored tensors in place.
         self.register_full_backward_pre_hook(raise_non_differentiable_error)
diff --git a/src/torchjd/aggregation/_random.py b/src/torchjd/aggregation/_random.py
@@ -30,15 +30,18 @@ def forward(self, matrix: Tensor, /) -> Tensor:
         return weights
 
 
-class Random(WeightedAggregator):
+class Random(WeightedAggregator, Stochastic):
     """
     :class:`~torchjd.aggregation._aggregator_bases.Aggregator` that computes a random combination of
     the rows of the provided matrices, as defined in algorithm 2 of `Reasonable Effectiveness of
     Random Weighting: A Litmus Test for Multi-Task Learning
     <https://arxiv.org/pdf/2111.10603.pdf>`_.
-    """
 
-    weighting: RandomWeighting
+    :param seed: Seed for the internal random number generator. If ``None``, a seed is drawn from
+        the global PyTorch RNG to fork an independent stream.
+    """
 
-    def __init__(self) -> None:
-        super().__init__(RandomWeighting())
+    def __init__(self, seed: int | None = None) -> None:
+        weighting = RandomWeighting(seed=seed)
+        WeightedAggregator.__init__(self, weighting)
+        Stochastic.__init__(self, generator=weighting.generator)