SimplexLab
diff --git a/‎CHANGELOG.md‎
Lines changed: 2 additions & 5 deletions b/‎CHANGELOG.md‎
Lines changed: 2 additions & 5 deletions
diff --git a/‎src/torchjd/autojac/_transform/_differentiate.py‎
Lines changed: 8 additions & 4 deletions b/‎src/torchjd/autojac/_transform/_differentiate.py‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎src/torchjd/autojac/_transform/grad.py‎
Lines changed: 10 additions & 12 deletions b/‎src/torchjd/autojac/_transform/grad.py‎
Lines changed: 10 additions & 12 deletions
diff --git a/‎src/torchjd/autojac/_transform/init.py‎
Lines changed: 4 additions & 4 deletions b/‎src/torchjd/autojac/_transform/init.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/torchjd/autojac/_transform/jac.py‎
Lines changed: 12 additions & 14 deletions b/‎src/torchjd/autojac/_transform/jac.py‎
Lines changed: 12 additions & 14 deletions
diff --git a/‎src/torchjd/autojac/_transform/ordered_set.py‎
Lines changed: 2 additions & 1 deletion b/‎src/torchjd/autojac/_transform/ordered_set.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/torchjd/autojac/_transform/select.py‎
Lines changed: 7 additions & 6 deletions b/‎src/torchjd/autojac/_transform/select.py‎
Lines changed: 7 additions & 6 deletions
diff --git a/‎src/torchjd/autojac/_utils.py‎
Lines changed: 12 additions & 5 deletions b/‎src/torchjd/autojac/_utils.py‎
Lines changed: 12 additions & 5 deletions
diff --git a/‎src/torchjd/autojac/backward.py‎
Lines changed: 4 additions & 4 deletions b/‎src/torchjd/autojac/backward.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/torchjd/autojac/mtl_backward.py‎
Lines changed: 11 additions & 10 deletions b/‎src/torchjd/autojac/mtl_backward.py‎
Lines changed: 11 additions & 10 deletions
@@ -19,16 +19,13 @@ changes that do not affect the user.
 - Refactored internal verifications in the autojac engine so that they do not run at runtime
   anymore. This should minimally improve the performance and reduce the memory usage of `backward`
   and `mtl_backward`.
+- Refactored internal typing in the autojac engine so that fewer casts are made and the code is
+  simplified. This should slightly improve the performance of `backward` and `mtl_backward`.
 - Improved the implementation of `ConFIG` to be simpler and safer when normalizing vectors. It
   should slightly improve the performance of `ConFIG` and minimally affect its behavior.
 
 ### Fixed
 
-- Fixed the behavior of `backward` and `mtl_backward` when some tensors are repeated (i.e. when they
-  appear several times in a list of tensors provided as argument). Instead of raising an exception
-  in these cases, we are now aligned with the behavior of `torch.autograd.backward`. Repeated
-  tensors that we differentiate lead to repeated rows in the Jacobian, prior to aggregation, and
-  repeated tensors with respect to which we differentiate count only once.
 - Fixed an issue with `backward` and `mtl_backward` that could make the ordering of the columns of
   the Jacobians non-deterministic, and that could thus lead to slightly non-deterministic results
   with some aggregators.
 
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Iterable, Sequence
+from typing import Sequence
 
 from torch import Tensor
 
@@ -11,13 +11,17 @@
 class Differentiate(Transform[_A, _A], ABC):
     def __init__(
         self,
-        outputs: Iterable[Tensor],
-        inputs: Iterable[Tensor],
+        outputs: OrderedSet[Tensor],
+        inputs: OrderedSet[Tensor],
         retain_graph: bool,
         create_graph: bool,
     ):
+        # The order of outputs and inputs only matters because we have no guarantee that
+        # torch.autograd.grad is *exactly* equivariant to input permutations and invariant to
+        # output (with their corresponding grad_output) permutations.
+
         self.outputs = list(outputs)
-        self.inputs = OrderedSet(inputs)
+        self.inputs = list(inputs)
         self.retain_graph = retain_graph
         self.create_graph = create_graph
 
 
@@ -1,18 +1,19 @@
-from typing import Iterable, Sequence
+from typing import Sequence
 
 import torch
 from torch import Tensor
 
 from ._differentiate import Differentiate
 from ._materialize import materialize
+from .ordered_set import OrderedSet
 from .tensor_dict import Gradients
 
 
 class Grad(Differentiate[Gradients]):
     def __init__(
         self,
-        outputs: Iterable[Tensor],
-        inputs: Iterable[Tensor],
+        outputs: OrderedSet[Tensor],
+        inputs: OrderedSet[Tensor],
         retain_graph: bool = False,
         create_graph: bool = False,
     ):
@@ -30,22 +31,19 @@ def _differentiate(self, grad_outputs: Sequence[Tensor]) -> tuple[Tensor, ...]:
             the same shape as the corresponding output.
         """
 
-        outputs = list(self.outputs)
-        inputs = list(self.inputs)
-
-        if len(inputs) == 0:
+        if len(self.inputs) == 0:
             return tuple()
 
-        if len(outputs) == 0:
-            return tuple([torch.zeros_like(input) for input in inputs])
+        if len(self.outputs) == 0:
+            return tuple([torch.zeros_like(input) for input in self.inputs])
 
         optional_grads = torch.autograd.grad(
-            outputs,
-            inputs,
+            self.outputs,
+            self.inputs,
             grad_outputs=grad_outputs,
             retain_graph=self.retain_graph,
             create_graph=self.create_graph,
             allow_unused=True,
         )
-        grads = materialize(optional_grads, inputs)
+        grads = materialize(optional_grads, self.inputs)
         return grads
@@ -1,4 +1,4 @@
-from typing import Iterable
+from collections.abc import Set
 
 import torch
 from torch import Tensor
@@ -8,8 +8,8 @@
 
 
 class Init(Transform[EmptyTensorDict, Gradients]):
-    def __init__(self, values: Iterable[Tensor]):
-        self.values = set(values)
+    def __init__(self, values: Set[Tensor]):
+        self.values = values
 
     def __call__(self, input: EmptyTensorDict) -> Gradients:
         r"""
@@ -26,4 +26,4 @@ def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
             raise RequirementError(
                 f"The input_keys should be the empty set. Found input_keys {input_keys}."
             )
-        return self.values
+        return set(self.values)
@@ -1,21 +1,22 @@
 import math
 from functools import partial
 from itertools import accumulate
-from typing import Callable, Iterable, Sequence
+from typing import Callable, Sequence
 
 import torch
 from torch import Size, Tensor
 
 from ._differentiate import Differentiate
 from ._materialize import materialize
+from .ordered_set import OrderedSet
 from .tensor_dict import Jacobians
 
 
 class Jac(Differentiate[Jacobians]):
     def __init__(
         self,
-        outputs: Iterable[Tensor],
-        inputs: Iterable[Tensor],
+        outputs: OrderedSet[Tensor],
+        inputs: OrderedSet[Tensor],
         chunk_size: int | None,
         retain_graph: bool = False,
         create_graph: bool = False,
@@ -37,30 +38,27 @@ def _differentiate(self, jac_outputs: Sequence[Tensor]) -> tuple[Tensor, ...]:
             jac_outputs.
         """
 
-        outputs = list(self.outputs)
-        inputs = list(self.inputs)
-
-        if len(inputs) == 0:
+        if len(self.inputs) == 0:
             return tuple()
 
-        if len(outputs) == 0:
+        if len(self.outputs) == 0:
             return tuple(
                 [
                     torch.empty((0,) + input.shape, device=input.device, dtype=input.dtype)
-                    for input in inputs
+                    for input in self.inputs
                 ]
             )
 
         def _get_vjp(grad_outputs: Sequence[Tensor], retain_graph: bool) -> Tensor:
             optional_grads = torch.autograd.grad(
-                outputs,
-                inputs,
+                self.outputs,
+                self.inputs,
                 grad_outputs=grad_outputs,
                 retain_graph=retain_graph,
                 create_graph=self.create_graph,
                 allow_unused=True,
             )
-            grads = materialize(optional_grads, inputs=inputs)
+            grads = materialize(optional_grads, inputs=self.inputs)
             return torch.concatenate([grad.reshape([-1]) for grad in grads])
 
         # By the Jacobians constraint, this value should be the same for all jac_outputs.
@@ -86,10 +84,10 @@ def _get_vjp(grad_outputs: Sequence[Tensor], retain_graph: bool) -> Tensor:
         jac_matrix_chunks.append(_get_jac_matrix_chunk(jac_outputs_chunk, get_vjp_last))
 
         jac_matrix = torch.vstack(jac_matrix_chunks)
-        lengths = [input.numel() for input in inputs]
+        lengths = [input.numel() for input in self.inputs]
         jac_matrices = _extract_sub_matrices(jac_matrix, lengths)
 
-        shapes = [input.shape for input in inputs]
+        shapes = [input.shape for input in self.inputs]
         jacs = _reshape_matrices(jac_matrices, shapes)
 
         return tuple(jacs)
 
@@ -1,10 +1,11 @@
 from collections import OrderedDict
+from collections.abc import Set
 from typing import Hashable, Iterable, TypeVar
 
 _KeyType = TypeVar("_KeyType", bound=Hashable)
 
 
-class OrderedSet(OrderedDict[_KeyType, None]):
+class OrderedSet(OrderedDict[_KeyType, None], Set[_KeyType]):
     """Ordered collection of distinct elements."""
 
     def __init__(self, elements: Iterable[_KeyType]):
 
@@ -1,4 +1,4 @@
-from typing import Iterable
+from collections.abc import Set
 
 from torch import Tensor
 
@@ -7,17 +7,18 @@
 
 
 class Select(Transform[_A, _A]):
-    def __init__(self, keys: Iterable[Tensor]):
-        self.keys = set(keys)
+    def __init__(self, keys: Set[Tensor]):
+        self.keys = keys
 
     def __call__(self, tensor_dict: _A) -> _A:
         output = {key: tensor_dict[key] for key in self.keys}
         return type(tensor_dict)(output)
 
     def check_keys(self, input_keys: set[Tensor]) -> set[Tensor]:
-        if not self.keys.issubset(input_keys):
+        keys = set(self.keys)
+        if not keys.issubset(input_keys):
             raise RequirementError(
                 f"The input_keys should be a super set of the keys to select. Found input_keys "
-                f"{input_keys} and keys to select {self.keys}."
+                f"{input_keys} and keys to select {keys}."
             )
-        return self.keys
+        return keys
@@ -15,12 +15,19 @@ def check_optional_positive_chunk_size(parallel_chunk_size: int | None) -> None:
         )
 
 
-def as_tensor_list(tensors: Sequence[Tensor] | Tensor) -> list[Tensor]:
+def as_checked_ordered_set(
+    tensors: Sequence[Tensor] | Tensor, variable_name: str
+) -> OrderedSet[Tensor]:
     if isinstance(tensors, Tensor):
-        output = [tensors]
-    else:
-        output = list(tensors)
-    return output
+        tensors = [tensors]
+
+    original_length = len(tensors)
+    output = OrderedSet(tensors)
+
+    if len(output) != original_length:
+        raise ValueError(f"`{variable_name}` should contain unique elements.")
+    # Reuse the set built for the uniqueness check instead of constructing it a second time.
+    return output
 
 
 def get_leaf_tensors(tensors: Iterable[Tensor], excluded: Iterable[Tensor]) -> OrderedSet[Tensor]:
 
@@ -6,7 +6,7 @@
 
 from ._transform import Accumulate, Aggregate, Diagonalize, EmptyTensorDict, Init, Jac, Transform
 from ._transform.ordered_set import OrderedSet
-from ._utils import as_tensor_list, check_optional_positive_chunk_size, get_leaf_tensors
+from ._utils import as_checked_ordered_set, check_optional_positive_chunk_size, get_leaf_tensors
 
 
 def backward(
@@ -69,7 +69,7 @@ def backward(
     """
     check_optional_positive_chunk_size(parallel_chunk_size)
 
-    tensors = as_tensor_list(tensors)
+    tensors = as_checked_ordered_set(tensors, "tensors")
 
     if len(tensors) == 0:
         raise ValueError("`tensors` cannot be empty")
@@ -91,7 +91,7 @@ def backward(
 
 
 def _create_transform(
-    tensors: list[Tensor],
+    tensors: OrderedSet[Tensor],
     aggregator: Aggregator,
     inputs: OrderedSet[Tensor],
     retain_graph: bool,
@@ -103,7 +103,7 @@ def _create_transform(
     init = Init(tensors)
 
     # Transform that turns the gradients into Jacobians.
-    diag = Diagonalize(OrderedSet(tensors))
+    diag = Diagonalize(tensors)
 
     # Transform that computes the required Jacobians.
     jac = Jac(tensors, inputs, parallel_chunk_size, retain_graph)
 
@@ -17,7 +17,7 @@
     Transform,
 )
 from ._transform.ordered_set import OrderedSet
-from ._utils import as_tensor_list, check_optional_positive_chunk_size, get_leaf_tensors
+from ._utils import as_checked_ordered_set, check_optional_positive_chunk_size, get_leaf_tensors
 
 
 def mtl_backward(
@@ -81,7 +81,8 @@ def mtl_backward(
 
     check_optional_positive_chunk_size(parallel_chunk_size)
 
-    features = as_tensor_list(features)
+    losses = as_checked_ordered_set(losses, "losses")
+    features = as_checked_ordered_set(features, "features")
 
     if shared_params is None:
         shared_params = get_leaf_tensors(tensors=features, excluded=[])
@@ -117,8 +118,8 @@ def mtl_backward(
 
 
 def _create_transform(
-    losses: Sequence[Tensor],
-    features: list[Tensor],
+    losses: OrderedSet[Tensor],
+    features: OrderedSet[Tensor],
     aggregator: Aggregator,
     tasks_params: list[OrderedSet[Tensor]],
     shared_params: OrderedSet[Tensor],
@@ -138,7 +139,7 @@ def _create_transform(
         _create_task_transform(
             features,
             task_params,
-            loss,
+            OrderedSet([loss]),
             retain_graph,
         )
         for task_params, loss in zip(tasks_params, losses)
@@ -161,21 +162,21 @@ def _create_transform(
 
 
 def _create_task_transform(
-    features: list[Tensor],
+    features: OrderedSet[Tensor],
     task_params: OrderedSet[Tensor],
-    loss: Tensor,
+    loss: OrderedSet[Tensor],  # contains a single scalar loss
     retain_graph: bool,
 ) -> Transform[EmptyTensorDict, Gradients]:
     # Tensors with respect to which we compute the gradients.
     to_differentiate = OrderedSet(task_params)  # Re-instantiate set to avoid modifying input
-    to_differentiate.update(OrderedSet(features))
+    to_differentiate.update(features)
 
     # Transform that initializes the gradient output to 1.
-    init = Init([loss])
+    init = Init(loss)
 
     # Transform that computes the gradients of the loss w.r.t. the task-specific parameters and
     # the features.
-    grad = Grad([loss], to_differentiate, retain_graph)
+    grad = Grad(loss, to_differentiate, retain_graph)
 
     # Transform that accumulates the gradients w.r.t. the task-specific parameters into their
     # .grad fields.