Improve ForwardBackwardTime

ValerianRey · ValerianRey · commit d9c5aff6a669 · 2025-06-09T00:05:15.000+02:00
diff --git a/src/arena/architectures.py b/src/arena/architectures.py
@@ -0,0 +1,27 @@
+from torch import Tensor, nn
+
+
+class Cifar10Model(nn.Module):  # small CNN held in a single nn.Sequential (self.seq)
+    N_CLASSES = 10  # out_features of the last Linear layer
+
+    def __init__(self, activation_function: type[nn.Module] = nn.ELU):  # class, not instance
+        super().__init__()
+
+        self.seq = nn.Sequential(  # spatial sizes noted below assume 32x32 inputs
+            nn.Conv2d(3, 32, 3),  # 3 -> 32 channels, 3x3 kernel: 32x32 -> 30x30
+            activation_function(),  # called with no arguments; new instance at each position
+            nn.Conv2d(32, 64, 3, groups=32),  # 32 groups, 1 in / 2 out channels each: -> 28x28
+            nn.MaxPool2d(2),  # 28x28 -> 14x14
+            activation_function(),
+            nn.Conv2d(64, 64, 3, groups=64),  # one group per channel: 14x14 -> 12x12
+            nn.MaxPool2d(3),  # 12x12 -> 4x4
+            activation_function(),
+            nn.Flatten(),  # keeps dim 0: (B, 64, 4, 4) -> (B, 1024)
+            nn.Linear(1024, 128),  # 1024 = 64 * 4 * 4, which is what ties the model to 32x32 inputs
+            activation_function(),
+            nn.Linear(128, self.N_CLASSES),
+            nn.Flatten(start_dim=0),  # also merges dim 0: (B, N_CLASSES) -> (B * N_CLASSES,)
+        )
+
+    def forward(self, x: Tensor) -> Tensor:  # output is 1-D because of the final Flatten(start_dim=0)
+        return self.seq(x)
diff --git a/src/arena/interfaces.py b/src/arena/interfaces.py
@@ -117,7 +117,7 @@ def __call__(self, _: str):
         def forward_backward(model: Module, input: Tensor, aggregator: GramianWeightedAggregator) -> None:
             output, vgp_fn = vgp_from_module_2(model, input)
             gramian = get_gramian(vgp_fn, output)
-            weights = aggregator.weighting.weighting(gramian)
+            weights = aggregator.weighting(gramian)
             output.backward(weights)
 
         return forward_backward
diff --git a/src/arena/objectives.py b/src/arena/objectives.py
@@ -4,10 +4,11 @@
 
 import torch
 from torch import Tensor
-from torch.nn import Linear, MSELoss, ReLU, Sequential, Module
+from torch.nn import Linear, MSELoss, ReLU, Sequential
 from torch.optim import SGD
-from torchjd.aggregation import Mean, Aggregator, UPGrad
+from torchjd.aggregation import Mean
 
+from arena.architectures import Cifar10Model
 from arena.matrix_samplers import MatrixSampler, NonWeakSampler, NormalSampler, StrictlyWeakSampler, StrongSampler
 
 
@@ -58,19 +59,18 @@ def __str__(self) -> str:
 
 
 class ForwardBackwardTime(Objective):
-    def __init__(self, ns: list[int], device: str, iterations: int):
-        self.ns = ns
+    def __init__(self, device: str, iterations: int):
+        torch.cuda.empty_cache()
         self.device = device
-        shapes = zip(ns[:-1], ns[1:])
-        layers = [Linear(n, m) for n, m in shapes]
-        self.model = Sequential(*layers).to(device=device)
+        self.model = Cifar10Model().to(device=device)
         self.iterations = iterations
+        self.input_shape = (16, 3, 32, 32)
 
     def __call__(self, forward_backward: Callable):
-        aggregator = UPGrad()
+        aggregator = Mean()
         total_time = 0.0
         for i in range(self.iterations + 1):
-            x = torch.randn(self.ns[0], device=self.device)
+            x = torch.randn(self.input_shape, device=self.device)
 
             if self.device.startswith("cuda"):
                 torch.cuda.synchronize()
@@ -86,12 +86,6 @@ def __call__(self, forward_backward: Callable):
         average_runtime = total_time / self.iterations
         return average_runtime
 
-        def __repr__(self) -> str:
-            return f"{self.__class__.__name__}(ns={self.ns}, device={self.device}," f" iterations={self.iterations})"
-
-        def __str__(self) -> str:
-            return f"AT({self.matrix_sampler}, {self.device}, x{self.iterations})"
-
 
 class DualProjectionPrimalFeasibilityObjective(Objective):
     def __init__(self, matrix_sampler: MatrixSampler, device: str, iterations: int):
@@ -283,6 +277,7 @@ def compute_kkt_conditions(
     ],
     "mtl_backward_runtime": [MTLBackwardTime(n_tasks=50, device=device, iterations=100) for device in ["cpu", "cuda"]],
     "gramian_runtime": [GramianTime(100, 1000000, "cuda", 1)],
+    "forward_backward_runtime": [ForwardBackwardTime("cuda", 1)],
     "project_weights": [
         DualProjectionPrimalFeasibilityObjective(matrix_sampler=cls(m, m, m - 1, torch.float32), device=device, iterations=10)
         for cls in [NormalSampler, StrongSampler, StrictlyWeakSampler, NonWeakSampler]