Merge branch 'main' into KhusPatel4450-patch-1

ValerianRey · web-flow · commit 16ff7e697e41 · 2026-07-01T13:44:16.000+02:00
diff --git a/.github/workflows/build-deploy-docs.yml b/.github/workflows/build-deploy-docs.yml
@@ -22,7 +22,7 @@ jobs:
       contents: write
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
 
       - name: Set up uv
         uses: astral-sh/setup-uv@v7
diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml
@@ -16,7 +16,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
 
       # This will restore the cache for the current commit if it exists, or the most recent lychee
       # cache otherwise (including those saved for the main branch). It will also save the cache for
@@ -25,7 +25,7 @@ jobs:
       # temporary (rate limiting, network issue, etc.), and we always want to retry those links
       # every time this action is run.
       - name: Restore lychee cache
-        uses: actions/cache@v5
+        uses: actions/cache@v6
         with:
           path: .lycheecache
           key: cache-lychee-${{ github.sha }}
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -44,7 +44,7 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
 
       - name: Set up uv
         uses: astral-sh/setup-uv@v7
@@ -63,7 +63,7 @@ jobs:
           PYTEST_TORCH_DTYPE: ${{ matrix.dtype || 'float32' }}
 
       - name: Upload results to Codecov
-        uses: codecov/codecov-action@v6
+        uses: codecov/codecov-action@v7
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
 
@@ -72,7 +72,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
 
       - name: Set up uv
         uses: astral-sh/setup-uv@v7
@@ -96,7 +96,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
 
       - name: Set up uv
         uses: astral-sh/setup-uv@v7
diff --git a/.github/workflows/opencode.yml b/.github/workflows/opencode.yml
@@ -25,7 +25,7 @@ jobs:
       issues: write
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
         with:
           fetch-depth: 1
           persist-credentials: false
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -14,7 +14,7 @@ jobs:
       id-token: write
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v6
+        uses: actions/checkout@v7
 
       - name: Set up uv
         uses: astral-sh/setup-uv@v7
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,7 +10,7 @@ repos:
     -   id: check-merge-conflict  # Check for files that contain merge conflict strings.
 
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.15.15
+    rev: v0.15.20
     hooks:
       - id: ruff-check
         args: [ --fix, --ignore, FIX ]  # Allow committing with TODOs. Only CI checks should prevent merging with TODOs.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -59,7 +59,7 @@ changelog does not include internal changes that do not affect the user.
 ### Added
 
 - Added `IMTL-L` (the loss-balancing variant of Impartial Multi-Task Learning) from [Towards
-  Impartial Multi-Task Learning](https://openreview.net/pdf?id=IMPnRXEWpvr) (ICLR 2021), a stateful
+  Impartial Multi-Task Learning](https://www.semanticscholar.org/paper/Towards-Impartial-Multi-task-Learning-Liu-Li/45c0828baec1dd53b81f1b2635788fdf27d0792d) (ICLR 2021), a stateful
   `Scalarizer` that learns a per-task scale `s_i` and combines the values as
   `Σ (exp(s_i) · L_i − s_i)`.
 - Added `UW` (Uncertainty Weighting) from [Multi-Task Learning Using Uncertainty to Weigh Losses
@@ -74,7 +74,7 @@ changelog does not include internal changes that do not affect the user.
 ### Added
 
 - Added `STCH` from [Smooth Tchebycheff Scalarization for Multi-Objective
-  Optimization](https://openreview.net/pdf?id=m4dO5L6eCp), a `Scalarizer` that combines the input
+  Optimization](https://arxiv.org/abs/2402.19078), a `Scalarizer` that combines the input
   tensor of values into a smooth approximation of their (weighted, shifted) maximum.
 - Added `MoDoWeighting` from [Three-Way Trade-Off in Multi-Objective Learning: Optimization, Generalization and Conflict-Avoidance](https://www.jmlr.org/papers/volume25/23-1287/23-1287.pdf) (JMLR 2024). It is a stateful `Weighting` that maintains task weights across calls via a simplex-projected gradient step on a cross-batch matrix `G = J_1 @ J_2.T`, computed from two independent mini-batches using `autojac.jac`.
 - Added `GeometricMean` (also known as GLS) studied in [MultiNet++: Multi-Stream Feature
diff --git a/README.md b/README.md
@@ -162,7 +162,7 @@ TorchJD provides many existing aggregators from the literature, listed in the fo
 | [FairGrad](https://torchjd.org/stable/docs/aggregation/fairgrad#torchjd.aggregation.FairGrad) | [FairGradWeighting](https://torchjd.org/stable/docs/aggregation/fairgrad#torchjd.aggregation.FairGradWeighting) | [Fair Resource Allocation in Multi-Task Learning](https://arxiv.org/pdf/2402.15638) |
 | [GradDrop](https://torchjd.org/stable/docs/aggregation/graddrop#torchjd.aggregation.GradDrop) | - | [Just Pick a Sign: Optimizing Deep Multitask Models with Gradient Sign Dropout](https://arxiv.org/pdf/2010.06808) |
 | [GradVac](https://torchjd.org/stable/docs/aggregation/gradvac#torchjd.aggregation.GradVac) | [GradVacWeighting](https://torchjd.org/stable/docs/aggregation/gradvac#torchjd.aggregation.GradVacWeighting) | [Gradient Vaccine: Investigating and Improving Multi-task Optimization in Massively Multilingual Models](https://arxiv.org/pdf/2010.05874) |
-| [IMTLG](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLG) | [IMTLGWeighting](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLGWeighting) | [Towards Impartial Multi-task Learning](https://discovery.ucl.ac.uk/id/eprint/10120667/) |
+| [IMTLG](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLG) | [IMTLGWeighting](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLGWeighting) | [Towards Impartial Multi-task Learning](https://www.semanticscholar.org/paper/Towards-Impartial-Multi-task-Learning-Liu-Li/45c0828baec1dd53b81f1b2635788fdf27d0792d) |
 | [Krum](https://torchjd.org/stable/docs/aggregation/krum#torchjd.aggregation.Krum) | [KrumWeighting](https://torchjd.org/stable/docs/aggregation/krum#torchjd.aggregation.KrumWeighting) | [Machine Learning with Adversaries: Byzantine Tolerant Gradient Descent](https://proceedings.neurips.cc/paper/2017/file/f4b9ec30ad9f68f89b29639786cb62ef-Paper.pdf) |
 | [Mean](https://torchjd.org/stable/docs/aggregation/mean#torchjd.aggregation.Mean) | [MeanWeighting](https://torchjd.org/stable/docs/aggregation/mean#torchjd.aggregation.MeanWeighting) | - |
 | [MGDA](https://torchjd.org/stable/docs/aggregation/mgda#torchjd.aggregation.MGDA) | [MGDAWeighting](https://torchjd.org/stable/docs/aggregation/mgda#torchjd.aggregation.MGDAWeighting) | [Multiple-gradient descent algorithm (MGDA) for multiobjective optimization](https://comptes-rendus.academie-sciences.fr/mathematique/articles/10.1016/j.crma.2012.03.014/) |
diff --git a/src/torchjd/aggregation/_gradvac.py b/src/torchjd/aggregation/_gradvac.py
@@ -135,7 +135,7 @@ class GradVac(GramianWeightedAggregator, Stateful, _NonDifferentiable):
     :class:`~torchjd.aggregation.GramianWeightedAggregator` implementing the aggregation step of
     Gradient Vaccine (GradVac) from `Gradient Vaccine: Investigating and Improving Multi-task
     Optimization in Massively Multilingual Models (ICLR 2021 Spotlight)
-    <https://openreview.net/forum?id=F1vEjWK-lH_>`_.
+    <https://arxiv.org/abs/2010.05874>`_.
 
     For each task :math:`i`, the order in which other tasks :math:`j` are visited is drawn at
     random. For each pair :math:`(i, j)`, the cosine similarity :math:`\phi_{ij}` between the
diff --git a/src/torchjd/scalarization/_imtl_l.py b/src/torchjd/scalarization/_imtl_l.py
@@ -14,7 +14,7 @@ class IMTLL(Scalarizer, Stateful):
     :class:`~torchjd.scalarization.Scalarizer` that combines the input tensor of values using learned
     per-task scales. ``IMTL-L`` is the loss-balancing variant of Impartial
     Multi-Task Learning, proposed in `Towards Impartial Multi-Task Learning
-    <https://openreview.net/pdf?id=IMPnRXEWpvr>`_.
+    <https://www.semanticscholar.org/paper/Towards-Impartial-Multi-task-Learning-Liu-Li/45c0828baec1dd53b81f1b2635788fdf27d0792d>`_.
 
     Each value :math:`L_i` is assigned a learnable scale parameter :math:`s_i`, and the values are
     combined as
diff --git a/src/torchjd/scalarization/_stch.py b/src/torchjd/scalarization/_stch.py
@@ -8,7 +8,7 @@ class STCH(Scalarizer):
     r"""
     :class:`~torchjd.scalarization.Scalarizer` that combines the input tensor of values using smooth
     Tchebycheff scalarization, as defined in `Smooth Tchebycheff Scalarization for Multi-Objective
-    Optimization <https://openreview.net/pdf?id=m4dO5L6eCp>`_.
+    Optimization <https://arxiv.org/abs/2402.19078>`_.
 
     It returns