Skip to content

Commit 1aed3c3

Browse files
authored
feat(scalarization): Add pbi (#746)
1 parent c455c66 commit 1aed3c3

6 files changed

Lines changed: 213 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ changelog does not include internal changes that do not affect the user.
1414
Learning](https://arxiv.org/pdf/2103.13392) (ICDM 2021), a `Scalarizer` that combines a linear
1515
scalarization with a cosine-similarity penalty pulling the vector of values toward a preference
1616
direction.
17+
- Added `PBI` (Penalty-based Boundary Intersection) from [MOEA/D: A Multiobjective Evolutionary
18+
Algorithm Based on Decomposition](https://ieeexplore.ieee.org/document/4358754) (IEEE TEVC 2007), a
19+
`Scalarizer` that decomposes the values into a component along a preference direction and a
20+
penalized perpendicular component.
1721

1822
## [0.15.0] - 2026-06-15
1923

docs/source/docs/scalarization/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Abstract base class
2121
geometric_mean.rst
2222
imtl_l.rst
2323
mean.rst
24+
pbi.rst
2425
random.rst
2526
stch.rst
2627
sum.rst
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
:hide-toc:
2+
3+
PBI
4+
===
5+
6+
.. autoclass:: torchjd.scalarization.PBI
7+
:members: __call__

src/torchjd/scalarization/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from ._geometric_mean import GeometricMean
2727
from ._imtl_l import IMTLL
2828
from ._mean import Mean
29+
from ._pbi import PBI
2930
from ._random import Random
3031
from ._scalarizer_base import Scalarizer
3132
from ._stch import STCH
@@ -40,6 +41,7 @@
4041
"GeometricMean",
4142
"IMTLL",
4243
"Mean",
44+
"PBI",
4345
"Random",
4446
"Scalarizer",
4547
"STCH",

src/torchjd/scalarization/_pbi.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import torch
2+
from torch import Tensor
3+
4+
from ._scalarizer_base import Scalarizer
5+
6+
# Small constant added under the square root in `PBI.forward`, so that the gradient of the
# perpendicular distance stays finite when that distance is exactly zero.
_EPSILON = 1e-12
7+
8+
9+
class PBI(Scalarizer):
    r"""
    :class:`~torchjd.scalarization.Scalarizer` that combines the input tensor of values using the
    Penalty-based Boundary Intersection (PBI) scalarization, proposed in `MOEA/D: A Multiobjective
    Evolutionary Algorithm Based on Decomposition <https://ieeexplore.ieee.org/document/4358754>`_.

    It decomposes the values, relative to a reference point, into a component along a preference
    direction and a component perpendicular to it, and penalizes the latter:

    .. math::
        d_1 = (L - z^*)^\top \hat w, \qquad
        d_2 = \lVert (L - z^*) - d_1 \hat w \rVert, \qquad
        d_1 + \theta\, d_2,

    where:

    - :math:`L_i` is the :math:`i`-th input value (the :math:`i`-th objective);
    - :math:`z^*` is the reference (ideal) point (the ``reference`` parameter);
    - :math:`\hat w = w / \lVert w \rVert` is the normalized preference direction (the ``weights``
      parameter);
    - :math:`d_1` is the distance along the preference direction and :math:`d_2` is the distance to
      it;
    - :math:`\theta` is the penalty coefficient applied to :math:`d_2` (the ``theta`` parameter).

    :param theta: The penalty coefficient :math:`\theta` applied to the perpendicular distance. Must
        be non-negative (``nan`` is rejected as well). A value of ``0`` reduces PBI to the
        projection onto the preference direction. The paper uses ``5`` in its experiments; there is
        no single best value, and the paper notes that a too large or too small value worsens the
        result.
    :param weights: The preference vector :math:`w`, giving the direction along which the values are
        decomposed. Its values should be non-negative. It must have a non-zero norm, because it is
        divided by its norm at call time (an all-zero vector yields ``nan``). It must have the same
        shape as the values passed at call time. To approximate the whole Pareto front rather than a
        single trade-off, it should be re-sampled from a Dirichlet distribution and reassigned
        before every call, e.g. for ``m`` objectives
        ``pbi.weights = torch.distributions.Dirichlet(torch.ones(m)).sample()``.
    :param reference: The reference (ideal) point :math:`z^*` subtracted from the values. It should
        be a lower bound on the values. If ``None``, the origin is used, which assumes non-negative
        values. If provided, it must have the same shape as the values passed at call time.

    .. note::
        :math:`d_2` is a Euclidean norm, whose gradient is undefined when the values lie exactly on
        the preference direction (:math:`d_2 = 0`). To keep the gradient finite there, a small
        constant (:math:`10^{-12}`) is added under the square root. This increases :math:`d_2` by
        at most :math:`10^{-6}`, and therefore the result by at most :math:`10^{-6}\,\theta`. The
        shift is largest at that point and negligible elsewhere.
    """

    def __init__(self, theta: float, weights: Tensor, reference: Tensor | None = None) -> None:
        # Written as `not theta >= 0.0` rather than `theta < 0.0` so that `nan`, for which every
        # comparison is false, is rejected too instead of silently producing `nan` outputs.
        if not theta >= 0.0:
            raise ValueError(f"Parameter `theta` should be non-negative. Found `theta = {theta}`.")

        super().__init__()
        self.theta = theta
        self.weights = weights
        self.reference = reference

    def forward(self, values: Tensor, /) -> Tensor:
        # Shapes are checked at call time (not in `__init__`) because `weights` and `reference` are
        # plain attributes that may be reassigned between calls.
        if self.weights.shape != values.shape:
            raise ValueError(
                f"Parameter `weights` should have the same shape as `values`. Found "
                f"`weights.shape = {tuple(self.weights.shape)}` and `values.shape = "
                f"{tuple(values.shape)}`."
            )
        if self.reference is not None and self.reference.shape != values.shape:
            raise ValueError(
                f"Parameter `reference` should have the same shape as `values`. Found "
                f"`reference.shape = {tuple(self.reference.shape)}` and `values.shape = "
                f"{tuple(values.shape)}`."
            )

        shifted = values if self.reference is None else values - self.reference
        # Everything below works on 1-D vectors, whatever the shape of the inputs.
        f = shifted.flatten()
        direction = self.weights.flatten()
        direction = direction / direction.norm()

        # d1: signed length of the projection of the shifted values onto the preference direction.
        d1 = f @ direction
        perpendicular = f - d1 * direction
        # `perpendicular` has a zero norm when the values lie exactly on the preference direction
        # (always the case for a single-objective input, which has no perpendicular component). The
        # norm's gradient is then undefined, so we add a small constant under the square root to keep
        # it finite. This increases d2 by at most sqrt(_EPSILON) = 1e-6, i.e. the result by at most
        # theta * 1e-6, and is negligible when the values are away from the preference direction.
        d2 = torch.sqrt(perpendicular @ perpendicular + _EPSILON)
        return d1 + self.theta * d2

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(theta={self.theta}, weights={self.weights!r}, "
            f"reference={self.reference!r})"
        )
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import torch
2+
from pytest import mark, raises
3+
from torch import Tensor
4+
from utils.tensors import tensor_
5+
6+
from torchjd.scalarization import PBI
7+
8+
from ._asserts import (
9+
assert_grad_flow,
10+
assert_permutation_invariant,
11+
assert_returns_scalar,
12+
)
13+
from ._inputs import all_inputs
14+
15+
16+
def _uniform(values: Tensor) -> Tensor:
    """Return a preference vector shaped like `values` with every entry set to `1 / numel`."""
    share = 1.0 / values.numel()
    return torch.full_like(values, share)
19+
20+
21+
def test_value() -> None:
    # The weights [1, 1] normalize to [1, 1] / sqrt(2). Projecting [2, 0] onto that direction gives
    # d1 = sqrt(2), and the remainder [1, -1] has norm d2 = sqrt(2). With theta = 1, the expected
    # output is therefore d1 + d2 = 2 * sqrt(2).
    scalarizer = PBI(theta=1.0, weights=tensor_([1.0, 1.0]))
    result = scalarizer(tensor_([2.0, 0.0]))
    expected = tensor_(2.0) * tensor_(2.0).sqrt()
    torch.testing.assert_close(result, expected)
26+
27+
28+
def test_theta_zero_is_projection() -> None:
    # theta = 0 removes the penalty term, leaving only the projection d1. Projecting [2, 0] onto
    # [1, 1] / sqrt(2) gives d1 = sqrt(2).
    scalarizer = PBI(theta=0.0, weights=tensor_([1.0, 1.0]))
    result = scalarizer(tensor_([2.0, 0.0]))
    expected = tensor_(2.0).sqrt()
    torch.testing.assert_close(result, expected)
32+
33+
34+
def test_reference_shifts_values() -> None:
    # [3, 1] minus the reference [1, 1] is [2, 0], so the scalarizer with a reference must agree
    # with the reference-free scalarizer evaluated directly on [2, 0].
    shifted_scalarizer = PBI(
        theta=1.0,
        weights=tensor_([1.0, 1.0]),
        reference=tensor_([1.0, 1.0]),
    )
    actual = shifted_scalarizer(tensor_([3.0, 1.0]))

    plain_scalarizer = PBI(theta=1.0, weights=tensor_([1.0, 1.0]))
    expected = plain_scalarizer(tensor_([2.0, 0.0]))

    torch.testing.assert_close(actual, expected)
40+
41+
42+
def test_full_formula() -> None:
    theta = 5.0
    values = tensor_([1.0, 2.0, 4.0])
    weights = tensor_([0.5, 0.3, 0.2])
    reference = tensor_([0.5, 0.5, 0.5])

    # Recompute d1 + theta * d2 by hand, without the stabilizing constant: d2 is far from zero
    # here, so the constant has no visible effect within the comparison tolerance.
    offset = values - reference
    unit = weights / weights.norm()
    along = (offset * unit).sum()
    across = (offset - along * unit).norm()
    expected = along + theta * across

    actual = PBI(theta, weights=weights, reference=reference)(values)
    torch.testing.assert_close(actual, expected)
53+
54+
55+
def test_finite_when_values_on_preference_ray() -> None:
    # Values that sit exactly on the preference direction have d2 = 0. This is the case the
    # constant under the square root exists for: both the output and its gradient must stay finite
    # (no nan).
    weights = tensor_([1.0, 2.0])
    # The values are a copy of the weights, hence on the preference ray.
    point = weights.detach().clone().requires_grad_()

    result = PBI(theta=5.0, weights=weights)(point)
    result.backward()

    assert torch.isfinite(result)
    grad = point.grad
    assert grad is not None
    assert torch.isfinite(grad).all()
66+
67+
68+
@mark.parametrize("values", all_inputs)
def test_expected_structure(values: Tensor) -> None:
    # Runs the shared `assert_returns_scalar` check on PBI with uniform weights, for every input.
    scalarizer = PBI(theta=5.0, weights=_uniform(values))
    assert_returns_scalar(scalarizer, values)
71+
72+
73+
@mark.parametrize("values", all_inputs)
def test_grad_flow(values: Tensor) -> None:
    # Runs the shared `assert_grad_flow` check on PBI with uniform weights, for every input.
    scalarizer = PBI(theta=5.0, weights=_uniform(values))
    assert_grad_flow(scalarizer, values)
76+
77+
78+
@mark.parametrize("values", all_inputs)
def test_permutation_invariant(values: Tensor) -> None:
    # Uniform weights and no reference make both d1 and d2 symmetric in the inputs, so permuting
    # the values should not change the output.
    scalarizer = PBI(theta=5.0, weights=_uniform(values))
    assert_permutation_invariant(scalarizer, values)
82+
83+
84+
@mark.parametrize("theta", [-1.0, -0.5])
def test_raises_on_negative_theta(theta: float) -> None:
    # A negative penalty coefficient must be rejected when the scalarizer is constructed.
    weights = tensor_([0.5, 0.5])
    with raises(ValueError):
        PBI(theta=theta, weights=weights)
88+
89+
90+
def test_raises_on_weights_shape_mismatch() -> None:
    # Three weights against two values: the mismatch is only detected when the scalarizer is
    # called, not when it is constructed.
    three_weights = tensor_([1.0, 1.0, 1.0])
    two_values = tensor_([1.0, 1.0])
    scalarizer = PBI(theta=5.0, weights=three_weights)
    with raises(ValueError):
        scalarizer(two_values)
94+
95+
96+
def test_raises_on_reference_shape_mismatch() -> None:
    # The weights match the two values but the reference has three entries: the call must fail.
    scalarizer = PBI(
        theta=5.0,
        weights=tensor_([1.0, 1.0]),
        reference=tensor_([0.0, 0.0, 0.0]),
    )
    two_values = tensor_([1.0, 1.0])
    with raises(ValueError):
        scalarizer(two_values)
100+
101+
102+
def test_representations() -> None:
    # `repr` spells out every constructor argument, while `str` is just the class name.
    scalarizer = PBI(theta=5.0, weights=torch.tensor([0.5, 0.5]))
    expected_repr = "PBI(theta=5.0, weights=tensor([0.5000, 0.5000]), reference=None)"
    assert repr(scalarizer) == expected_repr
    assert str(scalarizer) == "PBI"

0 commit comments

Comments
 (0)