fix: formalize notations and change signs in dual variables (#21)

klamike · gdalle · web-flow · commit 9c8296e196e4 · 2026-05-10T10:23:45.000+02:00
* update dual objective to use `lᵀ|y|⁺ - uᵀ|y|⁻` convention

* [skip ci] typo

* store `c - Aᵀy` in `scratch.x`

* rename `scratch.r` to `scratch.z`

* finish rename

* Add mathematical formulation

* Update src/components/errors.jl

* Rename variables

* Fix mul

---------

Co-authored-by: Guillaume Dalle <22795598+gdalle@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -67,9 +67,12 @@ That's what makes it so cool.
 
 ## References
 
-> [PDLP: A Practical First-Order Method for Large-Scale Linear Programming](https://arxiv.org/abs/2501.07018), Applegate et al. (2025)
-
-> [An Overview of GPU-based First-Order Methods for Linear Programming and Extensions](https://arxiv.org/abs/2506.02174v1), Lu & Yang (2025)
+- [Practical Large-Scale Linear Programming using Primal-Dual Hybrid Gradient](https://arxiv.org/abs/2106.04756), Applegate et al. (2022)
+- [cuPDLP.jl: A GPU Implementation of Restarted Primal-Dual Hybrid Gradient for Linear Programming in Julia](https://arxiv.org/abs/2311.12180), Lu et al. (2024)
+- [cuPDLP-C: A Strengthened Implementation of cuPDLP for Linear Programming by C language](https://arxiv.org/abs/2312.14832), Lu et al. (2024)
+- [cuPDLPx: A Further Enhanced GPU-Based First-Order Solver for Linear Programming](https://arxiv.org/abs/2507.14051), Lu et al. (2025)
+- [PDLP: A Practical First-Order Method for Large-Scale Linear Programming](https://arxiv.org/abs/2501.07018), Applegate et al. (2025)
+- [An Overview of GPU-based First-Order Methods for Linear Programming and Extensions](https://arxiv.org/abs/2506.02174v1), Lu & Yang (2025)
 
 ## Roadmap
 
diff --git a/docs/make.jl b/docs/make.jl
@@ -22,8 +22,8 @@ makedocs(;
         "tutorial.md",
         "api.md",
         "Dev docs" => [
+            "math.md",
             "internals.md",
-            "preconditioning.md",
         ],
     ],
 )
diff --git a/docs/src/math.md b/docs/src/math.md
@@ -0,0 +1,135 @@
+# Math
+
+> See the references in the README for details, but beware that we adopt slightly different notational conventions.
+
+## Primal and dual problems
+
+PDLP solves Linear Programs (LPs) formulated as follows:
+
+```math
+\min_x \quad c^\top x \quad \text{s.t.} \quad \begin{cases}
+\ell_c \leq A x \leq u_c \\
+\ell_v \leq x \leq u_v
+\end{cases}
+```
+
+We associate non-negative multipliers $y_\ell, y_u, z_\ell, z_u \geq 0$ with all four inequality constraints, leading to the following Lagrangian:
+
+```math
+\begin{align*}
+\mathcal{L}(x, y_\ell, y_u, z_\ell, z_u)
+& = c^\top x + y_\ell^\top (\ell_c - A x) + y_u^\top (A x - u_c) + z_\ell^\top (\ell_v - x) + z_u^\top (x - u_v) \\
+& = (c - A^\top y_\ell + A^\top y_u - z_\ell + z_u)^\top x + (y_\ell^\top \ell_c - y_u^\top u_c) + (z_\ell^\top \ell_v - z_u^\top u_v)
+\end{align*}
+```
+
+We interpret the non-negative multipliers $y_\ell, z_\ell$ and $y_u, z_u$ as the positive and negative parts of sign-unrestricted multipliers $y$ and $z$, associated with the constraints and the variable bounds respectively:
+
+```math
+y = y_\ell - y_u \quad \text{and} \quad z = z_\ell - z_u
+```
+
+which amounts to
+
+```math
+\begin{align*}
+y_\ell & = y^+ & z_\ell & = z^+ \\
+y_u & = y^- & z_u & = z^-
+\end{align*}
+```
+
+Note that if a bound is infinite, the corresponding non-negative multiplier ($y_\ell$, $y_u$, $z_\ell$ or $z_u$) is constrained to be zero, which restricts the sign of the matching component of $y$ or $z$.
+We sum up these elementwise constraints by writing $y \in \mathcal{Y}$ and $z \in \mathcal{Z}$.
+
+We also define the shortcut
+
+```math
+p(y; \ell, u) = \ell^\top y^+ - u^\top y^-
+```
+
+which leaves us with
+
+```math
+\mathcal{L}(x, y, z) = (c - A^\top y - z)^\top x + p(y; \ell_c, u_c) + p(z; \ell_v, u_v)
+```
+
+From there, we deduce the dual problem:
+
+```math
+\max_{y, z} \quad p(y; \ell_c, u_c) + p(z; \ell_v, u_v) \quad \text{s.t.} \quad \begin{cases}
+0 = c - A^\top y - z \\
+y \in \mathcal{Y} \\
+z \in \mathcal{Z}
+\end{cases}
+```
+
+The primal-dual gap (one of our stopping criteria) is therefore given by
+
+```math
+g = c^\top x - \left(p(y; \ell_c, u_c) + p(z; \ell_v, u_v)\right)
+```
+
+## Preconditioning
+
+The original problem $P$ and preconditioned problem $\tilde{P}$ are linked by:
+
+- Constraint matrix $\tilde{A} = D_1 A D_2$ so $A = D_1^{-1} \tilde{A} D_2^{-1}$
+- Transposed constraint matrix $\tilde{A}^\top = D_2 A^\top D_1$ so $A^\top = D_2^{-1} \tilde{A}^\top D_1^{-1}$
+- Primal variable $\tilde{x} = D_2^{-1} x$ so $x = D_2 \tilde{x}$
+- Dual variable for constraints $\tilde{y} = D_1^{-1} y$ so $y = D_1 \tilde{y}$, but $\tilde{\mathcal{Y}} = \mathcal{Y}$
+- Dual variable for bounds $\tilde{z} = D_2 z$ so $z = D_2^{-1} \tilde{z}$, but $\tilde{\mathcal{Z}} = \mathcal{Z}$
+- Cost $\tilde{c} = D_2 c$ so $c = D_2^{-1} \tilde{c}$
+- Bounds $(\tilde{\ell}_v, \tilde{u}_v) = D_2^{-1} (\ell_v, u_v)$ so $(\ell_v, u_v) = D_2 (\tilde{\ell}_v, \tilde{u}_v)$
+- Constraints $(\tilde{\ell}_c, \tilde{u}_c) = D_1 (\ell_c, u_c)$ so $(\ell_c, u_c) = D_1^{-1} (\tilde{\ell}_c, \tilde{u}_c)$
+
+Then we have the following terms in the KKT errors:
+
+```math
+\begin{align*}
+c - A^\top y - z
+& = D_2^{-1} \tilde{c} - D_2^{-1} \tilde{A}^\top D_1^{-1} D_1 \tilde{y} - D_2^{-1} \tilde{z} \\
+& = D_2^{-1}(\tilde{c} - \tilde{A}^\top \tilde{y} - \tilde{z})
+\end{align*}
+```
+
+```math
+\begin{align*}
+Ax - \mathrm{proj}_{[\ell_c,u_c]}(Ax)
+& = D_1^{-1} \tilde{A} D_2^{-1} D_2 \tilde{x} - \mathrm{proj}_{[D_1^{-1} \tilde{\ell}_c, D_1^{-1} \tilde{u}_c]} (D_1^{-1} \tilde{A} D_2^{-1} D_2 \tilde{x}) \\
+& = D_1^{-1} \tilde{A} \tilde{x} - \mathrm{proj}_{[D_1^{-1} \tilde{\ell}_c, D_1^{-1} \tilde{u}_c]} (D_1^{-1} \tilde{A} \tilde{x}) \\
+& = D_1^{-1} \left[\tilde{A} \tilde{x} - \mathrm{proj}_{[\tilde{\ell}_c, \tilde{u}_c]} (\tilde{A} \tilde{x})\right] \\
+\end{align*}
+```
+
+```math
+z - \mathrm{proj}_{\mathcal{Z}}(z) = D_2^{-1} \tilde{z} - \mathrm{proj}_{\tilde{\mathcal{Z}}}(D_2^{-1} \tilde{z}) = D_2^{-1} (\tilde{z} - \mathrm{proj}_{\tilde{\mathcal{Z}}}(\tilde{z}))
+```
+
+```math
+c^\top x = (D_2^{-1} \tilde{c})^\top (D_2 \tilde{x}) = \tilde{c}^\top D_2^{-1} D_2 \tilde{x} = \tilde{c}^\top \tilde{x}
+```
+
+```math
+\begin{align*}
+p(y; \ell_c, u_c)
+& = \ell_c^\top y^+ - u_c^\top y^- \\
+& = (D_1^{-1} \tilde{\ell}_c)^\top (D_1 \tilde{y})^+ - (D_1^{-1} \tilde{u}_c)^\top (D_1 \tilde{y})^- \\
+& = \tilde{\ell}_c^\top D_1^{-1} D_1 \tilde{y}^+ - \tilde{u}_c^\top D_1^{-1} D_1 \tilde{y}^- \\
+& = \tilde{\ell}_c^\top \tilde{y}^+ - \tilde{u}_c^\top \tilde{y}^-
+\end{align*}
+```
+
+```math
+\begin{align*}
+p(z; \ell_v, u_v)
+& = \ell_v^\top z^+ - u_v^\top z^- \\
+& = (D_2 \tilde{\ell}_v)^\top (D_2^{-1} \tilde{z})^+ - (D_2 \tilde{u}_v)^\top (D_2^{-1} \tilde{z})^- \\
+& = \tilde{\ell}_v^\top D_2 D_2^{-1} \tilde{z}^+ - \tilde{u}_v^\top D_2 D_2^{-1} \tilde{z}^- \\
+& = \tilde{\ell}_v^\top \tilde{z}^+ - \tilde{u}_v^\top \tilde{z}^-
+\end{align*}
+```
+
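+These derivations make use of a few key observations, which hold when $D_1$ and $D_2$ are diagonal with positive entries (in particular, $(D v)^+ = D v^+$ and $(D v)^- = D v^-$ for such a scaling $D$):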
+
+- Projection on $\mathcal{Z}$ commutes with scaling
+- Projection on an interval commutes with scaling if scaling is also applied to the interval in question
diff --git a/docs/src/preconditioning.md b/docs/src/preconditioning.md
diff --git a/src/algorithms/pdhg.jl b/src/algorithms/pdhg.jl
@@ -47,7 +47,7 @@ function initialize(
     η = fixed_stepsize(milp, algo.step_size)
     ω = one(η)
     step_sizes = StepSizes(; η, ω)
-    scratch = Scratch(; x = similar(sol.x), y = similar(sol.y), r = similar(sol.x))
+    scratch = Scratch(sol)
     stats = ConvergenceStats(T; starting_time)
     state = PDHGState(; sol, sol_last, step_sizes, scratch, stats)
     return state
diff --git a/src/algorithms/pdlp.jl b/src/algorithms/pdlp.jl
@@ -55,7 +55,7 @@ function initialize(
     η = fixed_stepsize(milp, algo.step_size)
     ω = primal_weight_init(milp, algo.step_size)
     step_sizes = StepSizes(; η, ω)
-    scratch = Scratch(; x = similar(sol.x), y = similar(sol.y), r = similar(sol.x))
+    scratch = Scratch(sol)
     iteration = IterationCounter(0, 0, 0)
     restart_stats = RestartStats(T)
     stats = ConvergenceStats(T; starting_time)
diff --git a/src/components/errors.jl b/src/components/errors.jl
@@ -71,31 +71,38 @@ function kkt_errors!(
     (; c, lv, uv, A, At, lc, uc, D1, D2) = milp
 
     A_x = mul!(scratch.y, A, x)
-    At_y = mul!(scratch.x, At, y)
-    r = @. scratch.r = proj_multiplier(c - At_y, lv, uv)
+    c_At_y = mul!(scratch.x, At, y, -one(T), zero(T))
+    c_At_y .+= c
+    z = @. scratch.z = proj_multiplier(c_At_y, lv, uv)
 
     primal_diff = @. scratch.y = inv(D1.diag) * (A_x - clamp(A_x, lc, uc))
     primal = norm(primal_diff)
+
     rescaled_combined_bounds = @. scratch.y = inv(D1.diag) * combine(lc, uc)
     primal_scale = one(T) + norm(rescaled_combined_bounds)
 
-    dual_diff = @. scratch.x = inv(D2.diag) * (c - At_y - r)
+    dual_diff = @. scratch.x = inv(D2.diag) * (c_At_y - z)
     dual = norm(dual_diff)
+
     rescaled_obj = @. scratch.x = inv(D2.diag) * c
     dual_scale = one(T) + norm(rescaled_obj)
 
+    # dual objective: lcᵀy⁺ - ucᵀy⁻ + lvᵀz⁺ - uvᵀz⁻ (⁺/⁻: positive/negative parts)
+    #    We compute it as ∑ⱼ (lc⋅y⁺ - uc⋅y⁻)ⱼ + ∑ᵢ (lv⋅z⁺ - uv⋅z⁻)ᵢ
+    #    where pc = lc⋅y⁺ - uc⋅y⁻ and pv = lv⋅z⁺ - uv⋅z⁻ are elementwise products
     pc = @. scratch.y = (
-        safeprod_left(uc, positive_part(-y)) - safeprod_left(lc, negative_part(-y))
+        safeprod_left(lc, positive_part(y)) - safeprod_left(uc, negative_part(y))
     )
-    pv = @. scratch.r = (
-        safeprod_left(uv, positive_part(-r)) - safeprod_left(lv, negative_part(-r))
+    pv = @. scratch.z = (
+        safeprod_left(lv, positive_part(z)) - safeprod_left(uv, negative_part(z))
     )
     pc_sum = sum(pc)
     pv_sum = sum(pv)
     cx = dot(c, x)
+    dobj = pc_sum + pv_sum
 
-    gap = abs(cx + pc_sum + pv_sum)
-    gap_scale = one(T) + abs(pc_sum + pv_sum) + abs(cx)
+    gap = abs(cx - dobj)
+    gap_scale = one(T) + abs(dobj) + abs(cx)
 
     err = KKTErrors(;
         primal,
diff --git a/src/components/scratch.jl b/src/components/scratch.jl
@@ -4,7 +4,7 @@
     "dual scratch (length `ncons`)"
     y::V
     "dual scratch (length `nvar`)"
-    r::V
+    z::V
 end
 
 Scratch(sol::PrimalDualSolution) = Scratch(similar(sol.x), similar(sol.y), similar(sol.x))
diff --git a/test/components/errors.jl b/test/components/errors.jl
@@ -7,7 +7,7 @@ function p(y, l, u)
     y⁻ = CoolPDLP.negative_part.(y)
     u_noinf = CoolPDLP.safe.(u)
     l_noinf = CoolPDLP.safe.(l)
-    return dot(y⁺, u_noinf) - dot(y⁻, l_noinf)
+    return dot(y⁺, l_noinf) - dot(y⁻, u_noinf)
 end
 
 milp, sol = CoolPDLP.random_milp_and_sol(100, 200, 0.4)
@@ -27,10 +27,10 @@ err_p = CoolPDLP.kkt_errors!(scratch, sol_p, milp_p)
 @testset "Correct KKT errors" begin
     @test err.primal ≈ norm(A * x - CoolPDLP.clamp.(A * x, lc, uc))
     @test err.dual ≈ norm(c - At * y - r)
-    @test err.gap ≈ abs(dot(c, x) + p(-y, lc, uc) + p(-r, lv, uv))
+    @test err.gap ≈ abs(dot(c, x) - (p(y, lc, uc) + p(r, lv, uv)))
     @test err.primal_scale ≈ 1 + norm(CoolPDLP.combine.(lc, uc))
     @test err.dual_scale ≈ 1 + norm(c)
-    @test err.gap_scale ≈ 1 + abs(dot(c, x)) + abs(p(-y, lc, uc) + p(-r, lv, uv))
+    @test err.gap_scale ≈ 1 + abs(dot(c, x)) + abs(p(y, lc, uc) + p(r, lv, uv))
 end
 
 @testset "Invariance by preconditioning" begin