add truncerr

Jutho · Jutho · commit aec6c917de98 · 2025-10-10T01:05:21.000+02:00
diff --git a/ext/MatrixAlgebraKitChainRulesCoreExt.jl b/ext/MatrixAlgebraKitChainRulesCoreExt.jl
@@ -2,7 +2,7 @@ module MatrixAlgebraKitChainRulesCoreExt
 
 using MatrixAlgebraKit
 using MatrixAlgebraKit: copy_input, initialize_output, zero!, diagview,
-    TruncatedAlgorithm, findtruncated, findtruncated_svd
+    TruncatedAlgorithm, findtruncated, findtruncated_svd, compute_truncerr!
 using ChainRulesCore
 using LinearAlgebra
 
@@ -113,15 +113,20 @@ for eig in (:eig, :eigh)
             Ac = copy_input($eig_f, A)
             DV = $(eig_f!)(Ac, DV, alg.alg)
             DV′, ind = MatrixAlgebraKit.truncate($eig_t!, DV, alg.trunc)
-            return DV′, $(_make_eig_t_pb)(A, DV, ind)
+            ϵ = compute_truncerr!(diagview(copy(DV[1])), ind)
+            return (DV′..., ϵ), $(_make_eig_t_pb)(A, DV, ind)
         end
         function $(_make_eig_t_pb)(A, DV, ind)
-            function $eig_t_pb(ΔDV)
+            function $eig_t_pb(ΔDVϵ)
                 ΔA = zero(A)
-                MatrixAlgebraKit.$eig_pb!(ΔA, A, DV, unthunk.(ΔDV), ind)
+                ΔD, ΔV, Δϵ = ΔDVϵ
+                if !MatrixAlgebraKit.iszerotangent(Δϵ) && !iszero(unthunk(Δϵ))
+                    throw(ArgumentError("Pullback for eig_trunc! does not yet support non-zero tangent for the truncation error"))
+                end
+                MatrixAlgebraKit.$eig_pb!(ΔA, A, DV, unthunk.((ΔD, ΔV)), ind)
                 return NoTangent(), ΔA, ZeroTangent(), NoTangent()
             end
-            function $eig_t_pb(::Tuple{ZeroTangent, ZeroTangent}) # is this extra definition useful?
+            function $eig_t_pb(::Tuple{ZeroTangent, ZeroTangent, ZeroTangent}) # is this extra definition useful?
                 return NoTangent(), ZeroTangent(), ZeroTangent(), NoTangent()
             end
             return $eig_t_pb
@@ -152,15 +157,20 @@ function ChainRulesCore.rrule(::typeof(svd_trunc!), A, USVᴴ, alg::TruncatedAlg
     Ac = copy_input(svd_compact, A)
     USVᴴ = svd_compact!(Ac, USVᴴ, alg.alg)
     USVᴴ′, ind = MatrixAlgebraKit.truncate(svd_trunc!, USVᴴ, alg.trunc)
-    return USVᴴ′, _make_svd_trunc_pullback(A, USVᴴ, ind)
+    ϵ = compute_truncerr!(diagview(copy(USVᴴ[2])), ind)
+    return (USVᴴ′..., ϵ), _make_svd_trunc_pullback(A, USVᴴ, ind)
 end
 function _make_svd_trunc_pullback(A, USVᴴ, ind)
-    function svd_trunc_pullback(ΔUSVᴴ)
+    function svd_trunc_pullback(ΔUSVᴴϵ)
         ΔA = zero(A)
-        MatrixAlgebraKit.svd_pullback!(ΔA, A, USVᴴ, unthunk.(ΔUSVᴴ), ind)
+        ΔU, ΔS, ΔVᴴ, Δϵ = ΔUSVᴴϵ
+        if !MatrixAlgebraKit.iszerotangent(Δϵ) && !iszero(unthunk(Δϵ))
+            throw(ArgumentError("Pullback for svd_trunc! does not yet support non-zero tangent for the truncation error"))
+        end
+        MatrixAlgebraKit.svd_pullback!(ΔA, A, USVᴴ, unthunk.((ΔU, ΔS, ΔVᴴ)), ind)
         return NoTangent(), ΔA, ZeroTangent(), NoTangent()
     end
-    function svd_trunc_pullback(::Tuple{ZeroTangent, ZeroTangent, ZeroTangent}) # is this extra definition useful?
+    function svd_trunc_pullback(::Tuple{ZeroTangent, ZeroTangent, ZeroTangent, ZeroTangent}) # is this extra definition useful?
         return NoTangent(), ZeroTangent(), ZeroTangent(), NoTangent()
     end
     return svd_trunc_pullback
diff --git a/src/implementations/eig.jl b/src/implementations/eig.jl
@@ -108,7 +108,8 @@ end
 
 function eig_trunc!(A, DV, alg::TruncatedAlgorithm)
     D, V = eig_full!(A, DV, alg.alg)
-    return first(truncate(eig_trunc!, (D, V), alg.trunc))
+    DVtrunc, ind = truncate(eig_trunc!, (D, V), alg.trunc)
+    return DVtrunc..., compute_truncerr!(diagview(D), ind)
 end
 
 # Diagonal logic
diff --git a/src/implementations/eigh.jl b/src/implementations/eigh.jl
@@ -111,7 +111,8 @@ end
 
 function eigh_trunc!(A, DV, alg::TruncatedAlgorithm)
     D, V = eigh_full!(A, DV, alg.alg)
-    return first(truncate(eigh_trunc!, (D, V), alg.trunc))
+    DVtrunc, ind = truncate(eigh_trunc!, (D, V), alg.trunc)
+    return DVtrunc..., compute_truncerr!(diagview(D), ind)
 end
 
 # Diagonal logic
diff --git a/src/implementations/svd.jl b/src/implementations/svd.jl
@@ -237,8 +237,9 @@ function svd_vals!(A::AbstractMatrix, S, alg::LAPACK_SVDAlgorithm)
 end
 
 function svd_trunc!(A, USVᴴ, alg::TruncatedAlgorithm)
-    USVᴴ′ = svd_compact!(A, USVᴴ, alg.alg)
-    return first(truncate(svd_trunc!, USVᴴ′, alg.trunc))
+    U, S, Vᴴ = svd_compact!(A, USVᴴ, alg.alg)
+    USVᴴtrunc, ind = truncate(svd_trunc!, (U, S, Vᴴ), alg.trunc)
+    return USVᴴtrunc..., compute_truncerr!(diagview(S), ind)
 end
 
 # Diagonal logic
diff --git a/src/implementations/truncation.jl b/src/implementations/truncation.jl
@@ -116,3 +116,10 @@ end
 _ind_intersect(A::AbstractVector, B::AbstractVector{Bool}) = _ind_intersect(B, A)
 _ind_intersect(A::AbstractVector{Bool}, B::AbstractVector{Bool}) = A .& B
 _ind_intersect(A, B) = intersect(A, B)
+
+# Compute the truncation error as the 2-norm of the discarded values.
+# Destructive: the entries `values[ind]` are overwritten with zeros in place.
+function compute_truncerr!(values::AbstractVector, ind)
+    values[ind] .= zero(eltype(values))
+    return norm(values)
+end
diff --git a/src/interface/eig.jl b/src/interface/eig.jl
@@ -31,16 +31,19 @@ See also [`eig_vals(!)`](@ref eig_vals) and [`eig_trunc(!)`](@ref eig_trunc).
 @functiondef eig_full
 
 """
-    eig_trunc(A; kwargs...) -> D, V
-    eig_trunc(A, alg::AbstractAlgorithm) -> D, V
-    eig_trunc!(A, [DV]; kwargs...) -> D, V
-    eig_trunc!(A, [DV], alg::AbstractAlgorithm) -> D, V
+    eig_trunc(A; kwargs...) -> D, V, ϵ
+    eig_trunc(A, alg::AbstractAlgorithm) -> D, V, ϵ
+    eig_trunc!(A, [DV]; kwargs...) -> D, V, ϵ
+    eig_trunc!(A, [DV], alg::AbstractAlgorithm) -> D, V, ϵ
 
 Compute a partial or truncated eigenvalue decomposition of the matrix `A`,
 such that `A * V ≈ V * D`, where the (possibly rectangular) matrix `V` contains 
 a subset of eigenvectors and the diagonal matrix `D` contains the associated eigenvalues,
 selected according to a truncation strategy.
 
+The function also returns `ϵ`, the truncation error defined as the 2-norm of the 
+discarded eigenvalues.
+
 !!! note
     The bang method `eig_trunc!` optionally accepts the output structure and
     possibly destroys the input matrix `A`. Always use the return value of the function
diff --git a/src/interface/eigh.jl b/src/interface/eigh.jl
@@ -9,15 +9,18 @@ For generic eigenvalue decompositions, see [`eig_full`](@ref).
 """
 
 """
-    eigh_full(A; kwargs...) -> D, V
-    eigh_full(A, alg::AbstractAlgorithm) -> D, V
-    eigh_full!(A, [DV]; kwargs...) -> D, V
-    eigh_full!(A, [DV], alg::AbstractAlgorithm) -> D, V
+    eigh_full(A; kwargs...) -> D, V, ϵ
+    eigh_full(A, alg::AbstractAlgorithm) -> D, V, ϵ
+    eigh_full!(A, [DV]; kwargs...) -> D, V, ϵ
+    eigh_full!(A, [DV], alg::AbstractAlgorithm) -> D, V, ϵ
 
 Compute the full eigenvalue decomposition of the symmetric or hermitian matrix `A`,
 such that `A * V = V * D`, where the unitary matrix `V` contains the orthogonal eigenvectors
 and the real diagonal matrix `D` contains the associated eigenvalues.
 
+The function also returns `ϵ`, the truncation error defined as the 2-norm of the 
+discarded eigenvalues.
+
 !!! note
     The bang method `eigh_full!` optionally accepts the output structure and
     possibly destroys the input matrix `A`. Always use the return value of the function
diff --git a/src/interface/svd.jl b/src/interface/svd.jl
@@ -43,16 +43,19 @@ See also [`svd_full(!)`](@ref svd_full), [`svd_vals(!)`](@ref svd_vals) and
 
 # TODO: decide if we should have `svd_trunc!!` instead
 """
-    svd_trunc(A; kwargs...) -> U, S, Vᴴ
-    svd_trunc(A, alg::AbstractAlgorithm) -> U, S, Vᴴ
-    svd_trunc!(A, [USVᴴ]; kwargs...) -> U, S, Vᴴ
-    svd_trunc!(A, [USVᴴ], alg::AbstractAlgorithm) -> U, S, Vᴴ
+    svd_trunc(A; kwargs...) -> U, S, Vᴴ, ϵ
+    svd_trunc(A, alg::AbstractAlgorithm) -> U, S, Vᴴ, ϵ
+    svd_trunc!(A, [USVᴴ]; kwargs...) -> U, S, Vᴴ, ϵ
+    svd_trunc!(A, [USVᴴ], alg::AbstractAlgorithm) -> U, S, Vᴴ, ϵ
 
 Compute a partial or truncated singular value decomposition (SVD) of `A`, such that
-`A * (Vᴴ)' =  U * S`. Here, `U` is an isometric matrix (orthonormal columns) of size
+`A * (Vᴴ)' ≈ U * S`. Here, `U` is an isometric matrix (orthonormal columns) of size
 `(m, k)`, whereas  `Vᴴ` is a matrix of size `(k, n)` with orthonormal rows and `S` is a
 square diagonal matrix of size `(k, k)`, with `k` is set by the truncation strategy.
 
+The function also returns `ϵ`, the truncation error defined as the 2-norm of the 
+discarded singular values.
+        
 !!! note
     The bang method `svd_trunc!` optionally accepts the output structure and
     possibly destroys the input matrix `A`. Always use the return value of the function
diff --git a/test/amd/eigh.jl b/test/amd/eigh.jl
@@ -46,14 +46,14 @@ end
         r = m - 2
         s = 1 + sqrt(eps(real(T)))
 
-        D1, V1 = @constinferred eigh_trunc(A; alg, trunc=truncrank(r))
+        D1, V1, ϵ1 = @constinferred eigh_trunc(A; alg, trunc=truncrank(r))
         @test length(diagview(D1)) == r
         @test isisometry(V1)
         @test A * V1 ≈ V1 * D1
         @test LinearAlgebra.opnorm(A - V1 * D1 * V1') ≈ D₀[r + 1]
 
         trunc = trunctol(; atol=s * D₀[r + 1])
-        D2, V2 = @constinferred eigh_trunc(A; alg, trunc)
+        D2, V2, ϵ2 = @constinferred eigh_trunc(A; alg, trunc)
         @test length(diagview(D2)) == r
         @test isisometry(V2)
         @test A * V2 ≈ V2 * D2
@@ -75,7 +75,7 @@ end
     A = V * D * V'
     A = (A + A') / 2
     alg = TruncatedAlgorithm(CUSOLVER_QRIteration(), truncrank(2))
-    D2, V2 = @constinferred eigh_trunc(A; alg)
+    D2, V2, ϵ2 = @constinferred eigh_trunc(A; alg)
     @test diagview(D2) ≈ diagview(D)[1:2] rtol = sqrt(eps(real(T)))
     @test_throws ArgumentError eigh_trunc(A; alg, trunc=(; maxrank=2))
 end=#
diff --git a/test/amd/svd.jl b/test/amd/svd.jl
@@ -94,23 +94,23 @@ end
 #         algs = (LAPACK_DivideAndConquer(), LAPACK_QRIteration(), LAPACK_Bisection(),
 #                 LAPACK_Jacobi())
 #     end
-
+#
 #     @testset "size ($m, $n)" for n in (37, m, 63)
 #         @testset "algorithm $alg" for alg in algs
 #             n > m && alg isa LAPACK_Jacobi && continue # not supported
 #             A = randn(rng, T, m, n)
 #             S₀ = svd_vals(A)
 #             minmn = min(m, n)
 #             r = minmn - 2
-
-#             U1, S1, V1ᴴ = @constinferred svd_trunc(A; alg, trunc=truncrank(r))
+#
+#             U1, S1, V1ᴴ, ϵ1 = @constinferred svd_trunc(A; alg, trunc=truncrank(r))
 #             @test length(S1.diag) == r
 #             @test LinearAlgebra.opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
-
+#
 #             s = 1 + sqrt(eps(real(T)))
 #             trunc2 = trunctol(; atol=s * S₀[r + 1])
-
-#             U2, S2, V2ᴴ = @constinferred svd_trunc(A; alg, trunc=trunctol(; atol=s * S₀[r + 1]))
+#
+#             U2, S2, V2ᴴ, ϵ2 = @constinferred svd_trunc(A; alg, trunc=trunctol(; atol=s * S₀[r + 1]))
 #             @test length(S2.diag) == r
 #             @test U1 ≈ U2
 #             @test S1 ≈ S2
diff --git a/test/chainrules.jl b/test/chainrules.jl
@@ -275,7 +275,7 @@ end
             ΔVtrunc = ΔV[:, ind]
             test_rrule(
                 copy_eig_trunc, A, truncalg ⊢ NoTangent();
-                output_tangent = (ΔDtrunc, ΔVtrunc),
+                output_tangent = (ΔDtrunc, ΔVtrunc, zero(real(T))),
                 atol = atol, rtol = rtol
             )
             dA1 = MatrixAlgebraKit.eig_pullback!(zero(A), A, (D, V), (ΔDtrunc, ΔVtrunc), ind)
@@ -290,7 +290,7 @@ end
         ΔVtrunc = ΔV[:, ind]
         test_rrule(
             copy_eig_trunc, A, truncalg ⊢ NoTangent();
-            output_tangent = (ΔDtrunc, ΔVtrunc),
+            output_tangent = (ΔDtrunc, ΔVtrunc, zero(real(T))),
             atol = atol, rtol = rtol
         )
         dA1 = MatrixAlgebraKit.eig_pullback!(zero(A), A, (D, V), (ΔDtrunc, ΔVtrunc), ind)
@@ -351,7 +351,7 @@ end
             ΔVtrunc = ΔV[:, ind]
             test_rrule(
                 copy_eigh_trunc, A, truncalg ⊢ NoTangent();
-                output_tangent = (ΔDtrunc, ΔVtrunc),
+                output_tangent = (ΔDtrunc, ΔVtrunc, zero(real(T))),
                 atol = atol, rtol = rtol
             )
             dA1 = MatrixAlgebraKit.eigh_pullback!(zero(A), A, (D, V), (ΔDtrunc, ΔVtrunc), ind)
@@ -366,7 +366,7 @@ end
         ΔVtrunc = ΔV[:, ind]
         test_rrule(
             copy_eigh_trunc, A, truncalg ⊢ NoTangent();
-            output_tangent = (ΔDtrunc, ΔVtrunc),
+            output_tangent = (ΔDtrunc, ΔVtrunc, zero(real(T))),
             atol = atol, rtol = rtol
         )
         dA1 = MatrixAlgebraKit.eigh_pullback!(zero(A), A, (D, V), (ΔDtrunc, ΔVtrunc), ind)
@@ -399,7 +399,7 @@ end
         test_rrule(
             config, eigh_trunc2, A;
             fkwargs = (; trunc = trunc),
-            output_tangent = (ΔD[ind, ind], ΔV[:, ind]),
+            output_tangent = (ΔD[ind, ind], ΔV[:, ind], zero(real(T))),
             atol = atol, rtol = rtol, rrule_f = rrule_via_ad, check_inferred = false
         )
     end
@@ -408,7 +408,7 @@ end
     test_rrule(
         config, eigh_trunc2, A;
         fkwargs = (; trunc = trunc),
-        output_tangent = (ΔD[ind, ind], ΔV[:, ind]),
+        output_tangent = (ΔD[ind, ind], ΔV[:, ind], zero(real(T))),
         atol = atol, rtol = rtol, rrule_f = rrule_via_ad, check_inferred = false
     )
 end
@@ -446,7 +446,7 @@ end
                 ΔVᴴtrunc = ΔVᴴ[ind, :]
                 test_rrule(
                     copy_svd_trunc, A, truncalg ⊢ NoTangent();
-                    output_tangent = (ΔUtrunc, ΔStrunc, ΔVᴴtrunc),
+                    output_tangent = (ΔUtrunc, ΔStrunc, ΔVᴴtrunc, zero(real(T))),
                     atol = atol, rtol = rtol
                 )
                 dA1 = MatrixAlgebraKit.svd_pullback!(zero(A), A, (U, S, Vᴴ), (ΔUtrunc, ΔStrunc, ΔVᴴtrunc), ind)
@@ -463,7 +463,7 @@ end
             ΔVᴴtrunc = ΔVᴴ[ind, :]
             test_rrule(
                 copy_svd_trunc, A, truncalg ⊢ NoTangent();
-                output_tangent = (ΔUtrunc, ΔStrunc, ΔVᴴtrunc),
+                output_tangent = (ΔUtrunc, ΔStrunc, ΔVᴴtrunc, zero(real(T))),
                 atol = atol, rtol = rtol
             )
             dA1 = MatrixAlgebraKit.svd_pullback!(zero(A), A, (U, S, Vᴴ), (ΔUtrunc, ΔStrunc, ΔVᴴtrunc), ind)
@@ -488,7 +488,7 @@ end
             test_rrule(
                 config, svd_trunc, A;
                 fkwargs = (; trunc = trunc),
-                output_tangent = (ΔU[:, ind], ΔS[ind, ind], ΔVᴴ[ind, :]),
+                output_tangent = (ΔU[:, ind], ΔS[ind, ind], ΔVᴴ[ind, :], zero(real(T))),
                 atol = atol, rtol = rtol, rrule_f = rrule_via_ad, check_inferred = false
             )
         end
@@ -497,7 +497,7 @@ end
         test_rrule(
             config, svd_trunc, A;
             fkwargs = (; trunc = trunc),
-            output_tangent = (ΔU[:, ind], ΔS[ind, ind], ΔVᴴ[ind, :]),
+            output_tangent = (ΔU[:, ind], ΔS[ind, ind], ΔVᴴ[ind, :], zero(real(T))),
             atol = atol, rtol = rtol, rrule_f = rrule_via_ad, check_inferred = false
         )
     end
diff --git a/test/cuda/eig.jl b/test/cuda/eig.jl
@@ -44,13 +44,13 @@ end
         rmin = findfirst(i -> abs(D₀[end - i]) != abs(D₀[end - i - 1]), 1:(m - 2))
         r = length(D₀) - rmin
 
-        D1, V1 = @constinferred eig_trunc(A; alg, trunc=truncrank(r))
+        D1, V1, ϵ1 = @constinferred eig_trunc(A; alg, trunc=truncrank(r))
         @test length(D1.diag) == r
         @test A * V1 ≈ V1 * D1
 
         s = 1 + sqrt(eps(real(T)))
         trunc = trunctol(; atol=s * abs(D₀[r + 1]))
-        D2, V2 = @constinferred eig_trunc(A; alg, trunc)
+        D2, V2, ϵ2 = @constinferred eig_trunc(A; alg, trunc)
         @test length(diagview(D2)) == r
         @test A * V2 ≈ V2 * D2
 
diff --git a/test/cuda/eigh.jl b/test/cuda/eigh.jl
@@ -41,14 +41,14 @@ end
         r = m - 2
         s = 1 + sqrt(eps(real(T)))
 
-        D1, V1 = @constinferred eigh_trunc(A; alg, trunc=truncrank(r))
+        D1, V1, ϵ1 = @constinferred eigh_trunc(A; alg, trunc=truncrank(r))
         @test length(diagview(D1)) == r
         @test isisometry(V1)
         @test A * V1 ≈ V1 * D1
         @test LinearAlgebra.opnorm(A - V1 * D1 * V1') ≈ D₀[r + 1]
 
         trunc = trunctol(; atol = s * D₀[r + 1])
-        D2, V2 = @constinferred eigh_trunc(A; alg, trunc)
+        D2, V2, ϵ2 = @constinferred eigh_trunc(A; alg, trunc)
         @test length(diagview(D2)) == r
         @test isisometry(V2)
         @test A * V2 ≈ V2 * D2
@@ -70,7 +70,7 @@ end
     A = V * D * V'
     A = (A + A') / 2
     alg = TruncatedAlgorithm(CUSOLVER_QRIteration(), truncrank(2))
-    D2, V2 = @constinferred eigh_trunc(A; alg)
+    D2, V2, ϵ2 = @constinferred eigh_trunc(A; alg)
     @test diagview(D2) ≈ diagview(D)[1:2] rtol = sqrt(eps(real(T)))
     @test_throws ArgumentError eigh_trunc(A; alg, trunc=(; maxrank=2))
 end=#
diff --git a/test/cuda/svd.jl b/test/cuda/svd.jl
@@ -103,15 +103,15 @@ end
             minmn = min(m, n)
             r = k
 
-            U1, S1, V1ᴴ = @constinferred svd_trunc(A; alg, trunc = truncrank(r))
+            U1, S1, V1ᴴ, ϵ1 = @constinferred svd_trunc(A; alg, trunc = truncrank(r))
             @test length(S1.diag) == r
             @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
 
             if !(alg isa CUSOLVER_Randomized)
                 s = 1 + sqrt(eps(real(T)))
                 trunc2 = trunctol(; atol = s * S₀[r + 1])
 
-                U2, S2, V2ᴴ = @constinferred svd_trunc(A; alg, trunc = trunctol(; atol = s * S₀[r + 1]))
+                U2, S2, V2ᴴ, ϵ2 = @constinferred svd_trunc(A; alg, trunc = trunctol(; atol = s * S₀[r + 1]))
                 @test length(S2.diag) == r
                 @test U1 ≈ U2
                 @test parent(S1) ≈ parent(S2)
diff --git a/test/eig.jl b/test/eig.jl
diff --git a/test/eigh.jl b/test/eigh.jl
diff --git a/test/svd.jl b/test/svd.jl