-
Notifications
You must be signed in to change notification settings - Fork 7
Some basic svd forward rules and tests #247
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 14 commits
33ab47b
838ecab
be9afda
883dadb
1e03d65
d8c9427
ec2a674
9d0f666
8940756
aa3a2ee
267886d
64c849f
dc7bbeb
c5cf3de
17c2158
c89643d
fbcd972
dd2c5a3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,7 @@ using MatrixAlgebraKit: eig_trunc_pullback!, eigh_trunc_pullback!, eigh_vals_pul | |
| using MatrixAlgebraKit: left_polar_pullback!, right_polar_pullback! | ||
| using MatrixAlgebraKit: left_polar_pushforward!, right_polar_pushforward! | ||
| using MatrixAlgebraKit: svd_pullback!, svd_trunc_pullback!, svd_vals_pullback! | ||
| using MatrixAlgebraKit: svd_pushforward!, svd_trunc_pushforward!, svd_vals_pushforward! | ||
| using MatrixAlgebraKit: TruncatedAlgorithm | ||
| using LinearAlgebra | ||
|
|
||
|
|
@@ -538,7 +539,7 @@ for (f!, f) in ( | |
| (:svd_compact!, :svd_compact), | ||
| ) | ||
| @eval begin | ||
| @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof($f!), Any, Tuple{<:Any, <:Any, <:Any}, MatrixAlgebraKit.AbstractAlgorithm} | ||
| @is_primitive Mooncake.DefaultCtx Tuple{typeof($f!), Any, Tuple{<:Any, <:Any, <:Any}, MatrixAlgebraKit.AbstractAlgorithm} | ||
| function Mooncake.rrule!!(::CoDual{typeof($f!)}, A_dA::CoDual, USVᴴ_dUSVᴴ::CoDual, alg_dalg::CoDual) | ||
| A, dA = arrayify(A_dA) | ||
| USVᴴ = Mooncake.primal(USVᴴ_dUSVᴴ) | ||
|
|
@@ -562,7 +563,18 @@ for (f!, f) in ( | |
| end | ||
| return USVᴴ_dUSVᴴ, svd_adjoint | ||
| end | ||
| @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof($f), Any, MatrixAlgebraKit.AbstractAlgorithm} | ||
| # Forward-mode rule for the in-place `$f!(A, USVᴴ, alg)`: runs the primal decomposition and then | ||
| # calls `svd_pushforward!` with the tangent of `A` and the tangent storage of `(U, S, Vᴴ)`. | ||
| # Returns the incoming `USVᴴ_dUSVᴴ` dual. | ||
| function Mooncake.frule!!(::Dual{typeof($f!)}, A_dA::Dual, USVᴴ_dUSVᴴ::Dual, alg_dalg::Dual) | ||
| A, dA = arrayify(A_dA) | ||
| USVᴴ = Mooncake.primal(USVᴴ_dUSVᴴ) | ||
| dUSVᴴ = Mooncake.tangent(USVᴴ_dUSVᴴ) | ||
| # pair each factor with its tangent storage | ||
| U, dU = arrayify(USVᴴ[1], dUSVᴴ[1]) | ||
| S, dS = arrayify(USVᴴ[2], dUSVᴴ[2]) | ||
| Vᴴ, dVᴴ = arrayify(USVᴴ[3], dUSVᴴ[3]) | ||
| # NOTE(review): the return value of `$f!` is discarded, so this presumably relies on `$f!` writing its result into the provided `USVᴴ` — confirm. | ||
| $f!(A, USVᴴ, Mooncake.primal(alg_dalg)) | ||
| # the pushforward runs after the primal call, so it sees the freshly computed factors | ||
| svd_pushforward!(dA, A, (U, S, Vᴴ), (dU, dS, dVᴴ)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This again works because |
||
| return USVᴴ_dUSVᴴ | ||
| end | ||
| @is_primitive Mooncake.DefaultCtx Tuple{typeof($f), Any, MatrixAlgebraKit.AbstractAlgorithm} | ||
| function Mooncake.rrule!!(::CoDual{typeof($f)}, A_dA::CoDual, alg_dalg::CoDual) | ||
| A, dA = arrayify(A_dA) | ||
| USVᴴ = $f(A, Mooncake.primal(alg_dalg)) | ||
|
|
@@ -585,10 +597,23 @@ for (f!, f) in ( | |
| end | ||
| return USVᴴ_codual, svd_adjoint | ||
| end | ||
| # Forward-mode rule for the out-of-place `$f(A, alg)`: computes the factorization, pairs it with a | ||
| # zero tangent, lets `svd_pushforward!` work on the tangent arrays, and returns the resulting `Dual`. | ||
| function Mooncake.frule!!(::Dual{typeof($f)}, A_dA::Dual, alg_dalg::Dual) | ||
|     A, dA = arrayify(A_dA) | ||
|     alg = Mooncake.primal(alg_dalg) | ||
|     factors = $f(A, alg) | ||
|     factors_dual = Dual(factors, Mooncake.zero_tangent(factors)) | ||
|     Uval, Sval, Vᴴval = Mooncake.primal(factors_dual) | ||
|     Utan, Stan, Vᴴtan = Mooncake.tangent(factors_dual) | ||
|     # pair every factor with its tangent storage before handing them to the pushforward | ||
|     U, dU = arrayify(Uval, Utan) | ||
|     S, dS = arrayify(Sval, Stan) | ||
|     Vᴴ, dVᴴ = arrayify(Vᴴval, Vᴴtan) | ||
|     svd_pushforward!(dA, A, (U, S, Vᴴ), (dU, dS, dVᴴ)) | ||
|     return factors_dual | ||
| end | ||
| end | ||
| end | ||
|
|
||
| @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(svd_vals!), Any, Any, MatrixAlgebraKit.AbstractAlgorithm} | ||
| @is_primitive Mooncake.DefaultCtx Tuple{typeof(svd_vals!), Any, Any, MatrixAlgebraKit.AbstractAlgorithm} | ||
| function Mooncake.rrule!!(::CoDual{typeof(svd_vals!)}, A_dA::CoDual, S_dS::CoDual, alg_dalg::CoDual) | ||
| # compute primal | ||
| A, dA = arrayify(A_dA) | ||
|
|
@@ -604,8 +629,17 @@ function Mooncake.rrule!!(::CoDual{typeof(svd_vals!)}, A_dA::CoDual, S_dS::CoDua | |
| end | ||
| return S_dS, svd_vals_adjoint | ||
| end | ||
| # Forward-mode rule for the in-place `svd_vals!(A, S, alg)`: fills `S` with the singular values of `A`, | ||
| # calls `svd_vals_pushforward!` with the tangent storage `dS`, and returns the incoming `S_dS` dual. | ||
| function Mooncake.frule!!(::Dual{typeof(svd_vals!)}, A_dA::Dual, S_dS::Dual, alg_dalg::Dual) | ||
| # compute primal | ||
| A, dA = arrayify(A_dA) | ||
| S, dS = arrayify(S_dS) | ||
| # a compact SVD is computed here because `svd_vals_pushforward!` is handed the whole `USVᴴ` tuple | ||
| USVᴴ = svd_compact(A, Mooncake.primal(alg_dalg)) | ||
|
kshyatt marked this conversation as resolved.
Outdated
|
||
| # copy `diagview(USVᴴ[2])` (the singular values) into the caller-provided output | ||
| copy!(S, diagview(USVᴴ[2])) | ||
| svd_vals_pushforward!(dA, A, USVᴴ, dS) | ||
| return S_dS | ||
| end | ||
|
|
||
| @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(svd_vals), Any, MatrixAlgebraKit.AbstractAlgorithm} | ||
| @is_primitive Mooncake.DefaultCtx Tuple{typeof(svd_vals), Any, MatrixAlgebraKit.AbstractAlgorithm} | ||
| function Mooncake.rrule!!(::CoDual{typeof(svd_vals)}, A_dA::CoDual, alg_dalg::CoDual) | ||
| # compute primal | ||
| A, dA = arrayify(A_dA) | ||
|
|
@@ -624,6 +658,16 @@ function Mooncake.rrule!!(::CoDual{typeof(svd_vals)}, A_dA::CoDual, alg_dalg::Co | |
| end | ||
| return S_codual, svd_vals_adjoint | ||
| end | ||
| # Forward-mode rule for the out-of-place `svd_vals(A, alg)`: computes a compact SVD, wraps | ||
| # `diagview` of its second factor in a `Dual` with a zero tangent, and lets | ||
| # `svd_vals_pushforward!` work on that tangent. | ||
| function Mooncake.frule!!(::Dual{typeof(svd_vals)}, A_dA::Dual, alg_dalg::Dual) | ||
|     A, dA = arrayify(A_dA) | ||
|     alg = Mooncake.primal(alg_dalg) | ||
|     factors = svd_compact(A, alg) | ||
|     vals = diagview(factors[2]) | ||
|     vals_dual = Dual(vals, Mooncake.zero_tangent(vals)) | ||
|     # only the tangent half of the pair is needed below | ||
|     _, dvals = arrayify(vals_dual) | ||
|     svd_vals_pushforward!(dA, A, factors, dvals) | ||
|     return vals_dual | ||
| end | ||
|
|
||
| @is_primitive Mooncake.DefaultCtx Mooncake.ReverseMode Tuple{typeof(svd_trunc!), Any, Any, MatrixAlgebraKit.AbstractAlgorithm} | ||
| function Mooncake.rrule!!(::CoDual{typeof(svd_trunc!)}, A_dA::CoDual, USVᴴ_dUSVᴴ::CoDual, alg_dalg::CoDual) | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,77 @@ | ||||||||||||||||||||||||||
| #     svd_pushforward!(ΔA, A, USVᴴ, ΔUSVᴴ, ind = Colon(); rank_atol, kwargs...) | ||||||||||||||||||||||||||
| # | ||||||||||||||||||||||||||
| # Forward-mode derivative of an SVD `USVᴴ = (U, S, Vᴴ)`: given the input tangent `ΔA`, overwrite the | ||||||||||||||||||||||||||
| # tangents `ΔUSVᴴ = (ΔU, ΔS, ΔVᴴ)` in place and return them as a tuple. Components for which | ||||||||||||||||||||||||||
| # `iszerotangent` holds are skipped. | ||||||||||||||||||||||||||
| # | ||||||||||||||||||||||||||
| # - `A` is only used to compute the default `rank_atol`. | ||||||||||||||||||||||||||
| # - `ind` and any extra `kwargs` are accepted but not used by this method. | ||||||||||||||||||||||||||
| # - Only the leading `r = svd_rank(S; rank_atol)` singular triplets are differentiated; of `ΔS`, only | ||||||||||||||||||||||||||
| #   the first `r` entries of `diagview(ΔS)` are written. | ||||||||||||||||||||||||||
| # | ||||||||||||||||||||||||||
| # Throws `DimensionMismatch` when `size(ΔA) != (size(U, 1), size(Vᴴ, 2))`. | ||||||||||||||||||||||||||
| function svd_pushforward!(ΔA, A, USVᴴ, ΔUSVᴴ, ind = Colon(); rank_atol = default_pullback_rank_atol(A), kwargs...) | ||||||||||||||||||||||||||
|     U, Smat, Vᴴ = USVᴴ | ||||||||||||||||||||||||||
|     m, n = size(U, 1), size(Vᴴ, 2) | ||||||||||||||||||||||||||
|     (m, n) == size(ΔA) || throw(DimensionMismatch("size of ΔA ($(size(ΔA))) does not match size of U*S*Vᴴ ($m, $n)")) | ||||||||||||||||||||||||||
|     S = diagview(Smat) | ||||||||||||||||||||||||||
|     ΔU, ΔS, ΔVᴴ = ΔUSVᴴ | ||||||||||||||||||||||||||
|     r = svd_rank(S; rank_atol) | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
|     U₁ = view(U, :, 1:r) | ||||||||||||||||||||||||||
|     S₁ = view(S, 1:r) | ||||||||||||||||||||||||||
|     V₁ᴴ = view(Vᴴ, 1:r, :) | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
|     # compact region | ||||||||||||||||||||||||||
|     V₁ = adjoint(V₁ᴴ) | ||||||||||||||||||||||||||
|     ΔAV₁ = ΔA * V₁ | ||||||||||||||||||||||||||
|     UᴴΔAV₁ = U₁' * ΔAV₁ | ||||||||||||||||||||||||||
|     if !iszerotangent(ΔS) | ||||||||||||||||||||||||||
|         # singular-value tangents are the real part of the diagonal of U₁' * ΔA * V₁ | ||||||||||||||||||||||||||
|         ΔS₁ = view(diagview(ΔS), 1:r) | ||||||||||||||||||||||||||
|         ΔS₁ .= real.(diagview(UᴴΔAV₁)) | ||||||||||||||||||||||||||
|     end | ||||||||||||||||||||||||||
|     if !iszerotangent(ΔU) || !iszerotangent(ΔVᴴ) | ||||||||||||||||||||||||||
|         # entry (i, j) of `transpose(S₁) .- S₁` is S₁[j] - S₁[i]; likewise `.+` gives S₁[j] + S₁[i] | ||||||||||||||||||||||||||
|         # NOTE(review): `inv_safe` presumably guards the (near-)zero differences on the diagonal and for degenerate values — confirm. | ||||||||||||||||||||||||||
|         hUᴴΔAV₁ = inv_safe.(transpose(S₁) .- S₁) .* project_hermitian(UᴴΔAV₁) | ||||||||||||||||||||||||||
|
kshyatt marked this conversation as resolved.
|
||||||||||||||||||||||||||
|         aUᴴΔAV₁ = inv_safe.(transpose(S₁) .+ S₁) .* project_antihermitian(UᴴΔAV₁) | ||||||||||||||||||||||||||
|
Comment on lines
+23
to
+24
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think below only the sum and difference are actually used, we could use a kernel like MatrixAlgebraKit.jl/src/implementations/polar.jl Lines 209 to 220 in e271b4a
|
||||||||||||||||||||||||||
|         if !iszerotangent(ΔU) | ||||||||||||||||||||||||||
|             ΔU₁ = view(ΔU, :, 1:r) | ||||||||||||||||||||||||||
|             K̇ = hUᴴΔAV₁ + aUᴴΔAV₁ | ||||||||||||||||||||||||||
|             mul!(ΔU₁, U₁, K̇) | ||||||||||||||||||||||||||
|             if m > r | ||||||||||||||||||||||||||
|                 # ΔAV₁ is overwritten in place with ΔA * V₁ - U₁ * (U₁' * ΔA * V₁) | ||||||||||||||||||||||||||
|                 ΔAV₁ = mul!(ΔAV₁, U₁, UᴴΔAV₁, -1, 1) | ||||||||||||||||||||||||||
|                 ΔU₁ .+= ΔAV₁ ./ transpose(S₁) | ||||||||||||||||||||||||||
|             end | ||||||||||||||||||||||||||
|             if size(U, 2) > r # these columns of U are undetermined, but U' * U̇ should be antihermitian | ||||||||||||||||||||||||||
|                 U₂ = view(U, :, (r + 1):size(U, 2)) | ||||||||||||||||||||||||||
|                 ΔU₁ᴴU₂ = ΔU₁' * U₂ | ||||||||||||||||||||||||||
|                 ΔU₂ = view(ΔU, :, (r + 1):size(U, 2)) | ||||||||||||||||||||||||||
|                 mul!(ΔU₂, U₁, ΔU₁ᴴU₂, -1, 0) | ||||||||||||||||||||||||||
|             end | ||||||||||||||||||||||||||
|         end | ||||||||||||||||||||||||||
|         if !iszerotangent(ΔVᴴ) | ||||||||||||||||||||||||||
|             ΔV₁ᴴ = view(ΔVᴴ, 1:r, :) | ||||||||||||||||||||||||||
|             Ṁ = hUᴴΔAV₁ - aUᴴΔAV₁ | ||||||||||||||||||||||||||
|             mul!(ΔV₁ᴴ, Ṁ', V₁ᴴ) | ||||||||||||||||||||||||||
|             if n > r | ||||||||||||||||||||||||||
|                 # UᴴΔA₁ is overwritten in place with U₁' * ΔA - (U₁' * ΔA * V₁) * V₁ᴴ | ||||||||||||||||||||||||||
|                 UᴴΔA₁ = U₁' * ΔA | ||||||||||||||||||||||||||
|                 UᴴΔA₁ = mul!(UᴴΔA₁, UᴴΔAV₁, V₁ᴴ, -1, 1) | ||||||||||||||||||||||||||
|                 ΔV₁ᴴ .+= S₁ .\ UᴴΔA₁ | ||||||||||||||||||||||||||
|             end | ||||||||||||||||||||||||||
|             if size(Vᴴ, 1) > r # these rows of Vᴴ are undetermined, but V * V̇ should be antihermitian | ||||||||||||||||||||||||||
|                 V₂ᴴ = view(Vᴴ, (r + 1):size(Vᴴ, 1), :) | ||||||||||||||||||||||||||
|                 V₂ᴴΔV₁ = V₂ᴴ * ΔV₁ᴴ' | ||||||||||||||||||||||||||
|                 ΔV₂ᴴ = view(ΔVᴴ, (r + 1):size(Vᴴ, 1), :) | ||||||||||||||||||||||||||
|                 mul!(ΔV₂ᴴ, V₂ᴴΔV₁, V₁ᴴ, -1, 0) | ||||||||||||||||||||||||||
|             end | ||||||||||||||||||||||||||
|         end | ||||||||||||||||||||||||||
|         if eltype(U) <: Complex && !iszerotangent(ΔU) && !iszerotangent(ΔVᴴ) # fix gauge for `gaugefix!` compatibility | ||||||||||||||||||||||||||
|             # per column of U₁, take the entry of largest magnitude and remove the imaginary part of ΔU₁ ./ U₁ there | ||||||||||||||||||||||||||
|             _, I = findmax(abs, U₁; dims = 1) | ||||||||||||||||||||||||||
|             infinitesimal_phases = imag.(ΔU₁[I] ./ U₁[I]) | ||||||||||||||||||||||||||
|             ΔU₁ .-= im .* U₁ .* infinitesimal_phases | ||||||||||||||||||||||||||
|             ΔV₁ᴴ .+= im .* transpose(infinitesimal_phases) .* V₁ᴴ | ||||||||||||||||||||||||||
|         end | ||||||||||||||||||||||||||
|     end | ||||||||||||||||||||||||||
|     return (ΔU, ΔS, ΔVᴴ) | ||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| #     svd_trunc_pushforward!(ΔA, A, USVᴴ, ΔUSVᴴ, ind; rank_atol, kwargs...) | ||||||||||||||||||||||||||
| # | ||||||||||||||||||||||||||
| # Placeholder for the forward-mode derivative of a truncated SVD. It is not implemented yet, so it | ||||||||||||||||||||||||||
| # raises an error instead of silently returning `nothing` and leaving `ΔUSVᴴ` untouched. | ||||||||||||||||||||||||||
| function svd_trunc_pushforward!(ΔA, A, USVᴴ, ΔUSVᴴ, ind; rank_atol = default_pullback_rank_atol(A), kwargs...) | ||||||||||||||||||||||||||
|     # TODO: implement the truncated pushforward | ||||||||||||||||||||||||||
|
kshyatt marked this conversation as resolved.
Outdated
|
||||||||||||||||||||||||||
|     return error("svd_trunc_pushforward! is not implemented yet") | ||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||
| # Pushforward for the singular values only: wraps `ΔS` as `diagonal(ΔS)`, passes `nothing` for the | ||||||||||||||||||||||||||
| # `U` and `Vᴴ` tangents, and forwards everything to `svd_pushforward!`, returning its result. | ||||||||||||||||||||||||||
| function svd_vals_pushforward!( | ||||||||||||||||||||||||||
|         ΔA, A, USVᴴ, ΔS, ind = Colon(); | ||||||||||||||||||||||||||
|         rank_atol::Real = default_pullback_rank_atol(USVᴴ[2]), | ||||||||||||||||||||||||||
|         degeneracy_atol::Real = default_pullback_rank_atol(USVᴴ[2]) | ||||||||||||||||||||||||||
|     ) | ||||||||||||||||||||||||||
|     return svd_pushforward!( | ||||||||||||||||||||||||||
|         ΔA, A, USVᴴ, (nothing, diagonal(ΔS), nothing), ind; | ||||||||||||||||||||||||||
|         rank_atol = rank_atol, degeneracy_atol = degeneracy_atol | ||||||||||||||||||||||||||
|     ) | ||||||||||||||||||||||||||
| end | ||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is it necessary to make `USVᴴ.dval[2]`, so `dS`, zero specifically? And then why not `dU` and `dV`? Is that something we can fix directly in `svd_pushforward!`?
Also, does it make sense to still have a `!isa(USVᴴ, Const)` test afterwards? Are you not already assuming that it is not constant if you try to access `USVᴴ.dval` before?
Finally, what happens in the case where `A.val::Diagonal` and `S = USVᴴ.val[2]` might potentially shadow `A.val`? Will `USVᴴ.dval[2]` then also be identical to `A.dval`? In that case, this `make_zero!` would really be problematic, as we erase `A.dval`, which we still need.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is an artifact from the previous version of the push forward, let's remove it. That version also didn't test with Diagonal yet.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, the next `make_zero!(A.dval)` after the pf runs is problematic too, since it doesn't check for the `A === S` case; I'll fix that as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's tough to reason about all this because of the inconsistent "when is `A` also an `arg`?" stuff we were talking about on Zulip 😢