More tests for CUDA + MPS

kshyatt · kshyatt · commit b657bb4fdfe8 · 2026-03-26T08:48:37.000-04:00
diff --git a/ext/MPSKitAdaptExt.jl b/ext/MPSKitAdaptExt.jl
@@ -35,5 +35,7 @@ end
     MPSKit.JordanMPOTensor(space(W), adapt(to, W.A), adapt(to, W.B), adapt(to, W.C), adapt(to, W.D))
 @inline Adapt.adapt_structure(to, mpo::MPOHamiltonian) =
     MPOHamiltonian(map(x -> adapt(to, x), mpo.W))
+# Adapt each entry of the wrapped `data` and rebuild the `Multiline` around the result.
+@inline Adapt.adapt_structure(to, lines::MPSKit.Multiline) =
+    MPSKit.Multiline(map(x -> adapt(to, x), lines.data))
+# Adapt each entry of the wrapped `data` and rebuild the `PeriodicArray` around the result.
+@inline Adapt.adapt_structure(to, periodic::MPSKit.PeriodicArray) =
+    MPSKit.PeriodicArray(map(x -> adapt(to, x), periodic.data))
 
 end
diff --git a/test/cuda/operators.jl b/test/cuda/operators.jl
@@ -5,12 +5,15 @@ using MPSKit: GeometryStyle, FiniteChainStyle, InfiniteChainStyle, OperatorStyle
 using TensorKit
 using MatrixAlgebraKit
 using TensorKit: ℙ, tensormaptype, TensorMapWithStorage
-using Adapt, CUDA, cuTENSOR
+using Adapt, CUDA, cuTENSOR, CUDA.CUBLAS
 
 # TODO revisit this once https://github.com/QuantumKitHub/MatrixAlgebraKit.jl/issues/176
 # is resolved
 MPSKit.Defaults.alg_svd() = CUSOLVER_QRIteration()
 
+# Physical spaces and, at the same position, the virtual space used together with each of
+# them (the two tuples are consumed pairwise via `zip`).
+pspaces = (
+    ℙ^4,
+    Rep[U₁](0 => 2),
+    Rep[SU₂](1 => 1),
+)
+vspaces = (
+    ℙ^10,
+    Rep[U₁](0 => 20),
+    Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 // 2 => 1),
+)
+
 @testset "CuFiniteMPO" for V in (ℂ^2, U1Space(0 => 1, 1 => 1))
     # start from random operators
     L = 4
@@ -87,3 +90,155 @@ MPSKit.Defaults.alg_svd() = CUSOLVER_QRIteration()
 
     @test dot(mpomps₁, mpomps₁) ≈ dot(mpo₁, mpo₁)
 end
+
+# Build finite MPO Hamiltonians on the host, move them to CUDA storage with `adapt`, and
+# check the resulting storage type, scalar type and basic arithmetic. Blocks wrapped in
+# `#= ... =#` are checks that are currently disabled; the trailing note on each block
+# records why.
+@testset "Finite CuMPOHamiltonian" begin
+    L = 3
+    for T in (Float64, ComplexF64), V in (ℂ^2, U1Space(-1 => 1, 0 => 1, 1 => 1))
+        lattice = fill(V, L)
+        O₁ = randn(T, V, V)
+        O₁ += O₁'
+        E = id(storagetype(O₁), domain(O₁))
+        O₂ = randn(T, V^2 ← V^2)
+        O₂ += O₂'
+
+        H1 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, i => O₁ for i in 1:L))
+        H2 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, (i, i + 1) => O₂ for i in 1:(L - 1)))
+        H3 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 1 => O₁, (2, 3) => O₂, (1, 3) => O₂))
+        @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory}
+        @test TensorKit.storagetype(H2) == CuVector{T, CUDA.DeviceMemory}
+        @test TensorKit.storagetype(H3) == CuVector{T, CUDA.DeviceMemory}
+
+        @test scalartype(H1) == scalartype(H2) == scalartype(H3) == T
+        #=if !(T <: Complex)
+            for H in (H1, H2, H3)
+                Hc = @constinferred complex(H)
+                @test scalartype(Hc) == complex(T)
+                @test TensorKit.storagetype(Hc) == CuVector{complex(T), CUDA.DeviceMemory}
+            end
+        end=# # TODO
+
+        # check if constructor works by converting back to tensormap
+        #= H1_tm = convert(TensorMap, H1)
+        operators = vcat(fill(E, L - 1), O₁)
+        @test H1_tm ≈ mapreduce(+, 1:L) do i
+            return reduce(⊗, circshift(operators, i))
+        end
+        operators = vcat(fill(E, L - 2), O₂)
+        @test convert(TensorMap, H2) ≈ mapreduce(+, 1:(L - 1)) do i
+            return reduce(⊗, circshift(operators, i))
+        end
+        @test convert(TensorMap, H3) ≈
+            O₁ ⊗ E ⊗ E + E ⊗ O₂ + permute(O₂ ⊗ E, ((1, 3, 2), (4, 6, 5))) =# # needs a fix in BlockTensorKit
+
+        # check if adding terms on the same site works
+        single_terms = Iterators.flatten(Iterators.repeated((i => O₁ / 2 for i in 1:L), 2))
+        H4 = adapt(CuArray, FiniteMPOHamiltonian(lattice, single_terms))
+        @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}
+        @test H4 ≈ H1 atol = 1.0e-6
+        double_terms = Iterators.flatten(
+            Iterators.repeated(((i, i + 1) => O₂ / 2 for i in 1:(L - 1)), 2)
+        )
+        H5 = adapt(CuArray, FiniteMPOHamiltonian(lattice, double_terms))
+        @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory}
+        @test H5 ≈ H2 atol = 1.0e-6
+
+        # test linear algebra
+        @test H1 ≈
+            adapt(CuArray, FiniteMPOHamiltonian(lattice, 1 => O₁)) +
+            adapt(CuArray, FiniteMPOHamiltonian(lattice, 2 => O₁)) +
+            adapt(CuArray, FiniteMPOHamiltonian(lattice, 3 => O₁))
+        @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory}
+        #@test 0.8 * H1 + 0.2 * H1 ≈ H1 atol = 1.0e-6 # broken due to JordanMPOTensorMap
+        #=@test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6
+        H1_trunc = changebonds(H1, SvdCut(; trscheme = truncrank(0)))
+        @test H1_trunc ≈ H1
+        @test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1))=# # needs fix in BlockTensorKit
+
+        # test dot and application
+        # (only the host-to-GPU conversion of the state is exercised while the checks
+        # below remain disabled)
+        state = rand(T, prod(lattice))
+        mps = adapt(CuArray, FiniteMPS(state))
+
+        #=@test convert(TensorMap, H1 * mps) ≈ H1_tm * state # needs fix in BlockTensorKit
+        @test H1 * state ≈ H1_tm * state
+        @test dot(mps, H2, mps) ≈ dot(mps, H2 * mps)=#
+
+        # test constructor from dictionary with mixed linear and Cartesian lattice indices as keys
+        grid = square = fill(V, 3, 3)
+
+        local_operators = Dict((I,) => O₁ for I in eachindex(grid))
+        I_vertical = CartesianIndex(1, 0)
+        vertical_operators = Dict(
+            (I, I + I_vertical) => O₂ for I in eachindex(IndexCartesian(), square) if I[1] < size(square, 1)
+        )
+        I_horizontal = CartesianIndex(0, 1)
+        # horizontal bonds step along the second axis, so the column index must be bounded
+        # by the extent of that axis (size(square, 2)), not by the number of rows
+        horizontal_operators = Dict(
+            (I, I + I_horizontal) => O₂ for I in eachindex(IndexCartesian(), square) if I[2] < size(square, 2)
+        )
+        operators = merge(local_operators, vertical_operators, horizontal_operators)
+        H4 = adapt(CuArray, FiniteMPOHamiltonian(grid, operators))
+        @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}
+
+        @test H4 ≈
+            adapt(CuArray, FiniteMPOHamiltonian(grid, local_operators)) +
+            adapt(CuArray, FiniteMPOHamiltonian(grid, vertical_operators)) +
+            adapt(CuArray, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4
+        @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}
+
+        #H4′= H4 / 3 + 2H4 / 3
+        #@test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory}
+        #H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12)))
+        #@test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} # more problems with arithmetic operations...
+        #psi = adapt(CuArray, FiniteMPS(physicalspace(H5), V ⊕ rightunitspace(V)))
+        #@test expectation_value(psi, H4) ≈ expectation_value(psi, H5)
+    end
+end
+
+# For every (physical, virtual) space pair: move infinite MPO Hamiltonians and infinite MPS
+# to CUDA storage and check that the storage type is kept by `+` and `repeat`. The
+# commented-out checks are disabled for the reasons noted next to them.
+@testset "CuInfiniteMPOHamiltonian $(sectortype(pspace))" for (pspace, Dspace) in zip(pspaces, vspaces)
+    storage = CuVector{ComplexF64, CUDA.DeviceMemory}
+
+    # generate a 1-2-3 body interaction
+    operators = ntuple(3) do nbody
+        O = rand(ComplexF64, pspace^nbody, pspace^nbody)
+        return O + O'
+    end
+
+    H1 = adapt(storage, InfiniteMPOHamiltonian(operators[1]))
+    H2 = adapt(storage, InfiniteMPOHamiltonian(operators[2]))
+    H3 = adapt(storage, repeat(InfiniteMPOHamiltonian(operators[3]), 2))
+    for H in (H1, H2, H3)
+        @test TensorKit.storagetype(H) == storage
+    end
+
+    # make a teststate to measure expectation values for
+    ψ1 = adapt(storage, InfiniteMPS([pspace], [Dspace]))
+    ψ2 = adapt(storage, InfiniteMPS([pspace, pspace], [Dspace, Dspace]))
+    for ψ in (ψ1, ψ2)
+        @test TensorKit.storagetype(ψ) == storage
+    end
+
+    #=
+    e1 = expectation_value(ψ1, H1)
+    e2 = expectation_value(ψ1, H2)
+    =# # broken due to BraidingTensor
+
+    # H1 = 2 * H1 - CuArray([1]) # scalar indexing
+    # @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    # @test e1 * 2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 # broken due to BraidingTensor
+
+    # the sum of two GPU Hamiltonians must stay on the GPU
+    H1 = H1 + H2
+    @test TensorKit.storagetype(H1) == storage
+
+    # @test e1 * 2 + e2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 # broken due to BraidingTensor
+
+    # so must the repeated Hamiltonian
+    H1 = repeat(H1, 2)
+    @test TensorKit.storagetype(H1) == storage
+
+    #=e1 = expectation_value(ψ2, H1)
+    e3 = expectation_value(ψ2, H3)
+
+    @test e1 + e3 ≈ expectation_value(ψ2, H1 + H3) atol = 1.0e-10=# # broken due to BraidingTensor
+
+    #H4 = H1 + H3 # broken due to BraidingTensor
+    #@test TensorKit.storagetype(H4) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    #h4 = H4 * H4
+    #@test TensorKit.storagetype(h4) == CuVector{ComplexF64, CUDA.DeviceMemory}
+    #@test real(expectation_value(ψ2, H4)) >= 0 # broken due to BraidingTensor
+end
diff --git a/test/cuda/states.jl b/test/cuda/states.jl
@@ -50,3 +50,43 @@ using Adapt, CUDA, cuTENSOR
 
     @test norm(2 * ψ + ψ - 3 * ψ) ≈ 0.0 atol = sqrt(eps(real(ComplexF64)))
 end
+
+# Move a 2×2 `MultilineMPS` built from random tensors to CUDA storage, then check its
+# style traits, spaces, and the per-site relations between AL, AR, C and the environments.
+@testset "CuMultilineMPS ($(sectortype(D)), $elt)" for (D, d, elt) in
+    [(ℙ^10, ℙ^2, ComplexF64), (Rep[U₁](1 => 3), Rep[U₁](0 => 1), ComplexF32)]
+    tol = Float64(eps(real(elt)) * 100)
+    storage = CuVector{elt, CUDA.DeviceMemory}
+    site_tensors = [
+        rand(elt, D * d, D) rand(elt, D * d, D)
+        rand(elt, D * d, D) rand(elt, D * d, D)
+    ]
+    ψ = adapt(storage, MultilineMPS(site_tensors; tol))
+
+    @test GeometryStyle(typeof(ψ)) == InfiniteChainStyle()
+    @test GeometryStyle(ψ) == InfiniteChainStyle()
+    @test TensorKit.storagetype(ψ) == storage
+    @test TensorKit.sectortype(ψ) == sectortype(D)
+
+    @test !isfinite(typeof(ψ))
+
+    @test physicalspace(ψ) == fill(d, 2, 2)
+    @test all(x -> x ≾ D, left_virtualspace(ψ))
+    @test all(x -> x ≾ D, right_virtualspace(ψ))
+
+    nrows, ncols = size(ψ, 1), size(ψ, 2)
+    for i in 1:nrows, j in 1:ncols
+        AL, AR, C = ψ.AL[i, j], ψ.AR[i, j], ψ.C[i, j]
+
+        # AL[i, j] * C[i, j] and C[i, j - 1] * AR[i, j] must agree up to the tolerance
+        @plansor difference[-1 -2; -3] := AL[-1 -2; 1] * C[1; -3] -
+            ψ.C[i, j - 1][-1; 1] * AR[1 -2; -3]
+        @test norm(difference, Inf) < tol * 10
+
+        # environment at (i, j) applied to the site transfer matrix from the left
+        @test l_LL(ψ, i, j) * TransferMatrix(AL, AL) ≈ l_LL(ψ, i, j + 1)
+        @test l_LR(ψ, i, j) * TransferMatrix(AL, AR) ≈ l_LR(ψ, i, j + 1)
+        @test l_RL(ψ, i, j) * TransferMatrix(AR, AL) ≈ l_RL(ψ, i, j + 1)
+        @test l_RR(ψ, i, j) * TransferMatrix(AR, AR) ≈ l_RR(ψ, i, j + 1)
+
+        # site transfer matrix applied to the environment at (i, j) from the right
+        @test TransferMatrix(AL, AL) * r_LL(ψ, i, j) ≈ r_LL(ψ, i, j + 1)
+        @test TransferMatrix(AL, AR) * r_LR(ψ, i, j) ≈ r_LR(ψ, i, j + 1)
+        @test TransferMatrix(AR, AL) * r_RL(ψ, i, j) ≈ r_RL(ψ, i, j + 1)
+        @test TransferMatrix(AR, AR) * r_RR(ψ, i, j) ≈ r_RR(ψ, i, j + 1)
+    end
+end