|
418 | 418 | end |
419 | 419 | end |
420 | 420 |
|
# "strided": runs a tile copy kernel whose input is a CuArray wrapped in a
# PermutedDimsArray, and checks the result against an eagerly permuted copy.
@testset "strided" begin
    @testset "PermutedDimsArray" begin
        # Copies one tile from `src` to the same tile index in `dst`.
        # `tile_x` / `tile_y` carry the tile shape and are unwrapped with `[]`.
        # NOTE(review): `ct.bid(1)` / `ct.bid(2)` are presumably the block index
        # along each launch-grid axis — confirm against the cuTile API.
        function copy_kernel_2d(
                src::ct.TileArray{Float32, 2}, dst::ct.TileArray{Float32, 2},
                tile_x::ct.Constant{Int}, tile_y::ct.Constant{Int}
            )
            block = (ct.bid(1), ct.bid(2))
            shape = (tile_x[], tile_y[])
            ct.store(dst, block, ct.load(src, block, shape))
            return
        end

        rows, cols = 64, 32
        tile_rows, tile_cols = 16, 16

        # The backing array is cols × rows, so its (2, 1) permutation is
        # rows × cols and matches the shape of `result`.
        backing = CuArray(Float32.(reshape(1:(cols * rows), cols, rows)))
        permuted = PermutedDimsArray(backing, (2, 1))
        result = CUDA.zeros(Float32, rows, cols)

        # 64 / 16 = 4 and 32 / 16 = 2 grid entries, one per 16 × 16 tile.
        ct.launch(
            copy_kernel_2d, (cld(rows, tile_rows), cld(cols, tile_cols)),
            permuted, result, ct.Constant(tile_rows), ct.Constant(tile_cols)
        )

        # Exact comparison: the kernel only copies values, no arithmetic is done.
        @test result == permutedims(backing, (2, 1))
    end
end
| 446 | + |
421 | 447 | @testset "extract" begin |
422 | 448 | @testset "extract identity (0,0) full shape" begin |
423 | 449 | function extract_identity_kernel(x::ct.TileArray{Float32,2}, y::ct.TileArray{Float32,2}) |
|
0 commit comments