Skip to content

Commit caeb2d3

Browse files
committed
Improve test coverage a bit.
1 parent 4ee897d commit caeb2d3

2 files changed

Lines changed: 302 additions & 62 deletions

File tree

test/execution.jl

Lines changed: 219 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,118 @@ end
896896
@test Array(b) ≈ sqrt.(Array(a))
897897
end
898898

899+
@testset "1D abs" begin
900+
function vabs_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
901+
pid = ct.bid(1)
902+
tile = ct.load(a, pid, (16,))
903+
ct.store(b, pid, abs.(tile))
904+
return
905+
end
906+
907+
n = 1024
908+
a = CUDA.rand(Float32, n) .- 0.5f0 # Mix of positive and negative
909+
b = CUDA.zeros(Float32, n)
910+
911+
ct.launch(vabs_1d, cld(n, 16), a, b)
912+
913+
@test Array(b) ≈ abs.(Array(a)) rtol=1e-5
914+
end
915+
916+
@testset "1D cos" begin
917+
function vcos_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
918+
pid = ct.bid(1)
919+
tile = ct.load(a, pid, (16,))
920+
ct.store(b, pid, cos.(tile))
921+
return
922+
end
923+
924+
n = 1024
925+
a = CUDA.rand(Float32, n) .* 6.28f0 # Range [0, 2π]
926+
b = CUDA.zeros(Float32, n)
927+
928+
ct.launch(vcos_1d, cld(n, 16), a, b)
929+
930+
@test Array(b) ≈ cos.(Array(a)) rtol=1e-4
931+
end
932+
933+
@testset "1D sin" begin
934+
function vsin_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
935+
pid = ct.bid(1)
936+
tile = ct.load(a, pid, (16,))
937+
ct.store(b, pid, sin.(tile))
938+
return
939+
end
940+
941+
n = 1024
942+
a = CUDA.rand(Float32, n) .* 6.28f0
943+
b = CUDA.zeros(Float32, n)
944+
945+
ct.launch(vsin_1d, cld(n, 16), a, b)
946+
947+
@test Array(b) ≈ sin.(Array(a)) rtol=1e-4
948+
end
949+
950+
@testset "1D exp" begin
951+
function vexp_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
952+
pid = ct.bid(1)
953+
tile = ct.load(a, pid, (16,))
954+
ct.store(b, pid, exp.(tile))
955+
return
956+
end
957+
958+
n = 1024
959+
a = CUDA.rand(Float32, n) .* 4.0f0 # Range [0, 4] to avoid overflow
960+
b = CUDA.zeros(Float32, n)
961+
962+
ct.launch(vexp_1d, cld(n, 16), a, b)
963+
964+
@test Array(b) ≈ exp.(Array(a)) rtol=1e-4
965+
end
966+
967+
@testset "1D log" begin
968+
function vlog_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
969+
pid = ct.bid(1)
970+
tile = ct.load(a, pid, (16,))
971+
ct.store(b, pid, log.(tile))
972+
return
973+
end
974+
975+
n = 1024
976+
a = CUDA.rand(Float32, n) .+ 0.1f0 # Ensure positive
977+
b = CUDA.zeros(Float32, n)
978+
979+
ct.launch(vlog_1d, cld(n, 16), a, b)
980+
981+
@test Array(b) ≈ log.(Array(a)) rtol=1e-4
982+
end
983+
984+
@testset "1D ceil and floor" begin
985+
function vceil_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
986+
pid = ct.bid(1)
987+
tile = ct.load(a, pid, (16,))
988+
ct.store(b, pid, ceil.(tile))
989+
return
990+
end
991+
992+
function vfloor_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
993+
pid = ct.bid(1)
994+
tile = ct.load(a, pid, (16,))
995+
ct.store(b, pid, floor.(tile))
996+
return
997+
end
998+
999+
n = 1024
1000+
a = CUDA.rand(Float32, n) .* 10.0f0 .- 5.0f0 # Range [-5, 5]
1001+
b_ceil = CUDA.zeros(Float32, n)
1002+
b_floor = CUDA.zeros(Float32, n)
1003+
1004+
ct.launch(vceil_1d, cld(n, 16), a, b_ceil)
1005+
ct.launch(vfloor_1d, cld(n, 16), a, b_floor)
1006+
1007+
@test Array(b_ceil) ≈ ceil.(Array(a))
1008+
@test Array(b_floor) ≈ floor.(Array(a))
1009+
end
1010+
8991011
end
9001012

9011013
@testset "reduction operations" begin
@@ -1307,94 +1419,139 @@ end
13071419
end
13081420
end
13091421

1310-
@testset "mismatched shapes with + throws MethodError" begin
1311-
# Verify that + with different tile shapes throws MethodError (Julia-idiomatic)
1312-
# Note: This tests the type system, not kernel execution
1313-
tile_a = ct.Tile{Float32, (1, 128)}()
1314-
tile_b = ct.Tile{Float32, (64, 1)}()
1422+
end
13151423

1316-
# + should require same shapes, so this should fail
1317-
@test_throws MethodError tile_a + tile_b
1424+
@testset "comparison operations" begin
13181425

1319-
# But .+ should work (broadcasting)
1320-
result = tile_a .+ tile_b
1321-
@test result isa ct.Tile{Float32, (64, 128)}
1322-
end
1426+
@testset "float .< and .>" begin
1427+
function cmp_lt_gt_kernel(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1},
1428+
out_lt::ct.TileArray{Float32,1}, out_gt::ct.TileArray{Float32,1})
1429+
pid = ct.bid(1)
1430+
ta = ct.load(a, pid, (16,))
1431+
tb = ct.load(b, pid, (16,))
1432+
ct.store(out_lt, pid, ct.where(ta .< tb, 1.0f0, 0.0f0))
1433+
ct.store(out_gt, pid, ct.where(ta .> tb, 1.0f0, 0.0f0))
1434+
return
1435+
end
13231436

1437+
n = 1024
1438+
a = CUDA.rand(Float32, n)
1439+
b = CUDA.rand(Float32, n)
1440+
out_lt = CUDA.zeros(Float32, n)
1441+
out_gt = CUDA.zeros(Float32, n)
1442+
1443+
ct.launch(cmp_lt_gt_kernel, cld(n, 16), a, b, out_lt, out_gt)
1444+
1445+
@test Array(out_lt) ≈ Float32.(Array(a) .< Array(b))
1446+
@test Array(out_gt) ≈ Float32.(Array(a) .> Array(b))
13241447
end
13251448

1326-
@testset "comparison operations" begin
1449+
@testset "float .<= and .>=" begin
1450+
function cmp_le_ge_kernel(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1},
1451+
out_le::ct.TileArray{Float32,1}, out_ge::ct.TileArray{Float32,1})
1452+
pid = ct.bid(1)
1453+
ta = ct.load(a, pid, (16,))
1454+
tb = ct.load(b, pid, (16,))
1455+
ct.store(out_le, pid, ct.where(ta .<= tb, 1.0f0, 0.0f0))
1456+
ct.store(out_ge, pid, ct.where(ta .>= tb, 1.0f0, 0.0f0))
1457+
return
1458+
end
1459+
1460+
n = 1024
1461+
a = CUDA.rand(Float32, n)
1462+
b = CUDA.rand(Float32, n)
1463+
out_le = CUDA.zeros(Float32, n)
1464+
out_ge = CUDA.zeros(Float32, n)
13271465

1328-
@testset "float comparison operators" begin
1329-
# Test all broadcast comparison operators with Float32 tiles
1330-
tile = ct.Tile{Float32, (16,)}()
1466+
ct.launch(cmp_le_ge_kernel, cld(n, 16), a, b, out_le, out_ge)
13311467

1332-
@test (tile .< tile) isa ct.Tile{Bool, (16,)}
1333-
@test (tile .> tile) isa ct.Tile{Bool, (16,)}
1334-
@test (tile .<= tile) isa ct.Tile{Bool, (16,)}
1335-
@test (tile .>= tile) isa ct.Tile{Bool, (16,)}
1336-
@test (tile .== tile) isa ct.Tile{Bool, (16,)}
1337-
@test (tile .!= tile) isa ct.Tile{Bool, (16,)}
1468+
@test Array(out_le) ≈ Float32.(Array(a) .<= Array(b))
1469+
@test Array(out_ge) ≈ Float32.(Array(a) .>= Array(b))
13381470
end
13391471

1340-
@testset "integer comparison operators" begin
1341-
# Test all broadcast comparison operators with Int tiles
1342-
int_tile = ct.arange((16,), Int)
1472+
@testset "float .== and .!=" begin
1473+
function cmp_eq_ne_kernel(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1},
1474+
out_eq::ct.TileArray{Float32,1}, out_ne::ct.TileArray{Float32,1})
1475+
pid = ct.bid(1)
1476+
ta = ct.load(a, pid, (16,))
1477+
tb = ct.load(b, pid, (16,))
1478+
ct.store(out_eq, pid, ct.where(ta .== tb, 1.0f0, 0.0f0))
1479+
ct.store(out_ne, pid, ct.where(ta .!= tb, 1.0f0, 0.0f0))
1480+
return
1481+
end
1482+
1483+
n = 1024
1484+
# Use integer-valued floats so equality is meaningful
1485+
a = CUDA.fill(Float32(1), n)
1486+
b = CUDA.fill(Float32(1), n)
1487+
# Set half to different values
1488+
CUDA.@allowscalar b[1:512] .= 2.0f0
1489+
out_eq = CUDA.zeros(Float32, n)
1490+
out_ne = CUDA.zeros(Float32, n)
13431491

1344-
@test (int_tile .< int_tile) isa ct.Tile{Bool, (16,)}
1345-
@test (int_tile .> int_tile) isa ct.Tile{Bool, (16,)}
1346-
@test (int_tile .<= int_tile) isa ct.Tile{Bool, (16,)}
1347-
@test (int_tile .>= int_tile) isa ct.Tile{Bool, (16,)}
1348-
@test (int_tile .== int_tile) isa ct.Tile{Bool, (16,)}
1349-
@test (int_tile .!= int_tile) isa ct.Tile{Bool, (16,)}
1492+
ct.launch(cmp_eq_ne_kernel, cld(n, 16), a, b, out_eq, out_ne)
1493+
1494+
@test Array(out_eq) ≈ Float32.(Array(a) .== Array(b))
1495+
@test Array(out_ne) ≈ Float32.(Array(a) .!= Array(b))
13501496
end
13511497

13521498
@testset "tile vs scalar comparison" begin
1353-
int_tile = ct.arange((16,), Int)
1354-
float_tile = ct.Tile{Float32, (16,)}()
1355-
1356-
# Int tile vs Int scalar
1357-
@test (int_tile .< 10) isa ct.Tile{Bool, (16,)}
1358-
@test (5 .< int_tile) isa ct.Tile{Bool, (16,)}
1499+
function cmp_scalar_kernel(a::ct.TileArray{Float32,1},
1500+
out::ct.TileArray{Float32,1})
1501+
pid = ct.bid(1)
1502+
ta = ct.load(a, pid, (16,))
1503+
ct.store(out, pid, ct.where(ta .> 0.5f0, 1.0f0, 0.0f0))
1504+
return
1505+
end
13591506

1360-
# Float32 tile vs Float32 scalar
1361-
@test (float_tile .< 2.0f0) isa ct.Tile{Bool, (16,)}
1362-
@test (1.0f0 .> float_tile) isa ct.Tile{Bool, (16,)}
1363-
end
1507+
n = 1024
1508+
a = CUDA.rand(Float32, n)
1509+
out = CUDA.zeros(Float32, n)
13641510

1365-
@testset "broadcast comparison shapes" begin
1366-
tile_a = ct.Tile{Float32, (1, 16)}()
1367-
tile_b = ct.Tile{Float32, (8, 1)}()
1511+
ct.launch(cmp_scalar_kernel, cld(n, 16), a, out)
13681512

1369-
# (1, 16) .< (8, 1) -> (8, 16)
1370-
result = tile_a .< tile_b
1371-
@test result isa ct.Tile{Bool, (8, 16)}
1513+
@test Array(out) ≈ Float32.(Array(a) .> 0.5f0)
13721514
end
13731515

13741516
end
13751517

13761518
@testset "power operations" begin
13771519

1378-
@testset "float tile .^ float tile" begin
1379-
tile = ct.Tile{Float32, (16,)}()
1380-
@test (tile .^ tile) isa ct.Tile{Float32, (16,)}
1381-
end
1520+
@testset "tile .^ tile" begin
1521+
function pow_tt_kernel(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1},
1522+
c::ct.TileArray{Float32,1})
1523+
pid = ct.bid(1)
1524+
ta = ct.load(a, pid, (16,))
1525+
tb = ct.load(b, pid, (16,))
1526+
ct.store(c, pid, ta .^ tb)
1527+
return
1528+
end
13821529

1383-
@testset "float tile .^ scalar" begin
1384-
tile = ct.Tile{Float32, (16,)}()
1385-
@test (tile .^ 2.0f0) isa ct.Tile{Float32, (16,)}
1386-
@test (2.0f0 .^ tile) isa ct.Tile{Float32, (16,)}
1387-
end
1530+
n = 1024
1531+
a = CUDA.rand(Float32, n) .+ 0.5f0 # Ensure positive base
1532+
b = CUDA.rand(Float32, n) .+ 0.5f0
1533+
c = CUDA.zeros(Float32, n)
13881534

1389-
@testset "broadcast power shapes" begin
1390-
tile_a = ct.Tile{Float32, (1, 16)}()
1391-
tile_b = ct.Tile{Float32, (8, 1)}()
1392-
@test (tile_a .^ tile_b) isa ct.Tile{Float32, (8, 16)}
1535+
ct.launch(pow_tt_kernel, cld(n, 16), a, b, c)
1536+
1537+
@test Array(c) ≈ Array(a) .^ Array(b) rtol=1e-4
13931538
end
13941539

1395-
@testset "integer power not supported" begin
1396-
int_tile = ct.arange((16,), Int)
1397-
@test_throws MethodError int_tile .^ int_tile
1540+
@testset "tile .^ scalar" begin
1541+
function pow_ts_kernel(a::ct.TileArray{Float32,1}, c::ct.TileArray{Float32,1})
1542+
pid = ct.bid(1)
1543+
ta = ct.load(a, pid, (16,))
1544+
ct.store(c, pid, ta .^ 2.0f0)
1545+
return
1546+
end
1547+
1548+
n = 1024
1549+
a = CUDA.rand(Float32, n) .+ 0.1f0
1550+
c = CUDA.zeros(Float32, n)
1551+
1552+
ct.launch(pow_ts_kernel, cld(n, 16), a, c)
1553+
1554+
@test Array(c) ≈ Array(a) .^ 2.0f0 rtol=1e-4
13981555
end
13991556

14001557
end

0 commit comments

Comments
 (0)