@@ -896,6 +896,118 @@ end
896896 @test Array (b) ≈ sqrt .(Array (a))
897897end
898898
@testset " 1D abs" begin
    # Kernel under test: load the 16-element tile of `a` addressed by the value
    # of `ct.bid(1)`, take the elementwise absolute value, and store it at the
    # same tile position of `b`.
    function vabs_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
        blk = ct.bid(1)
        vals = ct.load(a, blk, (16,))
        ct.store(b, blk, abs.(vals))
        return nothing
    end

    len = 1024
    # Shift the random samples down by 0.5 so the input mixes signs.
    src = CUDA.rand(Float32, len) .- 0.5f0
    dst = CUDA.zeros(Float32, len)

    # One launch unit per 16-element tile.
    ct.launch(vabs_1d, cld(len, 16), src, dst)

    # Reference result comes from Base's `abs` on the host copy of the input.
    expected = abs.(Array(src))
    @test isapprox(Array(dst), expected; rtol=1e-5)
end
915+
@testset " 1D cos" begin
    # Kernel under test: elementwise `cos` over one 16-element tile of `a`,
    # written to the matching tile of `b`.
    function vcos_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
        blk = ct.bid(1)
        vals = ct.load(a, blk, (16,))
        ct.store(b, blk, cos.(vals))
        return nothing
    end

    len = 1024
    # Scale the random samples by 6.28 so they span roughly [0, 2π].
    angles = CUDA.rand(Float32, len) .* 6.28f0
    result = CUDA.zeros(Float32, len)

    ct.launch(vcos_1d, cld(len, 16), angles, result)

    # Host-side reference computed with Base's `cos`.
    expected = cos.(Array(angles))
    @test isapprox(Array(result), expected; rtol=1e-4)
end
932+
@testset " 1D sin" begin
    # Kernel under test: elementwise `sin` over one 16-element tile of `a`,
    # written to the matching tile of `b`.
    function vsin_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
        blk = ct.bid(1)
        vals = ct.load(a, blk, (16,))
        ct.store(b, blk, sin.(vals))
        return nothing
    end

    len = 1024
    # Random samples scaled by 6.28 (about one full period).
    angles = CUDA.rand(Float32, len) .* 6.28f0
    result = CUDA.zeros(Float32, len)

    ct.launch(vsin_1d, cld(len, 16), angles, result)

    # Host-side reference computed with Base's `sin`.
    expected = sin.(Array(angles))
    @test isapprox(Array(result), expected; rtol=1e-4)
end
949+
@testset " 1D exp" begin
    # Kernel under test: elementwise `exp` over one 16-element tile of `a`,
    # written to the matching tile of `b`.
    function vexp_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
        blk = ct.bid(1)
        vals = ct.load(a, blk, (16,))
        ct.store(b, blk, exp.(vals))
        return nothing
    end

    len = 1024
    # Keep the exponents in [0, 4] so the results stay far from overflow.
    exponents = CUDA.rand(Float32, len) .* 4.0f0
    result = CUDA.zeros(Float32, len)

    ct.launch(vexp_1d, cld(len, 16), exponents, result)

    # Host-side reference computed with Base's `exp`.
    expected = exp.(Array(exponents))
    @test isapprox(Array(result), expected; rtol=1e-4)
end
966+
@testset " 1D log" begin
    # Kernel under test: elementwise `log` over one 16-element tile of `a`,
    # written to the matching tile of `b`.
    function vlog_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
        blk = ct.bid(1)
        vals = ct.load(a, blk, (16,))
        ct.store(b, blk, log.(vals))
        return nothing
    end

    len = 1024
    # Offset by 0.1 so every input is strictly positive.
    positives = CUDA.rand(Float32, len) .+ 0.1f0
    result = CUDA.zeros(Float32, len)

    ct.launch(vlog_1d, cld(len, 16), positives, result)

    # Host-side reference computed with Base's `log`.
    expected = log.(Array(positives))
    @test isapprox(Array(result), expected; rtol=1e-4)
end
983+
@testset " 1D ceil and floor" begin
    # Kernel under test: elementwise `ceil` over one 16-element tile.
    function vceil_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
        blk = ct.bid(1)
        vals = ct.load(a, blk, (16,))
        ct.store(b, blk, ceil.(vals))
        return nothing
    end

    # Kernel under test: elementwise `floor` over one 16-element tile.
    function vfloor_1d(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1})
        blk = ct.bid(1)
        vals = ct.load(a, blk, (16,))
        ct.store(b, blk, floor.(vals))
        return nothing
    end

    len = 1024
    # Samples spread over [-5, 5] so both signs get rounded.
    src = CUDA.rand(Float32, len) .* 10.0f0 .- 5.0f0
    ceiled = CUDA.zeros(Float32, len)
    floored = CUDA.zeros(Float32, len)

    # Both kernels read the same input and write to separate outputs.
    ct.launch(vceil_1d, cld(len, 16), src, ceiled)
    ct.launch(vfloor_1d, cld(len, 16), src, floored)

    src_host = Array(src)
    @test isapprox(Array(ceiled), ceil.(src_host))
    @test isapprox(Array(floored), floor.(src_host))
end
1010+
8991011end
9001012
9011013@testset " reduction operations" begin
@@ -1307,94 +1419,139 @@ end
13071419 end
13081420end
13091421
1310- @testset " mismatched shapes with + throws MethodError" begin
1311- # Verify that + with different tile shapes throws MethodError (Julia-idiomatic)
1312- # Note: This tests the type system, not kernel execution
1313- tile_a = ct. Tile {Float32, (1, 128)} ()
1314- tile_b = ct. Tile {Float32, (64, 1)} ()
1422+ end
13151423
1316- # + should require same shapes, so this should fail
1317- @test_throws MethodError tile_a + tile_b
1424+ @testset " comparison operations" begin
13181425
1319- # But .+ should work (broadcasting)
1320- result = tile_a .+ tile_b
1321- @test result isa ct. Tile{Float32, (64 , 128 )}
1322- end
@testset " float .< and .>" begin
    # Kernel under test: compare matching 16-element tiles of `a` and `b` with
    # `.<` and `.>`, then use `ct.where` to write 1.0f0 where the comparison
    # holds and 0.0f0 elsewhere.
    function cmp_lt_gt_kernel(
        a::ct.TileArray{Float32,1},
        b::ct.TileArray{Float32,1},
        out_lt::ct.TileArray{Float32,1},
        out_gt::ct.TileArray{Float32,1},
    )
        blk = ct.bid(1)
        lhs = ct.load(a, blk, (16,))
        rhs = ct.load(b, blk, (16,))
        lt_flags = ct.where(lhs .< rhs, 1.0f0, 0.0f0)
        gt_flags = ct.where(lhs .> rhs, 1.0f0, 0.0f0)
        ct.store(out_lt, blk, lt_flags)
        ct.store(out_gt, blk, gt_flags)
        return nothing
    end

    len = 1024
    x = CUDA.rand(Float32, len)
    y = CUDA.rand(Float32, len)
    lt_out = CUDA.zeros(Float32, len)
    gt_out = CUDA.zeros(Float32, len)

    ct.launch(cmp_lt_gt_kernel, cld(len, 16), x, y, lt_out, gt_out)

    # Host reference: Bool comparison results converted to Float32 0/1.
    x_host = Array(x)
    y_host = Array(y)
    @test isapprox(Array(lt_out), Float32.(x_host .< y_host))
    @test isapprox(Array(gt_out), Float32.(x_host .> y_host))
end
13251448
1326- @testset " comparison operations" begin
@testset " float .<= and .>=" begin
    # Kernel under test: compare matching 16-element tiles of `a` and `b` with
    # `.<=` and `.>=`, then use `ct.where` to write 1.0f0 where the comparison
    # holds and 0.0f0 elsewhere.
    function cmp_le_ge_kernel(
        a::ct.TileArray{Float32,1},
        b::ct.TileArray{Float32,1},
        out_le::ct.TileArray{Float32,1},
        out_ge::ct.TileArray{Float32,1},
    )
        blk = ct.bid(1)
        lhs = ct.load(a, blk, (16,))
        rhs = ct.load(b, blk, (16,))
        le_flags = ct.where(lhs .<= rhs, 1.0f0, 0.0f0)
        ge_flags = ct.where(lhs .>= rhs, 1.0f0, 0.0f0)
        ct.store(out_le, blk, le_flags)
        ct.store(out_ge, blk, ge_flags)
        return nothing
    end

    len = 1024
    x = CUDA.rand(Float32, len)
    y = CUDA.rand(Float32, len)
    le_out = CUDA.zeros(Float32, len)
    ge_out = CUDA.zeros(Float32, len)

    ct.launch(cmp_le_ge_kernel, cld(len, 16), x, y, le_out, ge_out)

    # Host reference: Bool comparison results converted to Float32 0/1.
    x_host = Array(x)
    y_host = Array(y)
    @test isapprox(Array(le_out), Float32.(x_host .<= y_host))
    @test isapprox(Array(ge_out), Float32.(x_host .>= y_host))
end
13391471
1340- @testset " integer comparison operators" begin
1341- # Test all broadcast comparison operators with Int tiles
1342- int_tile = ct. arange ((16 ,), Int)
@testset " float .== and .!=" begin
    # Kernel under test: compare matching 16-element tiles of `a` and `b` with
    # `.==` and `.!=`, then use `ct.where` to write 1.0f0 where the comparison
    # holds and 0.0f0 elsewhere.
    function cmp_eq_ne_kernel(a::ct.TileArray{Float32,1}, b::ct.TileArray{Float32,1},
                              out_eq::ct.TileArray{Float32,1},
                              out_ne::ct.TileArray{Float32,1})
        pid = ct.bid(1)
        ta = ct.load(a, pid, (16,))
        tb = ct.load(b, pid, (16,))
        ct.store(out_eq, pid, ct.where(ta .== tb, 1.0f0, 0.0f0))
        ct.store(out_ne, pid, ct.where(ta .!= tb, 1.0f0, 0.0f0))
        return
    end

    n = 1024
    # Use integer-valued floats so equality is meaningful.
    a = CUDA.fill(1.0f0, n)
    # Make half of `b` differ from `a`, alternating element by element so that
    # every 16-element tile contains both equal and unequal lanes. The pattern
    # is built on the host and uploaded once, so no device-side indexing (and
    # therefore no `@allowscalar`) is involved.
    b_host = fill(1.0f0, n)
    b_host[1:2:end] .= 2.0f0
    b = CUDA.CuArray(b_host)
    out_eq = CUDA.zeros(Float32, n)
    out_ne = CUDA.zeros(Float32, n)

    ct.launch(cmp_eq_ne_kernel, cld(n, 16), a, b, out_eq, out_ne)

    # Host reference: Bool comparison results converted to Float32 0/1.
    @test Array(out_eq) ≈ Float32.(Array(a) .== Array(b))
    @test Array(out_ne) ≈ Float32.(Array(a) .!= Array(b))
end
13511497
@testset " tile vs scalar comparison" begin
    # Kernel under test: compare one 16-element tile of `a` against the scalar
    # 0.5f0 with `.>`, then use `ct.where` to write 1.0f0 where the comparison
    # holds and 0.0f0 elsewhere.
    function cmp_scalar_kernel(
        a::ct.TileArray{Float32,1},
        out::ct.TileArray{Float32,1},
    )
        blk = ct.bid(1)
        vals = ct.load(a, blk, (16,))
        flags = ct.where(vals .> 0.5f0, 1.0f0, 0.0f0)
        ct.store(out, blk, flags)
        return nothing
    end

    len = 1024
    src = CUDA.rand(Float32, len)
    flags_out = CUDA.zeros(Float32, len)

    ct.launch(cmp_scalar_kernel, cld(len, 16), src, flags_out)

    # Host reference: threshold on the host copy, converted to Float32 0/1.
    expected = Float32.(Array(src) .> 0.5f0)
    @test isapprox(Array(flags_out), expected)
end
13731515
13741516end
13751517
13761518@testset " power operations" begin
13771519
1378- @testset " float tile .^ float tile" begin
1379- tile = ct. Tile {Float32, (16,)} ()
1380- @test (tile .^ tile) isa ct. Tile{Float32, (16 ,)}
1381- end
@testset " tile .^ tile" begin
    # Kernel under test: raise a 16-element tile of `a` to the elementwise
    # powers held in the matching tile of `b`, storing the result in `c`.
    function pow_tt_kernel(
        a::ct.TileArray{Float32,1},
        b::ct.TileArray{Float32,1},
        c::ct.TileArray{Float32,1},
    )
        blk = ct.bid(1)
        bases = ct.load(a, blk, (16,))
        powers = ct.load(b, blk, (16,))
        ct.store(c, blk, bases .^ powers)
        return nothing
    end

    len = 1024
    # Offset by 0.5 so every base is strictly positive.
    base = CUDA.rand(Float32, len) .+ 0.5f0
    expo = CUDA.rand(Float32, len) .+ 0.5f0
    result = CUDA.zeros(Float32, len)

    ct.launch(pow_tt_kernel, cld(len, 16), base, expo, result)

    # Host-side reference computed with Base's broadcasted `^`.
    expected = Array(base) .^ Array(expo)
    @test isapprox(Array(result), expected; rtol=1e-4)
end
13941539
1395- @testset " integer power not supported" begin
1396- int_tile = ct. arange ((16 ,), Int)
1397- @test_throws MethodError int_tile .^ int_tile
@testset " tile .^ scalar" begin
    # Kernel under test: raise one 16-element tile of `a` to the scalar power
    # 2.0f0 and store the result in the matching tile of `c`.
    function pow_ts_kernel(a::ct.TileArray{Float32,1}, c::ct.TileArray{Float32,1})
        blk = ct.bid(1)
        bases = ct.load(a, blk, (16,))
        ct.store(c, blk, bases .^ 2.0f0)
        return nothing
    end

    len = 1024
    # Offset by 0.1 so every base is strictly positive.
    base = CUDA.rand(Float32, len) .+ 0.1f0
    result = CUDA.zeros(Float32, len)

    ct.launch(pow_ts_kernel, cld(len, 16), base, result)

    # Host-side reference computed with Base's broadcasted `^`.
    expected = Array(base) .^ 2.0f0
    @test isapprox(Array(result), expected; rtol=1e-4)
end
13991556
14001557end
0 commit comments