Skip to content

Commit 9dc66ea

Browse files
committed
Split test/device/random.jl into multiple files
1 parent bf6489b commit 9dc66ea

3 files changed

Lines changed: 372 additions & 195 deletions

File tree

Lines changed: 0 additions & 195 deletions
Original file line numberDiff line numberDiff line change
@@ -223,201 +223,6 @@ end
223223
end
224224

225225

226-
# Shared configuration for the randn/randexp tests below, one entry per
# distribution:
#   * `label`    — name interpolated into the testset titles
#   * `f` / `f!` — allocating and in-place samplers under test
#   * `edges`    — 3 quartile-or-equivalent CDF cuts
#   * `probs`    — matching per-bin probabilities (one more than `edges`)
#   * `mean`     — population mean
#   * `in_range` — per-element validity predicate (e.g. exponential support
#                  is `[0, ∞)`)
const RAND_DISTS = (
    (
        label = "randn",
        f = Random.randn,
        f! = Random.randn!,
        edges = (-1.0, 0.0, 1.0),
        probs = (0.1587, 0.3413, 0.3413, 0.1587),
        mean = 0.0,
        in_range = isfinite,
    ),
    (
        label = "randexp",
        f = Random.randexp,
        f! = Random.randexp!,
        edges = (-log(0.75), -log(0.5), -log(0.25)),
        probs = (0.25, 0.25, 0.25, 0.25),
        mean = 1.0,
        in_range = >=(0.0),
    ),
)
247-
248-
# Histogram `v` into `length(edges) + 1` bins delimited by `edges` (expected
# to be in ascending order). A sample `x` is counted in the first bin `i` for
# which `x <= edges[i]`; samples above every edge, and NaN (which compares
# false against every edge), fall into the final overflow bin. Each sample is
# widened to `Float64` before comparison so narrow float types bin the same
# way as wide ones. Returns a `Vector{Int}` of per-bin counts.
function bin_counts(v, edges)
    # Index of the overflow bin; loop-invariant, so compute it once.
    overflow = length(edges) + 1
    counts = zeros(Int, overflow)
    for x in v
        idx = something(findfirst(>=(Float64(x)), edges), overflow)
        counts[idx] += 1
    end
    return counts
end
256-
257-
# Bin-count shape check: every bin defined by `dist.edges` must hold within
# `tol_factor * N` samples of its expected `N * p` count. With the default
# 10% band, the bins under either CDF (p≈0.16 and p≈0.34 for normal, p=0.25
# for exp) sit more than 5σ inside the tolerance at N≥1024, so the check is
# flake-free across the per-launch-randomized default-stream seed.
function check_dist(v, dist; N = length(v), tol_factor = 0.10)
    observed = bin_counts(v, dist.edges)
    slack = N * tol_factor
    for i in eachindex(dist.probs)
        expected = N * dist.probs[i]
        @test abs(observed[i] - expected) < slack
    end
    return nothing
end
266-
267-
# Tolerance on |sample mean − population mean|. The standard error σ/√N at
# N=1024 is ≈ 0.031 for unit σ, so 5 standard errors ≈ 0.16. Narrow floats
# lose precision in the BM/log path, so widen for those.
function mean_tol(::Type{T}) where {T}
    is_narrow = T <: Union{Float16, ct.BFloat16}
    return is_narrow ? 0.20 : 0.15
end
271-
272-
# Device-side coverage, run once per entry of `RAND_DISTS`: typed scalar/tile
# draws, the untyped (Float32-default) tile draw, and agreement between an
# in-kernel seeded draw and the host RNG.
@testset "device $(dist.label)" for dist in RAND_DISTS
    f, in_range = dist.f, dist.in_range

    @testset "typed surfaces (T=$T, dims=$dims)" for (T, dims) in (
        (Float32, (16,)), (Float32, (32,)), (Float32, (64,)),
        (Float64, (16,)), (Float16, (16,)), (ct.BFloat16, (16,)),
    )
        # Outputs written by each block:
        #   `o1` — scalar form
        #   `o2` — tile form via the default stream
        #   `o3` — tile form via an explicit `DeviceRNG` (different stream)
        function k(o1, o2, o3, ::Type{T_}, dims_::NTuple{N, Int}) where {T_, N}
            pid = ct.bid(1)
            rng = ct.DeviceRNG()
            Random.seed!(rng, 1)
            o1[pid] = f(T_)
            ct.store(o2, pid, f(T_, dims_))
            ct.store(o3, pid, f(rng, T_, prod(dims_)))
            return
        end

        n_blocks = 64
        tile_len = prod(dims)
        o1 = CUDA.zeros(T, n_blocks)
        o2 = CUDA.zeros(T, n_blocks * tile_len)
        o3 = CUDA.zeros(T, n_blocks * tile_len)
        @cuda backend=cuTile blocks=n_blocks k(o1, o2, o3, T, ct.Constant(dims))

        # Copy each device buffer to the host once and reuse it below.
        scalar_draws = Array(o1)
        default_draws = Array(o2)
        explicit_draws = Array(o3)

        for v in (scalar_draws, default_draws, explicit_draws)
            @test eltype(v) === T
            @test all(isfinite, v)
            @test all(in_range, v)
        end

        # Distribution shape + mean on the larger draws only (o1's N=64 is
        # too small for shape testing).
        for v in (default_draws, explicit_draws)
            check_dist(v, dist)
            @test abs(sum(Float64, v) / length(v) - dist.mean) < mean_tol(T)
        end
    end

    @testset "untyped surface defaults to Float32" begin
        function k(o::ct.TileArray{Float32, 1})
            pid = ct.bid(1)
            ct.store(o, pid, ct.reshape(f(4, 4), (16,)))
            return
        end

        n_blocks = 64
        o = CUDA.zeros(Float32, n_blocks * 16)
        @cuda backend=cuTile blocks=n_blocks k(o)

        v = Array(o)
        @test eltype(v) === Float32
        @test all(in_range, v)
        check_dist(v, dist)
    end

    @testset "in-kernel `Random.seed!` matches host RNG output" begin
        # A single-block draw after an in-kernel
        # `Random.seed!(default_rng(), s)` plumbs the same `(seed, counter)`
        # as `cuTile.RNG(s, 0)`, so the outputs must be byte-identical.
        function k(out::ct.TileArray{Float32, 1})
            Random.seed!(Random.default_rng(), UInt32(42))
            pid = ct.bid(1)
            ct.store(out, pid, f(Float32, (512,)))
            return
        end

        out = CUDA.zeros(Float32, 512)
        @cuda backend=cuTile k(out)

        device_draws = Array(out)
        host_draws = Array(f(ct.RNG(UInt32(42), UInt32(0)), Float32, 512))
        @test device_draws == host_draws
    end
end
339-
340-
341-
# Host-side coverage, run once per entry of `RAND_DISTS`: in-place fill and
# counter bookkeeping, seed determinism, element-type coverage, the `ct.*`
# aliases, and lengths that are not a multiple of the fill tile.
@testset "host $(dist.label)" for dist in RAND_DISTS
    f, f!, in_range = dist.f, dist.f!, dist.in_range
    N = 4096

    @testset "$(dist.label)! basics + counter advance" begin
        rng = ct.RNG(42)
        buf = CUDA.zeros(Float32, N)
        f!(rng, buf)

        host = Array(buf)
        @test all(isfinite, host)
        @test all(in_range, host)
        @test rng.counter == UInt32(N)
        @test abs(sum(host) / N - dist.mean) < 0.1
    end

    @testset "determinism + Philox re-keying" begin
        # Same seed → byte-identical output.
        first_run = Array(f(ct.RNG(123), Float32, N))
        second_run = Array(f(ct.RNG(123), Float32, N))
        @test first_run == second_run

        # Different seeds → uncorrelated streams. Set-disjointness isn't safe
        # for either distribution at this N because of Float32 birthday
        # collisions (especially randexp, whose output concentrates near 0),
        # so compare element-wise instead — uncorrelated streams collide in
        # ≤ N²/2^24 ≈ 1 position.
        c = Array(f(ct.RNG(42), Float32, N))
        d = Array(f(ct.RNG(100), Float32, N))
        n_equal = count(c .== d)
        @test n_equal < N ÷ 100
    end

    @testset "consecutive disjoint; seed! resets counter" begin
        # Two back-to-back draws on the same RNG use disjoint counter ranges,
        # so the underlying Philox outputs (and the post-transform samples)
        # are disjoint up to Float32 birthday flake.
        rng = ct.RNG(7)
        a = Array(f(rng, Float32, N))
        b = Array(f(rng, Float32, N))
        n_equal = count(a .== b)
        @test n_equal < N ÷ 100

        Random.seed!(rng, 7)
        @test rng.counter == UInt32(0)
    end

    @testset "T-coverage" for T in (Float16, ct.BFloat16, Float32, Float64)
        v = Array(f(ct.RNG(13), T, N))
        @test eltype(v) === T
        @test all(in_range, v)
        @test all(isfinite, v)
        @test abs(sum(Float64, v) / N - dist.mean) < mean_tol(T)
        check_dist(v, dist)
    end

    @testset "ct.$(dist.label) / ct.$(dist.label)! aliases default to Float32" begin
        # Select the alias pair that matches the distribution under test.
        if dist.label == "randn"
            ct_f, ct_f! = ct.randn, ct.randn!
        else
            ct_f, ct_f! = ct.randexp, ct.randexp!
        end

        @test eltype(ct_f(N)) === Float32
        @test eltype(ct_f(Float32, N)) === Float32

        B = CUDA.zeros(Float32, N)
        ct_f!(B)
        @test all(in_range, Array(B))
    end

    @testset "arbitrary length (partial last tile)" begin
        # `store_partition_view` clips OOB writes, so `length(A)` can be any
        # value. The host advances by `n_blocks * RAND_FILL_TILE`, not
        # `length(A)`, so consecutive partial-length calls remain disjoint up
        # to Float32 birthday flake.
        rng = ct.RNG(0)
        A = CUDA.zeros(Float32, 513)
        f!(rng, A)
        @test all(isfinite, Array(A))
        # 513 elements are expected to advance the counter by two fill tiles.
        @test rng.counter == UInt32(2 * cuTile.RAND_FILL_TILE)

        rng2 = ct.RNG(0)
        a = Array(f(rng2, Float32, 100))
        b = Array(f(rng2, Float32, 100))
        n_equal = count(a .== b)
        @test n_equal < 100 ÷ 10
    end
end
419-
420-
421226
@testset "host rand" begin
422227
N = 2048
423228

test/device/random_randexp.jl

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
using CUDA
2+
using Random
3+
4+
# Distribution config shared by the device + host randexp testsets:
#   * `label`    — name of the distribution under test
#   * `f` / `f!` — allocating and in-place samplers under test
#   * `edges`    — 3 quartile-or-equivalent CDF cuts
#   * `probs`    — matching per-bin probabilities (one more than `edges`)
#   * `mean`     — population mean
#   * `in_range` — per-element validity predicate (exponential support is
#                  `[0, ∞)`)
const RANDEXP_DIST = (
    label = "randexp",
    f = Random.randexp,
    f! = Random.randexp!,
    edges = (-log(0.75), -log(0.5), -log(0.25)),
    probs = (0.25, 0.25, 0.25, 0.25),
    mean = 1.0,
    in_range = >=(0.0),
)
15+
16+
# Histogram `v` into `length(edges) + 1` bins delimited by `edges` (expected
# to be in ascending order). A sample `x` is counted in the first bin `i` for
# which `x <= edges[i]`; samples above every edge, and NaN (which compares
# false against every edge), fall into the final overflow bin. Each sample is
# widened to `Float64` before comparison so narrow float types bin the same
# way as wide ones. Returns a `Vector{Int}` of per-bin counts.
function bin_counts(v, edges)
    # Index of the overflow bin; loop-invariant, so compute it once.
    overflow = length(edges) + 1
    counts = zeros(Int, overflow)
    for x in v
        idx = something(findfirst(>=(Float64(x)), edges), overflow)
        counts[idx] += 1
    end
    return counts
end
24+
25+
# Bin-count shape check: every bin defined by `dist.edges` must hold within
# `tol_factor * N` samples of its expected `N * p` count. With the default
# 10% band, the bins under either CDF (p≈0.16 and p≈0.34 for normal, p=0.25
# for exp) sit more than 5σ inside the tolerance at N≥1024, so the check is
# flake-free across the per-launch-randomized default-stream seed.
function check_dist(v, dist; N = length(v), tol_factor = 0.10)
    observed = bin_counts(v, dist.edges)
    slack = N * tol_factor
    for i in eachindex(dist.probs)
        expected = N * dist.probs[i]
        @test abs(observed[i] - expected) < slack
    end
    return nothing
end
34+
35+
# Tolerance on |sample mean − population mean|. The standard error σ/√N at
# N=1024 is ≈ 0.031 for unit σ, so 5 standard errors ≈ 0.16. Narrow floats
# lose precision in the BM/log path, so widen for those.
function mean_tol(::Type{T}) where {T}
    is_narrow = T <: Union{Float16, ct.BFloat16}
    return is_narrow ? 0.20 : 0.15
end
39+
40+
# Device-side randexp coverage: typed scalar/tile draws, the untyped
# (Float32-default) tile draw, and agreement between an in-kernel seeded draw
# and the host RNG.
@testset "device randexp" begin
    dist = RANDEXP_DIST
    f, in_range = dist.f, dist.in_range

    @testset "typed surfaces (T=$T, dims=$dims)" for (T, dims) in (
        (Float32, (16,)), (Float32, (32,)), (Float32, (64,)),
        (Float64, (16,)), (Float16, (16,)), (ct.BFloat16, (16,)),
    )
        # Outputs written by each block:
        #   `o1` — scalar form
        #   `o2` — tile form via the default stream
        #   `o3` — tile form via an explicit `DeviceRNG` (different stream)
        function k(o1, o2, o3, ::Type{T_}, dims_::NTuple{N, Int}) where {T_, N}
            pid = ct.bid(1)
            rng = ct.DeviceRNG()
            Random.seed!(rng, 1)
            o1[pid] = f(T_)
            ct.store(o2, pid, f(T_, dims_))
            ct.store(o3, pid, f(rng, T_, prod(dims_)))
            return
        end

        n_blocks = 64
        tile_len = prod(dims)
        o1 = CUDA.zeros(T, n_blocks)
        o2 = CUDA.zeros(T, n_blocks * tile_len)
        o3 = CUDA.zeros(T, n_blocks * tile_len)
        @cuda backend=cuTile blocks=n_blocks k(o1, o2, o3, T, ct.Constant(dims))

        # Copy each device buffer to the host once and reuse it below.
        scalar_draws = Array(o1)
        default_draws = Array(o2)
        explicit_draws = Array(o3)

        for v in (scalar_draws, default_draws, explicit_draws)
            @test eltype(v) === T
            @test all(isfinite, v)
            @test all(in_range, v)
        end

        # Distribution shape + mean on the larger draws only (o1's N=64 is
        # too small for shape testing).
        for v in (default_draws, explicit_draws)
            check_dist(v, dist)
            @test abs(sum(Float64, v) / length(v) - dist.mean) < mean_tol(T)
        end
    end

    @testset "untyped surface defaults to Float32" begin
        function k(o::ct.TileArray{Float32, 1})
            pid = ct.bid(1)
            ct.store(o, pid, ct.reshape(f(4, 4), (16,)))
            return
        end

        n_blocks = 64
        o = CUDA.zeros(Float32, n_blocks * 16)
        @cuda backend=cuTile blocks=n_blocks k(o)

        v = Array(o)
        @test eltype(v) === Float32
        @test all(in_range, v)
        check_dist(v, dist)
    end

    @testset "in-kernel `Random.seed!` matches host RNG output" begin
        # A single-block draw after an in-kernel
        # `Random.seed!(default_rng(), s)` plumbs the same `(seed, counter)`
        # as `cuTile.RNG(s, 0)`, so the outputs must be byte-identical.
        function k(out::ct.TileArray{Float32, 1})
            Random.seed!(Random.default_rng(), UInt32(42))
            pid = ct.bid(1)
            ct.store(out, pid, f(Float32, (512,)))
            return
        end

        out = CUDA.zeros(Float32, 512)
        @cuda backend=cuTile k(out)

        device_draws = Array(out)
        host_draws = Array(f(ct.RNG(UInt32(42), UInt32(0)), Float32, 512))
        @test device_draws == host_draws
    end
end
108+
109+
110+
# Host-side randexp coverage: in-place fill and counter bookkeeping, seed
# determinism, element-type coverage, the `ct.randexp` aliases, and lengths
# that are not a multiple of the fill tile.
@testset "host randexp" begin
    dist = RANDEXP_DIST
    f, f!, in_range = dist.f, dist.f!, dist.in_range
    N = 4096

    @testset "randexp! basics + counter advance" begin
        rng = ct.RNG(42)
        buf = CUDA.zeros(Float32, N)
        f!(rng, buf)

        host = Array(buf)
        @test all(isfinite, host)
        @test all(in_range, host)
        @test rng.counter == UInt32(N)
        @test abs(sum(host) / N - dist.mean) < 0.1
    end

    @testset "determinism + Philox re-keying" begin
        # Same seed → byte-identical output.
        first_run = Array(f(ct.RNG(123), Float32, N))
        second_run = Array(f(ct.RNG(123), Float32, N))
        @test first_run == second_run

        # Different seeds → uncorrelated streams. Set-disjointness isn't safe
        # for either distribution at this N because of Float32 birthday
        # collisions (especially randexp, whose output concentrates near 0),
        # so compare element-wise instead — uncorrelated streams collide in
        # ≤ N²/2^24 ≈ 1 position.
        c = Array(f(ct.RNG(42), Float32, N))
        d = Array(f(ct.RNG(100), Float32, N))
        n_equal = count(c .== d)
        @test n_equal < N ÷ 100
    end

    @testset "consecutive disjoint; seed! resets counter" begin
        # Two back-to-back draws on the same RNG use disjoint counter ranges,
        # so the underlying Philox outputs (and the post-transform samples)
        # are disjoint up to Float32 birthday flake.
        rng = ct.RNG(7)
        a = Array(f(rng, Float32, N))
        b = Array(f(rng, Float32, N))
        n_equal = count(a .== b)
        @test n_equal < N ÷ 100

        Random.seed!(rng, 7)
        @test rng.counter == UInt32(0)
    end

    @testset "T-coverage" for T in (Float16, ct.BFloat16, Float32, Float64)
        v = Array(f(ct.RNG(13), T, N))
        @test eltype(v) === T
        @test all(in_range, v)
        @test all(isfinite, v)
        @test abs(sum(Float64, v) / N - dist.mean) < mean_tol(T)
        check_dist(v, dist)
    end

    @testset "ct.randexp / ct.randexp! aliases default to Float32" begin
        @test eltype(ct.randexp(N)) === Float32
        @test eltype(ct.randexp(Float32, N)) === Float32

        B = CUDA.zeros(Float32, N)
        ct.randexp!(B)
        @test all(in_range, Array(B))
    end

    @testset "arbitrary length (partial last tile)" begin
        # `store_partition_view` clips OOB writes, so `length(A)` can be any
        # value. The host advances by `n_blocks * RAND_FILL_TILE`, not
        # `length(A)`, so consecutive partial-length calls remain disjoint up
        # to Float32 birthday flake.
        rng = ct.RNG(0)
        A = CUDA.zeros(Float32, 513)
        f!(rng, A)
        @test all(isfinite, Array(A))
        # 513 elements are expected to advance the counter by two fill tiles.
        @test rng.counter == UInt32(2 * cuTile.RAND_FILL_TILE)

        rng2 = ct.RNG(0)
        a = Array(f(rng2, Float32, 100))
        b = Array(f(rng2, Float32, 100))
        n_equal = count(a .== b)
        @test n_equal < 100 ÷ 10
    end
end

0 commit comments

Comments
 (0)