Skip to content

Commit 8d0c41d

Browse files
committed
upgrade to zig 0.16.0
1 parent 3a5955b commit 8d0c41d

3 files changed

Lines changed: 74 additions & 67 deletions

File tree

build.zig.zon

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
.name = .zmath,
33
.fingerprint = 0xfd23d422bd223cc2,
44
.version = "0.11.0-dev",
5-
.minimum_zig_version = "0.15.1",
5+
.minimum_zig_version = "0.16.0",
66
.paths = .{
77
"build.zig",
88
"build.zig.zon",

src/benchmark.zig

Lines changed: 51 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -49,43 +49,41 @@
4949
/// wave benchmark (SOA) - scalar version: 3.7832s, zmath version: 0.3642s
5050
///
5151
/// -------------------------------------------------------------------------------------------------
52-
pub fn main() !void {
53-
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
54-
defer _ = gpa.deinit();
55-
const allocator = gpa.allocator();
52+
pub fn main(init: std.process.Init) !void {
53+
const allocator = init.gpa;
54+
const io = init.io;
5655

5756
// m = mul(ma, mb); data set fits in L1 cache; AOS data layout.
58-
try mat4MulBenchmark(allocator, 100_000);
57+
try mat4MulBenchmark(allocator, io, 100_000);
5958

6059
// v = 0.01 * cross3(va, vb) + vec3(1.0); data set fits in L1 cache; AOS data layout.
61-
try cross3ScaleBiasBenchmark(allocator, 10_000);
60+
try cross3ScaleBiasBenchmark(allocator, io, 10_000);
6261

6362
// v = dot3(va, vb) * (0.1 * cross3(va, vb) + vec3(1.0)); data set fits in L1 cache; AOS data layout.
64-
try cross3Dot3ScaleBiasBenchmark(allocator, 10_000);
63+
try cross3Dot3ScaleBiasBenchmark(allocator, io, 10_000);
6564

6665
// q = qmul(qa, qb); data set fits in L1 cache; AOS data layout.
67-
try quatBenchmark(allocator, 10_000);
66+
try quatBenchmark(allocator, io, 10_000);
6867

6968
// d = sqrt(x * x + z * z); y = sin(d - t); SOA layout.
70-
try waveBenchmark(allocator, 1_000);
69+
try waveBenchmark(allocator, io, 1_000);
7170
}
7271

7372
const std = @import("std");
74-
const time = std.time;
75-
const Timer = time.Timer;
73+
const Clock = std.Io.Clock;
7674
const zm = @import("zmath");
7775

7876
var prng = std.Random.DefaultPrng.init(0);
7977
const random = prng.random();
8078

81-
noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
79+
noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, io: std.Io, comptime count: comptime_int) !void {
8280
std.debug.print("\n", .{});
8381
std.debug.print("{s:>42} - ", .{"matrix mul benchmark (AOS)"});
8482

8583
var data0 = try std.ArrayList([16]f32).initCapacity(allocator, 64);
86-
defer data0.deinit();
84+
defer data0.deinit(allocator);
8785
var data1 = try std.ArrayList([16]f32).initCapacity(allocator, 64);
88-
defer data1.deinit();
86+
defer data1.deinit(allocator);
8987

9088
var i: usize = 0;
9189
while (i < 64) : (i += 1) {
@@ -118,8 +116,7 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt
118116

119117
{
120118
i = 0;
121-
var timer = try Timer.start();
122-
const start = timer.lap();
119+
const start = Clock.now(.awake, io);
123120
while (i < count) : (i += 1) {
124121
for (data1.items) |b| {
125122
for (data0.items) |a| {
@@ -145,16 +142,15 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt
145142
}
146143
}
147144
}
148-
const end = timer.read();
149-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
145+
const end = Clock.now(.awake, io);
146+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
150147

151148
std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
152149
}
153150

154151
{
155152
i = 0;
156-
var timer = try Timer.start();
157-
const start = timer.lap();
153+
const start = Clock.now(.awake, io);
158154
while (i < count) : (i += 1) {
159155
for (data1.items) |b| {
160156
for (data0.items) |a| {
@@ -165,20 +161,23 @@ noinline fn mat4MulBenchmark(allocator: std.mem.Allocator, comptime count: compt
165161
}
166162
}
167163
}
168-
const end = timer.read();
169-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
164+
const end = Clock.now(
165+
.awake,
166+
io,
167+
);
168+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
170169

171170
std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
172171
}
173172
}
174173

175-
noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
174+
noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, io: std.Io, comptime count: comptime_int) !void {
176175
std.debug.print("{s:>42} - ", .{"cross3, scale, bias benchmark (AOS)"});
177176

178177
var data0 = try std.ArrayList([3]f32).initCapacity(allocator, 256);
179-
defer data0.deinit();
178+
defer data0.deinit(allocator);
180179
var data1 = try std.ArrayList([3]f32).initCapacity(allocator, 256);
181-
defer data1.deinit();
180+
defer data1.deinit(allocator);
182181

183182
var i: usize = 0;
184183
while (i < 256) : (i += 1) {
@@ -201,8 +200,7 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun
201200

202201
{
203202
i = 0;
204-
var timer = try Timer.start();
205-
const start = timer.lap();
203+
const start = Clock.now(.awake, io);
206204
while (i < count) : (i += 1) {
207205
for (data1.items) |b| {
208206
for (data0.items) |a| {
@@ -215,16 +213,15 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun
215213
}
216214
}
217215
}
218-
const end = timer.read();
219-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
216+
const end = Clock.now(.awake, io);
217+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
220218

221219
std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
222220
}
223221

224222
{
225223
i = 0;
226-
var timer = try Timer.start();
227-
const start = timer.lap();
224+
const start = Clock.now(.awake, io);
228225
while (i < count) : (i += 1) {
229226
for (data1.items) |b| {
230227
for (data0.items) |a| {
@@ -235,14 +232,14 @@ noinline fn cross3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime coun
235232
}
236233
}
237234
}
238-
const end = timer.read();
239-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
235+
const end = Clock.now(.awake, io);
236+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
240237

241238
std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
242239
}
243240
}
244241

245-
noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
242+
noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, io: std.Io, comptime count: comptime_int) !void {
246243
std.debug.print("{s:>42} - ", .{"cross3, dot3, scale, bias benchmark (AOS)"});
247244

248245
var data0 = try std.ArrayList([3]f32).initCapacity(allocator, 256);
@@ -271,8 +268,7 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime
271268

272269
{
273270
i = 0;
274-
var timer = try Timer.start();
275-
const start = timer.lap();
271+
const start = Clock.now(.awake, io);
276272
while (i < count) : (i += 1) {
277273
for (data1.items) |b| {
278274
for (data0.items) |a| {
@@ -286,16 +282,15 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime
286282
}
287283
}
288284
}
289-
const end = timer.read();
290-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
285+
const end = Clock.now(.awake, io);
286+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
291287

292288
std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
293289
}
294290

295291
{
296292
i = 0;
297-
var timer = try Timer.start();
298-
const start = timer.lap();
293+
const start = Clock.now(.awake, io);
299294
while (i < count) : (i += 1) {
300295
for (data1.items) |b| {
301296
for (data0.items) |a| {
@@ -306,14 +301,14 @@ noinline fn cross3Dot3ScaleBiasBenchmark(allocator: std.mem.Allocator, comptime
306301
}
307302
}
308303
}
309-
const end = timer.read();
310-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
304+
const end = Clock.now(.awake, io);
305+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
311306

312307
std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
313308
}
314309
}
315310

316-
noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
311+
noinline fn quatBenchmark(allocator: std.mem.Allocator, io: std.Io, comptime count: comptime_int) !void {
317312
std.debug.print("{s:>42} - ", .{"quaternion mul benchmark (AOS)"});
318313

319314
var data0 = try std.ArrayList([4]f32).initCapacity(allocator, 256);
@@ -342,8 +337,7 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime
342337

343338
{
344339
i = 0;
345-
var timer = try Timer.start();
346-
const start = timer.lap();
340+
const start = Clock.now(.awake, io);
347341
while (i < count) : (i += 1) {
348342
for (data1.items) |b| {
349343
for (data0.items) |a| {
@@ -357,16 +351,15 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime
357351
}
358352
}
359353
}
360-
const end = timer.read();
361-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
354+
const end = Clock.now(.awake, io);
355+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
362356

363357
std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
364358
}
365359

366360
{
367361
i = 0;
368-
var timer = try Timer.start();
369-
const start = timer.lap();
362+
const start = Clock.now(.awake, io);
370363
while (i < count) : (i += 1) {
371364
for (data1.items) |b| {
372365
for (data0.items) |a| {
@@ -377,14 +370,14 @@ noinline fn quatBenchmark(allocator: std.mem.Allocator, comptime count: comptime
377370
}
378371
}
379372
}
380-
const end = timer.read();
381-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
373+
const end = Clock.now(.awake, io);
374+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
382375

383376
std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
384377
}
385378
}
386379

387-
noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime_int) !void {
380+
noinline fn waveBenchmark(allocator: std.mem.Allocator, io: std.Io, comptime count: comptime_int) !void {
388381
_ = allocator;
389382
std.debug.print("{s:>42} - ", .{"wave benchmark (SOA)"});
390383

@@ -394,8 +387,7 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime
394387

395388
const scale: f32 = 0.05;
396389

397-
var timer = try Timer.start();
398-
const start = timer.lap();
390+
const start = Clock.now(.awake, io);
399391

400392
var iter: usize = 0;
401393
while (iter < count) : (iter += 1) {
@@ -428,8 +420,8 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime
428420
}
429421
t += 0.001;
430422
}
431-
const end = timer.read();
432-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
423+
const end = Clock.now(.awake, io);
424+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
433425

434426
std.debug.print("scalar version: {d:.4}s, ", .{elapsed_s});
435427
}
@@ -445,8 +437,7 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime
445437

446438
const scale: f32 = 0.05;
447439

448-
var timer = try Timer.start();
449-
const start = timer.lap();
440+
const start = Clock.now(.awake, io);
450441

451442
var iter: usize = 0;
452443
while (iter < count) : (iter += 1) {
@@ -469,8 +460,8 @@ noinline fn waveBenchmark(allocator: std.mem.Allocator, comptime count: comptime
469460
}
470461
vt += zm.splat(T, 0.001);
471462
}
472-
const end = timer.read();
473-
const elapsed_s = @as(f64, @floatFromInt(end - start)) / time.ns_per_s;
463+
const end = Clock.now(.awake, io);
464+
const elapsed_s = @as(f64, @floatFromInt(start.durationTo(end).toNanoseconds())) / std.time.ns_per_s;
474465

475466
std.debug.print("zmath version: {d:.4}s\n", .{elapsed_s});
476467
}

src/root.zig

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -787,8 +787,15 @@ test "zmath.maxFast" {
787787
}
788788

789789
pub inline fn min(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
790-
// This will handle inf & nan
791-
return @min(v0, v1); // minps, cmpunordps, andps, andnps, orps
790+
const T = @TypeOf(v0, v1);
791+
const Child = std.meta.Child(T);
792+
// v != v is true only when v is NaN
793+
const nan0 = v0 != v0;
794+
const nan1 = v1 != v1;
795+
// if v0 is NaN, pick v1
796+
// else if v1 is NaN, pick v0
797+
// else pick normal @min
798+
return @select(Child, nan0, v1, @select(Child, nan1, v0, @min(v0, v1)));
792799
}
793800
test "zmath.min" {
794801
// Calling math.inf causes test to fail!
@@ -831,8 +838,15 @@ test "zmath.min" {
831838
}
832839

833840
pub inline fn max(v0: anytype, v1: anytype) @TypeOf(v0, v1) {
834-
// This will handle inf & nan
835-
return @max(v0, v1); // maxps, cmpunordps, andps, andnps, orps
841+
const T = @TypeOf(v0, v1);
842+
const Child = std.meta.Child(T);
843+
// v != v is true only when v is NaN
844+
const nan0 = v0 != v0;
845+
const nan1 = v1 != v1;
846+
// if v0 is NaN, pick v1
847+
// else if v1 is NaN, pick v0
848+
// else pick normal @max
849+
return @select(Child, nan0, v1, @select(Child, nan1, v0, @max(v0, v1)));
836850
}
837851
test "zmath.max" {
838852
// Calling math.inf causes test to fail!
@@ -4122,7 +4136,8 @@ test "zmath.fftN" {
41224136
-77.254834, 0.000000, -105.489863, 0.000000, -160.874864, 0.000000, -324.901452, 0.000000,
41234137
};
41244138
for (expected, 0..) |e, ie| {
4125-
try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon));
4139+
const v: [4]f32 = im[ie / 4];
4140+
try expect(std.math.approxEqAbs(f32, e, v[ie % 4], epsilon));
41264141
}
41274142
}
41284143

@@ -4185,7 +4200,8 @@ test "zmath.fftN" {
41854200
-321.749727, 0.000000, 0.000000, 0.000000, -649.802905, 0.000000, 0.000000, 0.000000,
41864201
};
41874202
for (expected, 0..) |e, ie| {
4188-
try expect(std.math.approxEqAbs(f32, e, im[(ie / 4)][ie % 4], epsilon));
4203+
const v: [4]f32 = im[ie / 4];
4204+
try expect(std.math.approxEqAbs(f32, e, v[ie % 4], epsilon));
41894205
}
41904206
}
41914207
}

0 commit comments

Comments
 (0)