Skip to content

Commit 9e7b01a

Browse files
bench: --frames + --no-vsync + BENCH stdout line (Win32)
Adds the BabylonNative-side of the cross-app perf comparison driven by webgpu-cross-platform-app/tools/bench/run-bench.mjs. Same CLI contract, same BENCH stdout format, so the runner can treat both apps uniformly. CLI - Apps/Playground/Shared/CommandLine.{h,cpp}: new fields PlaygroundOptions::Frames and PlaygroundOptions::NoVsync, parsed via the existing FlagSpec table. --frames N also implicitly sets NoVsync=true (matches DawnTest), --no-vsync standalone is supported for indefinite runs where vsync should still be off. Graphics - Core/Graphics/Include/Shared/Babylon/Graphics/Device.h: add Configuration::VSync (default true) so non-Playground consumers keep current behavior. - Core/Graphics/Source/DeviceImpl.cpp: drop the BGFX_RESET_VSYNC bit from the reset mask when config.VSync is false. With vsync on the loop is pinned to the display refresh and dwarfs the per-frame measurement. - Apps/Playground/Shared/AppContext.cpp: plumb options.NoVsync to graphicsConfig.VSync. Bench - Apps/Playground/Shared/BenchTimer.{h,cpp}: PascalCase mirror of DawnTest's framework/bench API (StartFrame/EndFrame/PrintBenchLine), same metric set (min/avg/max/p95 + wall), first frame is warmup. - Apps/Playground/Win32/App.cpp: wrap each main-loop iteration in the timer (Finish -> FinishRender -> StartRender -> Start -> PeekMessage/ Dispatch). When --frames is set, PostQuitMessage(0) on hitting the budget so the loop exits without depending on user input. Print the BENCH line before returning. Scene name is derived from the script stem so the runner can correlate cells across apps. - Apps/Playground/CMakeLists.txt: register BenchTimer sources. macOS/iOS/Android Playground entries are intentionally not wired here. They route command-line args through MTKView delegates / JNI and need larger refactors to feed --frames into the render loop. CI bench is win32-only for now (no GPU on GH mac runners); local mac/Android bench will follow. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4e6618d commit 9e7b01a

9 files changed

Lines changed: 249 additions & 1 deletion

File tree

Apps/Playground/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ set(SCRIPTS
2121
set(SOURCES
2222
"Shared/AppContext.cpp"
2323
"Shared/AppContext.h"
24+
"Shared/BenchTimer.cpp"
25+
"Shared/BenchTimer.h"
2426
"Shared/CommandLine.cpp"
2527
"Shared/CommandLine.h"
2628
"Shared/Diagnostics.cpp"

Apps/Playground/Shared/AppContext.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ AppContext::AppContext(
7878
graphicsConfig.Width = width;
7979
graphicsConfig.Height = height;
8080
graphicsConfig.MSAASamples = 4;
81+
// Bench mode (--frames / --no-vsync) measures pure render throughput,
82+
// not display refresh cap. Honor the CLI flag before the device is
83+
// created so the BGFX_RESET_VSYNC bit is dropped on first init.
84+
graphicsConfig.VSync = !playgroundOptions.NoVsync;
8185

8286
m_device.emplace(graphicsConfig);
8387
m_deviceUpdate.emplace(m_device->GetUpdate("update"));
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#include "BenchTimer.h"
2+
3+
#include <algorithm>
4+
#include <chrono>
5+
#include <cstdio>
6+
7+
namespace Bench
8+
{
9+
// Returns the time, in milliseconds, that has elapsed on
// std::chrono::steady_clock since the first call to this function.
double MonotonicMillis()
{
    using Clock = std::chrono::steady_clock;
    using Millis = std::chrono::duration<double, std::milli>;

    // Captured once, on first use; every reading is relative to this point.
    static const Clock::time_point origin = Clock::now();

    const Millis elapsed = Clock::now() - origin;
    return elapsed.count();
}
15+
16+
void FrameTimer::SetWarmupFrames(int n)
17+
{
18+
m_warmupFrames = n < 0 ? 0 : n;
19+
}
20+
21+
void FrameTimer::Reserve(std::size_t n)
22+
{
23+
m_deltas.reserve(n);
24+
}
25+
26+
void FrameTimer::StartFrame()
27+
{
28+
m_startMs = MonotonicMillis();
29+
++m_seenFrames;
30+
}
31+
32+
void FrameTimer::EndFrame()
33+
{
34+
if (m_seenFrames <= m_warmupFrames)
35+
{
36+
return;
37+
}
38+
m_deltas.push_back(MonotonicMillis() - m_startMs);
39+
}
40+
41+
// Summarizes the recorded frame durations.
//
// Returns a zero-initialized FrameStats when nothing has been recorded.
// Otherwise:
//   frameCount - number of recorded durations
//   wallMs     - sum of the recorded durations
//   minMs/maxMs/avgMs - smallest, largest and mean duration
//   p95Ms      - element at index floor(0.95 * count) of the ascending
//                durations, clamped to the last element
FrameStats FrameTimer::Finish() const
{
    FrameStats stats{};

    const std::size_t count = m_deltas.size();
    if (count == 0)
    {
        return stats;
    }

    // Work on a sorted copy so the recorded order is left untouched.
    std::vector<double> ordered(m_deltas.begin(), m_deltas.end());
    std::sort(ordered.begin(), ordered.end());

    // Accumulate in ascending order.
    double total = 0.0;
    for (std::size_t i = 0; i < count; ++i)
    {
        total += ordered[i];
    }

    const std::size_t p95Index = std::min(static_cast<std::size_t>(0.95 * count), count - 1);

    stats.frameCount = static_cast<int>(count);
    stats.wallMs = total;
    stats.minMs = ordered.front();
    stats.avgMs = total / static_cast<double>(count);
    stats.maxMs = ordered.back();
    stats.p95Ms = ordered[p95Index];
    return stats;
}
73+
74+
void FrameTimer::PrintBenchLine(const std::string& sceneName) const
75+
{
76+
const FrameStats s = Finish();
77+
std::fprintf(stdout,
78+
"BENCH scene=%s frames=%d wall_ms=%.3f min_ms=%.3f avg_ms=%.3f max_ms=%.3f p95_ms=%.3f\n",
79+
sceneName.c_str(),
80+
s.frameCount,
81+
s.wallMs,
82+
s.minMs,
83+
s.avgMs,
84+
s.maxMs,
85+
s.p95Ms);
86+
std::fflush(stdout);
87+
}
88+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// BenchTimer — per-frame wall-time collector used by Playground bench mode.
2+
//
3+
// Mirrors the DawnTest framework/bench.h API so the emitted "BENCH …" line
4+
// is identical across both apps and tools/bench/run-bench.mjs can parse a
5+
// single format.
6+
7+
#pragma once
8+
9+
#include <cstddef>
10+
#include <string>
11+
#include <vector>
12+
13+
namespace Bench
{
    // Summary of the frame durations recorded by a FrameTimer. All timing
    // fields are in milliseconds and stay zero when nothing was recorded.
    struct FrameStats
    {
        int frameCount{0};  // recorded frames (warmup frames are not included)
        double wallMs{0.0}; // sum of the recorded frame durations
        double minMs{0.0};  // shortest recorded frame
        double avgMs{0.0};  // mean recorded frame
        double maxMs{0.0};  // longest recorded frame
        double p95Ms{0.0};  // 95th-percentile recorded frame
    };

    // Collects per-frame durations between StartFrame()/EndFrame() pairs and
    // reports them either as a FrameStats value or as a "BENCH ..." line.
    class FrameTimer
    {
    public:
        // Number of leading frames to leave out of the statistics. The
        // default is 1, because the first frame (shader compile + first-use
        // upload) is almost always an outlier. Negative values become 0.
        void SetWarmupFrames(int n);

        // Pre-allocates room for `n` recorded frame durations.
        void Reserve(std::size_t n);

        // Bracket one frame. EndFrame() records the elapsed time since the
        // matching StartFrame() unless the frame falls in the warmup window.
        void StartFrame();
        void EndFrame();

        // Computes the statistics over everything recorded so far.
        FrameStats Finish() const;

        // Emits exactly:
        //   BENCH scene=<sceneName> frames=N wall_ms=X min_ms=X avg_ms=X max_ms=X p95_ms=X\n
        // on stdout, then flushes.
        void PrintBenchLine(const std::string& sceneName) const;

    private:
        int m_warmupFrames{1};          // frames skipped before recording starts
        int m_seenFrames{0};            // StartFrame() calls so far
        double m_startMs{0.0};          // timestamp taken by the last StartFrame()
        std::vector<double> m_deltas{}; // recorded frame durations, in ms
    };

    // Milliseconds elapsed on a monotonic clock since this function was
    // first called.
    double MonotonicMillis();
}

Apps/Playground/Shared/CommandLine.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,33 @@ namespace
232232
err = "invalid --test-index value: " + ierr;
233233
}
234234
}},
235+
236+
FlagSpec{"--frames", "", FlagKind::ValueRequired, "N",
237+
"Render N frames then exit. Disables vsync.",
238+
" Emits a single 'BENCH scene=... frames=... ...'\n"
239+
" line on stdout, parsed by\n"
240+
" tools/bench/run-bench.mjs to compute summary\n"
241+
" statistics. When set, vsync is disabled\n"
242+
" automatically so per-frame timings reflect\n"
243+
" actual GPU/CPU work rather than the display\n"
244+
" refresh cap.\n",
245+
[](PlaygroundOptions& o, std::string_view value, std::string& err) {
246+
int n = 0;
247+
if (!ParseIntStrict(value, n) || n < 1 || n > 1000000)
248+
{
249+
err = "invalid --frames value (expected positive integer): '" + std::string{value} + "'";
250+
return;
251+
}
252+
o.Frames = n;
253+
o.NoVsync = true;
254+
}},
255+
256+
FlagSpec{"--no-vsync", "", FlagKind::Boolean, "",
257+
"Disable vsync on the swap chain.",
258+
" Implied by --frames; can be set on its own for\n"
259+
" interactive perf testing without a fixed frame\n"
260+
" budget.\n",
261+
[](PlaygroundOptions& o, std::string_view, std::string&) { o.NoVsync = true; }},
235262
};
236263

237264
struct FlagMatch

Apps/Playground/Shared/CommandLine.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,21 @@ struct PlaygroundOptions
2121
std::optional<bool> DebugTrace;
2222
std::optional<std::string> PerfTrace;
2323

24+
// Benchmark mode.
25+
//
26+
// When `Frames > 0`, the host's render loop exits after that many
27+
// rendered frames (the budget includes BenchTimer's warmup frame) and emits a
28+
// single line of the form:
29+
// BENCH scene=<name> frames=N wall_ms=X min_ms=X avg_ms=X max_ms=X p95_ms=X
30+
// on stdout, parsed by tools/bench/run-bench.mjs. Setting `Frames`
31+
// implicitly enables `NoVsync` so the timing reflects actual GPU/CPU
32+
// throughput rather than the display refresh cap.
33+
//
34+
// `NoVsync` may also be set independently of `Frames` for interactive
35+
// perf testing.
36+
int Frames = 0;
37+
bool NoVsync = false;
38+
2439
// 1-based frame index at which to call TestUtils.captureNextFrame()
2540
// (RenderDoc capture trigger). When set, the runner extends each test's
2641
// render budget so the .rdc finalizes. Requires renderdoc.dll to be

Apps/Playground/Win32/App.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include "App.h"
55
#include <Shared/AppContext.h>
6+
#include <Shared/BenchTimer.h>
67
#include <Shared/CommandLine.h>
78
#include <Shared/Diagnostics.h>
89
#include <Babylon/Plugins/TestUtils.h>
@@ -202,6 +203,27 @@ int APIENTRY wWinMain(_In_ HINSTANCE hInstance,
202203

203204
MSG msg{};
204205

206+
// Bench mode: per-frame timing collector that emits the BENCH line on
207+
// exit. The BENCH line and frame budget apply only when --frames was
208+
// passed, but StartFrame/EndFrame still run on every rendered frame.
209+
Bench::FrameTimer benchTimer;
210+
int benchFrames = 0; // count of rendered frames in this session
211+
std::string benchSceneName;
212+
if (options.Frames > 0)
213+
{
214+
benchTimer.Reserve(static_cast<std::size_t>(options.Frames));
215+
// Scene name comes from the first positional script arg (stem); if
216+
// none was passed, fall back to "experience" (the default script).
217+
if (!options.Scripts.empty())
218+
{
219+
benchSceneName = std::filesystem::path{options.Scripts.front()}.stem().string();
220+
}
221+
else
222+
{
223+
benchSceneName = "experience";
224+
}
225+
}
226+
205227
// Main message loop:
206228
while (msg.message != WM_QUIT)
207229
{
@@ -215,10 +237,28 @@ int APIENTRY wWinMain(_In_ HINSTANCE hInstance,
215237
{
216238
if (appContext)
217239
{
240+
// Time the full frame: from the moment we ask the worker to
241+
// finish the previous frame's JS render-loop pass, through
242+
// FinishRenderingCurrentFrame (GPU present), the next
243+
// StartRenderingCurrentFrame, the next DeviceUpdate.Start
244+
// (which kicks the next JS render-loop pass), AND the
245+
// PeekMessage / Translate / Dispatch that follows. This
246+
// matches DawnTest's notion of "frame": the wall-clock
247+
// interval between two consecutive present completions, so
248+
// the BENCH line values are directly comparable.
249+
benchTimer.StartFrame();
218250
appContext->DeviceUpdate().Finish();
219251
appContext->Device().FinishRenderingCurrentFrame();
220252
appContext->Device().StartRenderingCurrentFrame();
221253
appContext->DeviceUpdate().Start();
254+
if (options.Frames > 0)
255+
{
256+
++benchFrames;
257+
if (benchFrames >= options.Frames)
258+
{
259+
PostQuitMessage(0);
260+
}
261+
}
222262
}
223263

224264
result = PeekMessage(&msg, nullptr, 0, 0, PM_REMOVE) && msg.message != WM_QUIT;
@@ -232,9 +272,24 @@ int APIENTRY wWinMain(_In_ HINSTANCE hInstance,
232272
DispatchMessage(&msg);
233273
}
234274
}
275+
// Close out the frame interval after we've drained pending window
276+
// messages for this loop iteration. EndFrame is a no-op until past
277+
// warmup, so it's safe to call every iteration including those
278+
// where appContext is still being constructed.
279+
if (!minimized && appContext)
280+
{
281+
benchTimer.EndFrame();
282+
}
235283
}
236284

237285
Diagnostics::SetExitCode(static_cast<int>(msg.wParam));
286+
// Emit the BENCH line *before* return so it reaches stdout (which is
287+
// unbuffered here) even when the host gets torn down by the runtime
288+
// exit hook after we return.
289+
if (options.Frames > 0)
290+
{
291+
benchTimer.PrintBenchLine(benchSceneName);
292+
}
238293
return (int)msg.wParam;
239294
}
240295

Core/Graphics/Include/Shared/Babylon/Graphics/Device.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ namespace Babylon::Graphics
5353
// When enabled, back buffer will be premultiplied with alpha value.
5454
bool AlphaPremultiplied{};
5555

56+
// When true (default), the swap chain waits for vertical sync. Set
57+
// false for benchmark / latency-sensitive scenarios that want to
58+
// measure render throughput without the display refresh cap.
59+
// Mapped to BGFX_RESET_VSYNC in the bgfx backend.
60+
bool VSync{true};
61+
5662
// Format to use when creating the depth/stencil texture for the back buffer.
5763
// Specify DepthStencilFormat::None to not create a depth/stencil texture.
5864
DepthStencilFormat BackBufferDepthStencilFormat{DepthStencilFormat::Depth24Stencil8};

Core/Graphics/Source/DeviceImpl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ namespace Babylon::Graphics
5353
// init.resolution
5454
//
5555

56-
init.resolution.reset = BGFX_RESET_VSYNC | BGFX_RESET_MAXANISOTROPY | BGFX_RESET_FLIP_AFTER_RENDER;
56+
init.resolution.reset = (config.VSync ? BGFX_RESET_VSYNC : 0u) | BGFX_RESET_MAXANISOTROPY | BGFX_RESET_FLIP_AFTER_RENDER;
5757
init.resolution.maxFrameLatency = 1;
5858

5959
UpdateSize(config.Width, config.Height);

0 commit comments

Comments
 (0)