From 619eed0f20be927e9e4e1b2beef96b595d4baa21 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Thu, 7 May 2026 15:26:22 -0700 Subject: [PATCH 1/3] Harden Playground snippet-fetch retry against transient failures The Win32_x64_D3D11 Playground job fails intermittently with five identical "[Error] SyntaxError: JSON.parse Error: Unexpected input at position:0" lines followed by "[Log] Running the playground failed." on commits that don't touch playground code. Root cause: validation_native.js fetches each test's snippet from https://snippet.babylonjs.com/<id>/<version> and unconditionally calls JSON.parse(xmlHttp.responseText) on readyState === 4, ignoring xmlHttp.status. When the snippet service returns a transient error (5xx, 429, gateway timeout, empty body), the parse fails and falls through to the catch which calls onError. The retry policy is maxRetry=5 with a fixed 500ms delay -- a 2-second total budget that cannot ride out a normal CDN/upstream blip. Three changes: 1. Check xmlHttp.status === 200 before parsing. Non-200 responses are logged with the status code and the playground id, then routed to onError instead of bubbling up as a misleading SyntaxError. 2. Increase maxRetry from 5 to 8. 3. Replace the fixed 500ms delay with exponential backoff capped at 30 seconds (500ms, 1s, 2s, 4s, 8s, 16s, 30s). Total budget grows from ~2s to ~60s, which is sufficient to ride out typical service blips without changing the eventual fail-fast behavior on persistent outages. Validates against the canonical snippet loader in BabylonJS/Babylon.js (packages/tools/snippetLoader/src/fetchSnippet.ts) which also checks response.ok before calling response.json(). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Apps/Playground/Scripts/validation_native.js | 22 +++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/Apps/Playground/Scripts/validation_native.js b/Apps/Playground/Scripts/validation_native.js index e46818783..a868ded16 100644 --- a/Apps/Playground/Scripts/validation_native.js +++ b/Apps/Playground/Scripts/validation_native.js @@ -157,20 +157,24 @@ const snippetUrl = "https://snippet.babylonjs.com"; const pgRoot = "https://playground.babylonjs.com"; - const retryTime = 500; - const maxRetry = 5; + const initialRetryTime = 500; + const maxRetry = 8; let retry = 0; const onError = function () { retry++; if (retry < maxRetry) { + // Exponential backoff capped at 30s. The snippet service can be briefly + // unavailable (5xx, 429, gateway timeout); 8 attempts over ~60s gives the + // upstream room to recover before we fail the whole run. + const delay = Math.min(initialRetryTime * Math.pow(2, retry - 1), 30000); setTimeout(function () { loadPG(); - }, retryTime); + }, delay); } else { // Fail the test, something wrong happen - console.log("Running the playground failed."); + console.log("Running the playground failed after " + maxRetry + " attempts."); done(false); } } @@ -179,8 +183,16 @@ const xmlHttp = new XMLHttpRequest(); xmlHttp.addEventListener("readystatechange", function () { if (xmlHttp.readyState === 4) { + xmlHttp.onreadystatechange = null; + // Treat any non-200 (5xx, 429, empty body, etc.) as retryable rather + // than feeding the response body to JSON.parse and surfacing it as a + // SyntaxError. 
+ if (xmlHttp.status !== 200) { + console.error("Snippet fetch returned status " + xmlHttp.status + " for " + test.playgroundId); + onError(); + return; + } try { - xmlHttp.onreadystatechange = null; const snippet = JSON.parse(xmlHttp.responseText); let code = JSON.parse(snippet.jsonPayload).code.toString(); From 9e5bd7b632a37abfa39bc1b019c2b57ce0cea9dc Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Fri, 8 May 2026 08:55:29 -0700 Subject: [PATCH 2/3] Drop dead xmlHttp.onreadystatechange = null assignment The readystatechange listener is registered via addEventListener, not via the onreadystatechange property -- those are separate slots in the XHR API. Setting the property to null does not detach the listener, just as the reviewer observed. Spec also guarantees readystatechange fires only once on transition to DONE, so the line was a misleading no-op. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Apps/Playground/Scripts/validation_native.js | 1 - 1 file changed, 1 deletion(-) diff --git a/Apps/Playground/Scripts/validation_native.js b/Apps/Playground/Scripts/validation_native.js index a868ded16..70669300d 100644 --- a/Apps/Playground/Scripts/validation_native.js +++ b/Apps/Playground/Scripts/validation_native.js @@ -183,7 +183,6 @@ const xmlHttp = new XMLHttpRequest(); xmlHttp.addEventListener("readystatechange", function () { if (xmlHttp.readyState === 4) { - xmlHttp.onreadystatechange = null; // Treat any non-200 (5xx, 429, empty body, etc.) as retryable rather // than feeding the response body to JSON.parse and surfacing it as a // SyntaxError. 
From c32315b70c8eccb357492d39264bba2bde746c84 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Fri, 15 May 2026 13:15:08 -0700 Subject: [PATCH 3/3] Route Playground snippet load through BABYLON.Tools.LoadFile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the manual XMLHttpRequest + retry loop in `loadPG()` with a single `BABYLON.Tools.LoadFile()` call so that snippet fetches and in-snippet texture / scene fetches all share the same Babylon-provided retry plumbing. The custom retry block (8 attempts, exponential backoff, explicit status-200 gate) is removed because `FileTools.LoadFile`'s internal `retryLoop` already implements retries via the global `FileToolsOptions.DefaultRetryStrategy`, and that strategy is now configured up front for the test framework. The configured strategy broadens the upstream `ExponentialBackoff` default (which retries only when `request.status === 0`) to also cover transient HTTP error responses: - `0` — network drop / connection reset (existing) - `429` — rate limited - `5xx` — server error / gateway timeout / etc. Up to 5 retries (on top of the initial request) with `500ms * 2^retryIndex` backoff. Applies to every `Tools.LoadFile` call in the test framework, including: - The snippet fetch in `loadPG()` (this PR's primary concern, same scenario as before). - The reference-image fetch in `runTest()` (already on `LoadFile`). - Every texture / scene / asset URL loaded from inside each playground's `createScene()` body via `_loadFile` (e.g. `new BABYLON.Texture("...exr", scene)`). That last category covers the `EXR Loader` flake observed on `Win32_x64_V8_D3D11` (run 25930475253): a single transient failure fetching `green-door.exr` from the assets CDN caused Babylon's fallback red-and-black checkerboard texture to be substituted, which validated as a ~110k pixel diff against the reference. Under the new strategy the fetch will retry on 5xx/429 instead of falling back on the first error. 
Net `validation_native.js` change: +57 / -85 lines (-28 net) plus the 21-line strategy override block at file scope. The strategy override is a one-time global mutation in the test-framework entry; it does not affect any non-test BabylonNative app. [Created by Copilot on behalf of @bghgary] Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Apps/Playground/Scripts/validation_native.js | 88 ++++++++++---------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/Apps/Playground/Scripts/validation_native.js b/Apps/Playground/Scripts/validation_native.js index 48c539c10..c49598ed2 100644 --- a/Apps/Playground/Scripts/validation_native.js +++ b/Apps/Playground/Scripts/validation_native.js @@ -83,6 +83,27 @@ const engine = new BABYLON.NativeEngine(); engine.getCaps().parallelShaderCompile = undefined; + // Broaden Babylon's default retry strategy for the test framework: in addition to + // network drops (status 0, the default trigger), also retry transient HTTP errors + // (5xx) and rate limits (429). Applies to every BABYLON.Tools.LoadFile request + // including the snippet fetches in loadPG below and the texture/asset loads + // initiated from inside each playground's createScene(). + BABYLON.Tools.DefaultRetryStrategy = function (url, request, retryIndex) { + const maxRetries = 5; + if (retryIndex >= maxRetries) { + return -1; + } + if (url.indexOf("file:") !== -1) { + return -1; + } + if (request.status === 0 || + request.status === 429 || + (request.status >= 500 && request.status < 600)) { + return Math.pow(2, retryIndex) * 500; + } + return -1; + }; + engine.getRenderingCanvas = function () { return window; } @@ -284,42 +305,13 @@ const snippetUrl = "https://snippet.babylonjs.com"; const pgRoot = "https://playground.babylonjs.com"; - const initialRetryTime = 500; - const maxRetry = 8; - let retry = 0; - - const onError = function () { - retry++; - if (retry < maxRetry) { - // Exponential backoff capped at 30s. 
The snippet service can be briefly - // unavailable (5xx, 429, gateway timeout); 8 attempts over ~60s gives the - // upstream room to recover before we fail the whole run. - const delay = Math.min(initialRetryTime * Math.pow(2, retry - 1), 30000); - setTimeout(function () { - loadPG(); - }, delay); - } - else { - // Fail the test, something wrong happen - console.log("Running the playground failed."); - failTest(done); - } - } - const loadPG = function () { - const xmlHttp = new XMLHttpRequest(); - xmlHttp.addEventListener("readystatechange", function () { - if (xmlHttp.readyState === 4) { - // Treat any non-200 (5xx, 429, empty body, etc.) as retryable rather - // than feeding the response body to JSON.parse and surfacing it as a - // SyntaxError. - if (xmlHttp.status !== 200) { - console.error("Snippet fetch returned status " + xmlHttp.status + " for " + test.playgroundId); - onError(); - return; - } + const url = snippetUrl + test.playgroundId.replace(/#/g, "/"); + BABYLON.Tools.LoadFile( + url, + function (responseText) { try { - const snippet = JSON.parse(xmlHttp.responseText); + const snippet = JSON.parse(responseText); let code = JSON.parse(snippet.jsonPayload).code.toString(); // Check if this is a v2 manifest and extract the entry file's code @@ -361,26 +353,30 @@ processCurrentScene(test, referenceImage, done, compareFunction); }).catch(function (e) { console.error(e); - onError(); - }) + failTest(done); + }); } else { // Handle if createScene returns a scene processCurrentScene(test, referenceImage, done, compareFunction); } - } catch (e) { - console.error(e); - onError(); + console.error("Failed to evaluate playground snippet " + test.playgroundId + ": " + e); + failTest(done); } + }, + undefined, // onProgress + undefined, // database + false, // useArrayBuffer (snippet response is JSON text) + function (request, exception) { + const status = request ? 
(request.status + " " + request.statusText) : "no response"; + console.error("Failed to load playground snippet " + test.playgroundId + " after retries: " + status); + if (exception) { + console.error(exception); + } + failTest(done); } - }, false); - xmlHttp.onerror = function () { - console.error("Network error during test load."); - onError(); - } - xmlHttp.open("GET", snippetUrl + test.playgroundId.replace(/#/g, "/")); - xmlHttp.send(); + ); } loadPG(); } else {