11/**
2- * Extract onnxruntime-web and create onnx-sync.mjs
3- * This runs during build to create a wrapper around onnxruntime-web
4- * for use in the MiniLM semantic inference engine .
2+ * Extract ONNX Runtime WASM and create onnx-sync.mjs
3+ * This runs during build to extract the ONNX Runtime WASM binary from
4+ * our custom build and generate a synchronous loader wrapper.
55 *
66 * Idempotent: Skips regeneration if source hasn't changed (supports CI caching).
77 */
88
9- import { writeFileSync } from 'node:fs'
9+ import { existsSync , readFileSync , writeFileSync } from 'node:fs'
1010import path from 'node:path'
1111import { fileURLToPath } from 'node:url'
1212
@@ -21,43 +21,108 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url))
// Resolve repository-relative paths for the generated artifact.
const rootPath = path.join(__dirname, '..')
const outputPath = path.join(rootPath, 'build/onnx-sync.mjs')

// Source artifacts produced by the sibling custom onnxruntime build.
const onnxPackageRoot = path.join(rootPath, '../onnxruntime')
const wasmBuildDir = path.join(onnxPackageRoot, 'build/wasm')
const onnxWasmFile = path.join(wasmBuildDir, 'ort-wasm-simd-threaded.wasm')
const onnxJsFile = path.join(wasmBuildDir, 'ort-wasm-simd-threaded.js')
2928
// Skip regeneration when neither source artifact has changed and the existing
// output still looks like a valid generated module (hash-based caching, so CI
// can reuse a cached build/onnx-sync.mjs).
const needsExtraction = await shouldExtract({
  sourcePaths: [onnxWasmFile, onnxJsFile],
  outputPath,
  validateOutput: content =>
    content.includes('onnxruntime') &&
    content.includes('InferenceSession') &&
    content.includes('WebAssembly'),
})
if (!needsExtraction) {
  process.exit(0)
}
4242
// Graceful fallback: if the custom ONNX Runtime WASM artifacts have not been
// built yet (e.g. CI images without the native toolchain), emit a placeholder
// module whose API surface matches the real one but throws on use.
// NOTE(review): this existence check runs after shouldExtract() has already
// been asked to hash these same paths — confirm shouldExtract tolerates
// missing source files, otherwise this branch may never be reached.
if (!existsSync(onnxWasmFile) || !existsSync(onnxJsFile)) {
  logger.warn('ONNX Runtime WASM not built yet, generating placeholder')

  // Placeholder source: keep the InferenceSession/Tensor named exports so
  // importers type-check, but fail loudly if inference is actually attempted.
  // The build location in the thrown error matches the header NOTE and the
  // onnxPackageRoot constant ('../onnxruntime') so both instructions agree.
  const placeholderContent = `/**
 * Synchronous ONNX Runtime with embedded WASM binary (Placeholder).
 *
 * This file is AUTO-GENERATED by scripts/extract-onnx-runtime.mjs
 * DO NOT EDIT MANUALLY - changes will be overwritten on next build.
 *
 * NOTE: This is a placeholder build. Run 'pnpm build' in ../onnxruntime first.
 */

// Placeholder ONNX Runtime export with minimal API.
const ort = {
  InferenceSession: {
    create() {
      throw new Error('ONNX Runtime not built - run pnpm build in ../onnxruntime')
    }
  },
  Tensor: class Tensor {}
}

export const InferenceSession = ort.InferenceSession
export const Tensor = ort.Tensor

export default ort
`

  ensureOutputDir(outputPath)
  writeFileSync(outputPath, placeholderContent, 'utf-8')
  logger.log(`✓ Generated placeholder ${outputPath}`)
  process.exit(0)
}
78+
// Load the build artifacts: the Emscripten-generated JS loader as text, and
// the WASM binary (re-encoded as base64 so it can be inlined into the
// generated module).
const onnxJsContent = readFileSync(onnxJsFile, 'utf-8')
const wasmBinary = readFileSync(onnxWasmFile)
const base64Data = wasmBinary.toString('base64')

// Hash both inputs so a later run can detect whether regeneration is needed.
const sourceHashComment = await generateHashComment([onnxWasmFile, onnxJsFile])

logger.log(
  `✓ Extracted ${wasmBinary.length} bytes of WASM data from custom onnxruntime`,
)
92+
93+ // Generate onnx-sync.mjs using OUR custom loader with OUR custom WASM.
4794const onnxSyncContent = `/**
48- * ONNX Runtime wrapper for MiniLM inference .
95+ * Synchronous ONNX Runtime with embedded WASM binary .
4996 *
5097 * This file is AUTO-GENERATED by scripts/extract-onnx-runtime.mjs
5198 * DO NOT EDIT MANUALLY - changes will be overwritten on next build.
5299 *
53- * Re-exports onnxruntime-web for use in MiniLM embedding pipeline.
100+ * Uses custom-built ONNX Runtime with Emscripten loader.
101+ * Built with WASM_ASYNC_COMPILATION=0 for synchronous instantiation.
54102 *
55103 * ${ sourceHashComment }
56104 */
57105
58- import ort from 'onnxruntime-web'
59- import { logger } from '@socketsecurity/lib/logger'
60- import colors from 'yoctocolors-cjs'
106+ // Inlined base64 WASM from custom onnxruntime (extracted at build time).
107+ const base64Wasm = '${ base64Data } '
108+
109+ // Decode base64 to Uint8Array.
110+ const wasmBinary = Uint8Array.from(atob(base64Wasm), c => c.charCodeAt(0))
111+
112+ // Inlined Emscripten loader from custom onnxruntime build.
113+ ${ onnxJsContent }
114+
115+ // Synchronously initialize ONNX Runtime with embedded WASM.
116+ const ort = ortWasmThreaded({
117+ wasmBinary,
118+ instantiateWasm(imports, successCallback) {
119+ // Synchronously instantiate WASM module.
120+ const module = new WebAssembly.Module(wasmBinary)
121+ const instance = new WebAssembly.Instance(module, imports)
122+ successCallback(instance, module)
123+ return instance.exports
124+ }
125+ })
61126
62127export const InferenceSession = ort.InferenceSession
63128export const Tensor = ort.Tensor
0 commit comments