// Configuration
// Edge length of the square grid the model works on; postprocess() reads
// INPUT_SIZE * INPUT_SIZE values from the model output.
const INPUT_SIZE = 384;

// State
// Worker created by init(); replaced whenever init() runs again.
let worker = null;
// True while the webcam render loop should keep running.
let isWebcamActive = false;
// Presumably the MediaStream held while the webcam is on -- confirm in startWebcam().
let webcamStream = null;
// Busy flag: set by processFrame() while a frame is in flight to the worker.
let isProcessing = false;
// Number of detection results handled; presumably sampled by the FPS counter.
let frameCount = 0;
// Presumably the timer handle that refreshes the FPS read-out -- confirm in startWebcam().
let fpsInterval = null;
// Most recent postprocess() result; webcamLoop() draws it over each video frame.
let latestResult = null;
1512
1613// DOM Elements
1714const statusText = document . getElementById ( 'status-text' ) ;
@@ -30,56 +27,35 @@ const inferenceEl = document.getElementById('inference-time');
3027const postprocessEl = document . getElementById ( 'postprocess-time' ) ;
3128const totalEl = document . getElementById ( 'total-time' ) ;
3229const fpsEl = document . getElementById ( 'fps-counter' ) ;
// Initialization
/**
 * (Re)creates the inference worker and asks it to initialise the given backend.
 *
 * Any existing worker is terminated first. The webcam button stays disabled
 * until the new worker reports 'init_complete'.
 *
 * @param {string} [backend='wasm'] - Execution backend name forwarded to the
 *   worker in the 'init' message.
 * @returns {void}
 */
function init(backend = 'wasm') {
  webcamBtn.disabled = true;
  updateStatus(`Initializing ${backend}...`, 'loading');

  if (worker) {
    worker.terminate();
    // A terminated worker never answers a 'detect' that was still in flight,
    // so release the busy flag here or processFrame() would skip every frame.
    isProcessing = false;
  }

  worker = new Worker('worker.js');

  worker.onmessage = (e) => {
    const { type, backend: backendName, output, timings, error } = e.data;

    if (type === 'init_complete') {
      console.log('Inference Session created with provider:', backendName);
      document.getElementById('backend-type').textContent = backendName;
      updateStatus('Ready', 'ready');
      webcamBtn.disabled = false;
    } else if (type === 'detect_complete') {
      handleDetectionResult(output, timings);
    } else if (type === 'error') {
      console.error(error);
      updateStatus(`Error: ${error}`, 'error');
      // The failed request will not produce a 'detect_complete', so unblock
      // the next frame explicitly.
      isProcessing = false;
    }
  };

  // Fires when worker.js cannot be loaded or throws an uncaught exception;
  // without it such failures would leave the UI stuck on "Initializing".
  worker.onerror = (event) => {
    const reason = event.message || 'worker failed to load or crashed';
    console.error(event);
    updateStatus(`Error: ${reason}`, 'error');
    isProcessing = false;
  };

  worker.postMessage({ type: 'init', data: { backend } });
}
8460
8561// Handle Backend Change
@@ -89,53 +65,14 @@ backendSelect.addEventListener('change', (e) => {
8965
// Start initialization immediately, using the backend currently selected in the UI
init(backendSelect.value);
9368
// Helper: Update Status
/**
 * Shows a status message and tags the status dot with a state class.
 *
 * @param {string} text - Message written to the status label.
 * @param {string} type - Class name appended after `status-dot`; init() passes
 *   'loading', 'ready' and 'error'.
 * @returns {void}
 */
function updateStatus(text, type) {
  statusText.textContent = text;
  statusDot.className = `status-dot ${type}`;
}
9974
100- // Helper: Preprocess Image
101- function preprocess ( imageData ) {
102- const startTime = performance . now ( ) ;
103-
104- // 1. Resize to 384x384
105- const tempCanvas = document . createElement ( 'canvas' ) ;
106- tempCanvas . width = INPUT_SIZE ;
107- tempCanvas . height = INPUT_SIZE ;
108- const tempCtx = tempCanvas . getContext ( '2d' ) ;
109- tempCtx . drawImage ( imageData , 0 , 0 , INPUT_SIZE , INPUT_SIZE ) ;
110-
111- const resizedData = tempCtx . getImageData ( 0 , 0 , INPUT_SIZE , INPUT_SIZE ) ;
112- const { data } = resizedData ;
113-
114- // 2. Normalize and HWC -> CHW
115- const float32Data = new Float32Array ( 3 * INPUT_SIZE * INPUT_SIZE ) ;
116-
117- for ( let i = 0 ; i < INPUT_SIZE * INPUT_SIZE ; i ++ ) {
118- const r = data [ i * 4 ] / 255.0 ;
119- const g = data [ i * 4 + 1 ] / 255.0 ;
120- const b = data [ i * 4 + 2 ] / 255.0 ;
121-
122- // Normalize: (value - mean) / std
123- float32Data [ i ] = ( r - MEAN [ 0 ] ) / STD [ 0 ] ; // R
124- float32Data [ INPUT_SIZE * INPUT_SIZE + i ] = ( g - MEAN [ 1 ] ) / STD [ 1 ] ; // G
125- float32Data [ 2 * INPUT_SIZE * INPUT_SIZE + i ] = ( b - MEAN [ 2 ] ) / STD [ 2 ] ; // B
126- }
127-
128- const tensor = new ort . Tensor ( 'float32' , float32Data , [ 1 , 3 , INPUT_SIZE , INPUT_SIZE ] ) ;
129-
130- return {
131- tensor,
132- time : performance . now ( ) - startTime
133- } ;
134- }
135-
136- // === Pure JS Geometry Utils ===
137-
138- // Find convex hull using Monotone Chain algorithm
// === Pure JS Geometry Utils ===

// Find convex hull using Monotone Chain algorithm
13976function convexHull ( points ) {
14077 points . sort ( ( a , b ) => a . x === b . x ? a . y - b . y : a . x - b . x ) ;
14178
@@ -196,10 +133,10 @@ function findCorners(points) {
196133}
197134
198135// Helper: Postprocess
199- function postprocess ( outputTensor , originalWidth , originalHeight ) {
136+ function postprocess ( outputData , originalWidth , originalHeight ) {
200137 const startTime = performance . now ( ) ;
201138
202- const data = outputTensor . data ;
139+ const data = outputData ;
203140 const size = INPUT_SIZE * INPUT_SIZE ;
204141
205142 // Create mask array (0 or 1)
@@ -255,14 +192,11 @@ function postprocess(outputTensor, originalWidth, originalHeight) {
255192 } ;
256193}
257194
258- // Helper: Draw Results
259- function drawResults ( imageSource , mask , corners ) {
195+ // Helper: Draw Overlay
196+ function drawOverlay ( mask , corners ) {
260197 const width = canvas . width ;
261198 const height = canvas . height ;
262199
263- // Draw original image
264- ctx . drawImage ( imageSource , 0 , 0 , width , height ) ;
265-
266200 const showMask = document . getElementById ( 'show-mask' ) . checked ;
267201 const showBoundary = document . getElementById ( 'show-boundary' ) . checked ;
268202
@@ -313,40 +247,44 @@ function drawResults(imageSource, mask, corners) {
313247 }
314248}
315249
/**
 * Handles a 'detect_complete' reply from the worker.
 *
 * Post-processes the raw model output, refreshes the timing read-outs, stores
 * the result in `latestResult` and, when the webcam loop is not running,
 * paints the source image plus overlay itself.
 *
 * @param {*} output - Raw model output, passed straight to postprocess().
 *   Presumably a flat numeric array -- confirm against worker.js.
 * @param {{preprocess: number, inference: number}} timings - Durations
 *   reported by the worker, displayed in milliseconds.
 * @returns {void}
 */
function handleDetectionResult(output, timings) {
  try {
    // 3. Postprocess (the preprocess and inference timings arrive from the worker)
    const postResult = postprocess(output, canvas.width, canvas.height);
    postprocessEl.textContent = `${postResult.time.toFixed(1)}ms`;

    // Update timings
    preprocessEl.textContent = `${timings.preprocess.toFixed(1)}ms`;
    inferenceEl.textContent = `${timings.inference.toFixed(1)}ms`;

    // Total Time
    const totalTime = timings.preprocess + timings.inference + postResult.time;
    totalEl.textContent = `${totalTime.toFixed(1)}ms`;

    // Update global state
    latestResult = postResult;

    // If not webcam, we need to explicitly draw because there is no loop
    if (!isWebcamActive) {
      ctx.drawImage(sourceImage, 0, 0, canvas.width, canvas.height);
      drawOverlay(postResult.mask, postResult.corners);
    }

    frameCount++;
  } catch (e) {
    console.error(e);
  } finally {
    // Always release the busy flag: if it stayed set after a failure,
    // processFrame() would silently drop every later frame.
    isProcessing = false;
  }
}
344275
// Main Processing Loop
/**
 * Sends one frame to the worker for detection, unless a frame is already in flight.
 *
 * The busy flag set here is cleared when the worker's reply is handled, or
 * immediately if the frame could not be sent.
 *
 * @param {*} imageSource - Anything accepted by createImageBitmap(); the
 *   webcam loop passes the video element.
 * @returns {Promise<void>} Resolves once the frame is posted or skipped; never rejects.
 */
async function processFrame(imageSource) {
  if (isProcessing) return;
  isProcessing = true;

  let bitmap = null;
  try {
    // Create ImageBitmap to send to worker (transferable and efficient)
    bitmap = await createImageBitmap(imageSource);
    worker.postMessage({ type: 'detect', data: { image: bitmap } }, [bitmap]);
  } catch (e) {
    console.error(e);
    // The bitmap was not handed over to the worker, so release it here
    // instead of waiting for garbage collection.
    if (bitmap) {
      bitmap.close();
    }
    isProcessing = false;
  }
}
352290
@@ -394,6 +332,7 @@ async function startWebcam() {
394332 canvas . width = webcamVideo . videoWidth ;
395333 canvas . height = webcamVideo . videoHeight ;
396334 isWebcamActive = true ;
335+ latestResult = null ; // Reset result
397336 webcamBtn . textContent = 'Stop Webcam' ;
398337 webcamBtn . classList . replace ( 'primary' , 'secondary' ) ;
399338
@@ -427,7 +366,19 @@ function stopWebcam() {
/**
 * One tick of the webcam render loop; reschedules itself on every animation
 * frame until `isWebcamActive` is cleared.
 *
 * Rendering is decoupled from detection: the current video frame is painted
 * every tick, with the most recent detection result drawn on top, while
 * detection runs in the background at whatever rate the worker sustains.
 *
 * @returns {void}
 */
function webcamLoop() {
  if (!isWebcamActive) return;

  // 1. Render immediately
  ctx.drawImage(webcamVideo, 0, 0, canvas.width, canvas.height);

  // 2. Draw overlay if available
  if (latestResult) {
    drawOverlay(latestResult.mask, latestResult.corners);
  }

  // 3. Try to process frame (will skip if busy); deliberately not awaited so
  //    a slow detection does not delay the next repaint
  processFrame(webcamVideo);

  // 4. Loop
  requestAnimationFrame(webcamLoop);
}
0 commit comments