Skip to content

Commit 9ac5b4f

Browse files
committed
Added tensorflow.js example
1 parent ab7916b commit 9ac5b4f

23 files changed

+1081
-131
lines changed

examples/DeepLabv3/.gitignore

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,19 @@ Thumbs.db
4040
output/
4141
*.pth
4242
!model_mbv3_iou_mix_2C049.pth
43+
44+
# Keep model files in web apps
4345
*.onnx
44-
!web_app/document_detector.onnx
46+
!web_app_onnx/*.onnx
47+
*.tflite
48+
49+
# TensorFlow conversion artifacts
50+
tf_saved_model/
51+
tfjs_model_wrapped/
4552

4653
# Logs
4754
*.log
55+
log.txt
4856

4957
# Jupyter
5058
.ipynb_checkpoints/
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Document Detection Web App
2+
3+
This is a web-based application for document detection and segmentation using DeepLabV3 (MobileNetV3 backbone) and ONNX Runtime Web. It runs entirely in the browser using WebAssembly (WASM) or WebGPU.
4+
5+
## Online Demo
6+
https://yushulx.me/javascript-barcode-qr-code-scanner/examples/DeepLabv3/
7+
8+
## Features
9+
10+
- **Real-time Document Detection**: Segments documents from the background.
11+
- **Multiple Backends**:
12+
- **WASM (CPU)**: Uses a quantized INT8 model for efficient CPU inference.
13+
- **WebGPU (GPU)**: Uses an FP32 model for high-performance GPU acceleration (requires a compatible browser).
14+
- **Input Sources**: Supports both a live webcam feed and image file uploads.
15+
- **Visualization**: Displays the segmentation mask and document boundary overlay.
16+
- **Performance Metrics**: Real-time tracking of pre-processing, inference, and post-processing times.
17+
- **Model Caching**: Caches downloaded models locally to speed up subsequent loads.
18+
19+
## Prerequisites
20+
21+
- A modern web browser (Chrome, Edge, Firefox).
22+
- For **WebGPU** support, you need a browser with the WebGPU API enabled (e.g., latest Chrome or Edge) and compatible hardware.
23+
24+
## Setup & Usage
25+
26+
1. **Clone the repository** (if you haven't already).
27+
2. **Serve the directory**:
28+
Because this application uses modern web standards (ES modules, WebGPU, Cache API), it must be served over HTTP/HTTPS. You cannot run it by simply opening `index.html` as a file.
29+
30+
You can use any static file server. For example, using Python:
31+
32+
```bash
33+
# Python 3
34+
python -m http.server 8000
35+
```
36+
37+
Or using Node.js `http-server`:
38+
39+
```bash
40+
npx http-server .
41+
```
42+
43+
3. **Open in Browser**:
44+
Navigate to `http://localhost:8000` (or the port shown by your server).
45+
46+
File renamed without changes.
10.7 MB
Binary file not shown.

examples/DeepLabv3/web_app/index.html renamed to examples/DeepLabv3/web_app_onnx/index.html

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
<meta name="viewport" content="width=device-width, initial-scale=1.0">
77
<title>Document Detection Web App</title>
88
<link rel="stylesheet" href="style.css">
9-
<!-- Import ONNX Runtime Web -->
10-
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
9+
<!-- Import ONNX Runtime Web (full bundle with all backends: WASM, WebGPU) -->
10+
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.all.min.js"></script>
1111
</head>
1212

1313
<body>
@@ -38,9 +38,8 @@ <h3>Controls</h3>
3838
style="display: block; margin-bottom: 5px; font-weight: 500;">Backend:</label>
3939
<select id="backend-select"
4040
style="width: 100%; padding: 8px; border-radius: 4px; border: 1px solid #ddd;">
41-
<option value="wasm" selected>WASM (CPU)</option>
42-
<option value="webgl">WebGL (GPU)</option>
43-
<option value="webgpu">WebGPU (GPU)</option>
41+
<option value="wasm">WASM (CPU)</option>
42+
<option value="webgpu" selected>WebGPU (GPU)</option>
4443
</select>
4544
</div>
4645
<button id="webcam-btn" class="btn primary" disabled>Start Webcam</button>
Lines changed: 76 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
// Configuration
2-
const QUANTIZED_MODEL_PATH = 'document_detector_quant.onnx';
3-
const FP32_MODEL_PATH = 'document_detector.onnx';
42
const INPUT_SIZE = 384;
5-
const MEAN = [0.485, 0.456, 0.406];
6-
const STD = [0.229, 0.224, 0.225];
73

84
// State
9-
let session = null;
5+
let worker = null;
106
let isWebcamActive = false;
117
let webcamStream = null;
128
let isProcessing = false;
139
let frameCount = 0;
1410
let fpsInterval = null;
11+
let latestResult = null;
1512

1613
// DOM Elements
1714
const statusText = document.getElementById('status-text');
@@ -30,56 +27,35 @@ const inferenceEl = document.getElementById('inference-time');
3027
const postprocessEl = document.getElementById('postprocess-time');
3128
const totalEl = document.getElementById('total-time');
3229
const fpsEl = document.getElementById('fps-counter');
33-
// Initialization
34-
async function init(backend = 'wasm') {
35-
try {
36-
webcamBtn.disabled = true;
37-
updateStatus(`Initializing ${backend}...`, 'loading');
38-
39-
// Initialize ONNX Runtime
40-
const option = {
41-
executionProviders: [backend],
42-
graphOptimizationLevel: 'all'
43-
};
4430

45-
// Optimization for WASM
46-
if (backend === 'wasm') {
47-
option.executionMode = 'parallel';
48-
option.intraOpNumThreads = navigator.hardwareConcurrency || 4;
49-
}
50-
51-
// Select model based on backend
52-
// WASM -> Quantized (INT8) for CPU speed
53-
// WebGL/WebGPU -> FP32 for GPU shader compatibility
54-
const modelPath = backend === 'wasm' ? QUANTIZED_MODEL_PATH : FP32_MODEL_PATH;
31+
// Initialization
32+
function init(backend = 'wasm') {
33+
webcamBtn.disabled = true;
34+
updateStatus(`Initializing ${backend}...`, 'loading');
5535

56-
updateStatus(`Loading Model (${backend})...`, 'loading');
36+
if (worker) {
37+
worker.terminate();
38+
}
5739

58-
// Release existing session if any
59-
if (session) {
60-
session = null;
40+
worker = new Worker('worker.js');
41+
42+
worker.onmessage = (e) => {
43+
const { type, data, backend: backendName, output, timings, error } = e.data;
44+
45+
if (type === 'init_complete') {
46+
console.log('Inference Session created with provider:', backendName);
47+
document.getElementById('backend-type').textContent = backendName;
48+
updateStatus('Ready', 'ready');
49+
webcamBtn.disabled = false;
50+
} else if (type === 'detect_complete') {
51+
handleDetectionResult(output, timings);
52+
} else if (type === 'error') {
53+
console.error(error);
54+
updateStatus(`Error: ${error}`, 'error');
6155
}
56+
};
6257

63-
session = await ort.InferenceSession.create(modelPath, option);
64-
65-
// Log the execution provider
66-
console.log('Inference Session created with provider:', session.handler.backendName);
67-
document.getElementById('backend-type').textContent = session.handler.backendName;
68-
69-
updateStatus('Ready', 'ready');
70-
webcamBtn.disabled = false;
71-
72-
// Warmup
73-
console.log('Warming up model...');
74-
const dummyInput = new Float32Array(1 * 3 * INPUT_SIZE * INPUT_SIZE).fill(0);
75-
const tensor = new ort.Tensor('float32', dummyInput, [1, 3, INPUT_SIZE, INPUT_SIZE]);
76-
await session.run({ input: tensor });
77-
console.log('Warmup complete');
78-
79-
} catch (e) {
80-
console.error(e);
81-
updateStatus(`Error: ${e.message}`, 'error');
82-
}
58+
worker.postMessage({ type: 'init', data: { backend } });
8359
}
8460

8561
// Handle Backend Change
@@ -89,53 +65,14 @@ backendSelect.addEventListener('change', (e) => {
8965

9066
// Start initialization immediately
9167
init(backendSelect.value);
92-
init();
9368

9469
// Helper: Update Status
9570
function updateStatus(text, type) {
9671
statusText.textContent = text;
9772
statusDot.className = `status-dot ${type}`;
9873
}
9974

100-
// Helper: Preprocess Image
101-
function preprocess(imageData) {
102-
const startTime = performance.now();
103-
104-
// 1. Resize to 384x384
105-
const tempCanvas = document.createElement('canvas');
106-
tempCanvas.width = INPUT_SIZE;
107-
tempCanvas.height = INPUT_SIZE;
108-
const tempCtx = tempCanvas.getContext('2d');
109-
tempCtx.drawImage(imageData, 0, 0, INPUT_SIZE, INPUT_SIZE);
110-
111-
const resizedData = tempCtx.getImageData(0, 0, INPUT_SIZE, INPUT_SIZE);
112-
const { data } = resizedData;
113-
114-
// 2. Normalize and HWC -> CHW
115-
const float32Data = new Float32Array(3 * INPUT_SIZE * INPUT_SIZE);
116-
117-
for (let i = 0; i < INPUT_SIZE * INPUT_SIZE; i++) {
118-
const r = data[i * 4] / 255.0;
119-
const g = data[i * 4 + 1] / 255.0;
120-
const b = data[i * 4 + 2] / 255.0;
121-
122-
// Normalize: (value - mean) / std
123-
float32Data[i] = (r - MEAN[0]) / STD[0]; // R
124-
float32Data[INPUT_SIZE * INPUT_SIZE + i] = (g - MEAN[1]) / STD[1]; // G
125-
float32Data[2 * INPUT_SIZE * INPUT_SIZE + i] = (b - MEAN[2]) / STD[2]; // B
126-
}
127-
128-
const tensor = new ort.Tensor('float32', float32Data, [1, 3, INPUT_SIZE, INPUT_SIZE]);
129-
130-
return {
131-
tensor,
132-
time: performance.now() - startTime
133-
};
134-
}
135-
136-
// === Pure JS Geometry Utils ===
137-
138-
// Find convex hull using Monotone Chain algorithm
75+
// === Pure JS Geometry Utils ===
// Find convex hull using Monotone Chain algorithm
13976
function convexHull(points) {
14077
points.sort((a, b) => a.x === b.x ? a.y - b.y : a.x - b.x);
14178

@@ -196,10 +133,10 @@ function findCorners(points) {
196133
}
197134

198135
// Helper: Postprocess
199-
function postprocess(outputTensor, originalWidth, originalHeight) {
136+
function postprocess(outputData, originalWidth, originalHeight) {
200137
const startTime = performance.now();
201138

202-
const data = outputTensor.data;
139+
const data = outputData;
203140
const size = INPUT_SIZE * INPUT_SIZE;
204141

205142
// Create mask array (0 or 1)
@@ -255,14 +192,11 @@ function postprocess(outputTensor, originalWidth, originalHeight) {
255192
};
256193
}
257194

258-
// Helper: Draw Results
259-
function drawResults(imageSource, mask, corners) {
195+
// Helper: Draw Overlay
196+
function drawOverlay(mask, corners) {
260197
const width = canvas.width;
261198
const height = canvas.height;
262199

263-
// Draw original image
264-
ctx.drawImage(imageSource, 0, 0, width, height);
265-
266200
const showMask = document.getElementById('show-mask').checked;
267201
const showBoundary = document.getElementById('show-boundary').checked;
268202

@@ -313,40 +247,44 @@ function drawResults(imageSource, mask, corners) {
313247
}
314248
}
315249

316-
// Main Processing Loop
317-
async function processFrame(imageSource) {
318-
if (isProcessing) return;
319-
isProcessing = true;
250+
function handleDetectionResult(output, timings) {
251+
// 3. Postprocess
252+
const postResult = postprocess(output, canvas.width, canvas.height);
253+
postprocessEl.textContent = `${postResult.time.toFixed(1)} ms`;
320254

321-
try {
322-
// 1. Preprocess
323-
const preResult = preprocess(imageSource);
324-
preprocessEl.textContent = `${preResult.time.toFixed(1)} ms`;
255+
// Update timings
256+
preprocessEl.textContent = `${timings.preprocess.toFixed(1)} ms`;
257+
inferenceEl.textContent = `${timings.inference.toFixed(1)} ms`;
325258

326-
// 2. Inference
327-
const startTime = performance.now();
328-
const feeds = { input: preResult.tensor };
329-
const results = await session.run(feeds);
330-
const output = results.output; // Assuming output name is 'output'
331-
const inferTime = performance.now() - startTime;
332-
inferenceEl.textContent = `${inferTime.toFixed(1)} ms`;
259+
// Total Time
260+
const totalTime = timings.preprocess + timings.inference + postResult.time;
261+
totalEl.textContent = `${totalTime.toFixed(1)} ms`;
333262

334-
// 3. Postprocess
335-
const postResult = postprocess(output, canvas.width, canvas.height);
336-
postprocessEl.textContent = `${postResult.time.toFixed(1)} ms`;
263+
// Update global state
264+
latestResult = postResult;
337265

338-
// Total Time
339-
const totalTime = preResult.time + inferTime + postResult.time;
340-
totalEl.textContent = `${totalTime.toFixed(1)} ms`;
266+
// If not webcam, we need to explicitly draw because there is no loop
267+
if (!isWebcamActive) {
268+
ctx.drawImage(sourceImage, 0, 0, canvas.width, canvas.height);
269+
drawOverlay(postResult.mask, postResult.corners);
270+
}
341271

342-
// Draw
343-
drawResults(imageSource, postResult.mask, postResult.corners);
272+
isProcessing = false;
273+
frameCount++;
274+
}
344275

276+
// Main Processing Loop
277+
async function processFrame(imageSource) {
278+
if (isProcessing) return;
279+
isProcessing = true;
280+
281+
try {
282+
// Create ImageBitmap to send to worker (transferable and efficient)
283+
const bitmap = await createImageBitmap(imageSource);
284+
worker.postMessage({ type: 'detect', data: { image: bitmap } }, [bitmap]);
345285
} catch (e) {
346286
console.error(e);
347-
} finally {
348287
isProcessing = false;
349-
frameCount++;
350288
}
351289
}
352290

@@ -394,6 +332,7 @@ async function startWebcam() {
394332
canvas.width = webcamVideo.videoWidth;
395333
canvas.height = webcamVideo.videoHeight;
396334
isWebcamActive = true;
335+
latestResult = null; // Reset result
397336
webcamBtn.textContent = 'Stop Webcam';
398337
webcamBtn.classList.replace('primary', 'secondary');
399338

@@ -427,7 +366,19 @@ function stopWebcam() {
427366
function webcamLoop() {
428367
if (!isWebcamActive) return;
429368

430-
processFrame(webcamVideo).then(() => {
431-
requestAnimationFrame(webcamLoop);
432-
});
369+
// 1. Render immediately
370+
const width = canvas.width;
371+
const height = canvas.height;
372+
ctx.drawImage(webcamVideo, 0, 0, width, height);
373+
374+
// 2. Draw overlay if available
375+
if (latestResult) {
376+
drawOverlay(latestResult.mask, latestResult.corners);
377+
}
378+
379+
// 3. Try to process frame (will skip if busy)
380+
processFrame(webcamVideo);
381+
382+
// 4. Loop
383+
requestAnimationFrame(webcamLoop);
433384
}

0 commit comments

Comments
 (0)