Skip to content

Commit 9ac5b4f

Browse files
committed
Added tensorflow.js example
1 parent ab7916b commit 9ac5b4f

23 files changed

+1081
-131
lines changed

examples/DeepLabv3/.gitignore

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,19 @@ Thumbs.db
4040
output/
4141
*.pth
4242
!model_mbv3_iou_mix_2C049.pth
43+
44+
# Keep model files in web apps
4345
*.onnx
44-
!web_app/document_detector.onnx
46+
!web_app_onnx/*.onnx
47+
*.tflite
48+
49+
# TensorFlow conversion artifacts
50+
tf_saved_model/
51+
tfjs_model_wrapped/
4552

4653
# Logs
4754
*.log
55+
log.txt
4856

4957
# Jupyter
5058
.ipynb_checkpoints/
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Document Detection Web App
2+
3+
This is a web-based application for document detection and segmentation using DeepLabV3 (MobileNetV3 backbone) and ONNX Runtime Web. It runs entirely in the browser using WebAssembly (WASM) or WebGPU.
4+
5+
## Online Demo
6+
https://yushulx.me/javascript-barcode-qr-code-scanner/examples/DeepLabv3/
7+
8+
## Features
9+
10+
- **Real-time Document Detection**: Segments documents from the background.
11+
- **Multiple Backends**:
12+
- **WASM (CPU)**: Uses a quantized INT8 model for efficient CPU inference.
13+
- **WebGPU (GPU)**: Uses an FP32 model for high-performance GPU acceleration (requires a compatible browser).
14+
- **Input Sources**: Supports both a live webcam feed and image file uploads.
15+
- **Visualization**: Displays the segmentation mask and document boundary overlay.
16+
- **Performance Metrics**: Real-time tracking of pre-processing, inference, and post-processing times.
17+
- **Model Caching**: Caches downloaded models locally to speed up subsequent loads.
18+
19+
## Prerequisites
20+
21+
- A modern web browser (Chrome, Edge, Firefox).
22+
- For **WebGPU** support, you need a browser with the WebGPU API enabled (e.g., latest Chrome or Edge) and compatible hardware.
23+
24+
## Setup & Usage
25+
26+
1. **Clone the repository** (if you haven't already).
27+
2. **Serve the directory**:
28+
Because this application uses modern web standards (ES modules, WebGPU, Cache API), it must be served over HTTP/HTTPS. You cannot run it by simply opening `index.html` as a file.
29+
30+
You can use any static file server. For example, using Python:
31+
32+
```bash
33+
# Python 3
34+
python -m http.server 8000
35+
```
36+
37+
Or using Node.js `http-server`:
38+
39+
```bash
40+
npx http-server .
41+
```
42+
43+
3. **Open in Browser**:
44+
Navigate to `http://localhost:8000` (or the port shown by your server).
45+
46+
File renamed without changes.
10.7 MB
Binary file not shown.

examples/DeepLabv3/web_app/index.html renamed to examples/DeepLabv3/web_app_onnx/index.html

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
<meta name="viewport" content="width=device-width, initial-scale=1.0">
77
<title>Document Detection Web App</title>
88
<link rel="stylesheet" href="style.css">
9-
<!-- Import ONNX Runtime Web -->
10-
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
9+
<!-- Import ONNX Runtime Web (full bundle with all backends: WASM, WebGPU) -->
10+
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.all.min.js"></script>
1111
</head>
1212

1313
<body>
@@ -38,9 +38,8 @@ <h3>Controls</h3>
3838
style="display: block; margin-bottom: 5px; font-weight: 500;">Backend:</label>
3939
<select id="backend-select"
4040
style="width: 100%; padding: 8px; border-radius: 4px; border: 1px solid #ddd;">
41-
<option value="wasm" selected>WASM (CPU)</option>
42-
<option value="webgl">WebGL (GPU)</option>
43-
<option value="webgpu">WebGPU (GPU)</option>
41+
<option value="wasm">WASM (CPU)</option>
42+
<option value="webgpu" selected>WebGPU (GPU)</option>
4443
</select>
4544
</div>
4645
<button id="webcam-btn" class="btn primary" disabled>Start Webcam</button>
Lines changed: 76 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
// Configuration
2-
const QUANTIZED_MODEL_PATH = 'document_detector_quant.onnx';
3-
const FP32_MODEL_PATH = 'document_detector.onnx';
42
const INPUT_SIZE = 384;
5-
const MEAN = [0.485, 0.456, 0.406];
6-
const STD = [0.229, 0.224, 0.225];
73

84
// State
9-
let session = null;
5+
let worker = null;
106
let isWebcamActive = false;
117
let webcamStream = null;
128
let isProcessing = false;
139
let frameCount = 0;
1410
let fpsInterval = null;
11+
let latestResult = null;
1512

1613
// DOM Elements
1714
const statusText = document.getElementById('status-text');
@@ -30,56 +27,35 @@ const inferenceEl = document.getElementById('inference-time');
3027
const postprocessEl = document.getElementById('postprocess-time');
3128
const totalEl = document.getElementById('total-time');
3229
const fpsEl = document.getElementById('fps-counter');
33-
// Initialization
34-
async function init(backend = 'wasm') {
35-
try {
36-
webcamBtn.disabled = true;
37-
updateStatus(`Initializing ${backend}...`, 'loading');
38-
39-
// Initialize ONNX Runtime
40-
const option = {
41-
executionProviders: [backend],
42-
graphOptimizationLevel: 'all'
43-
};
4430

45-
// Optimization for WASM
46-
if (backend === 'wasm') {
47-
option.executionMode = 'parallel';
48-
option.intraOpNumThreads = navigator.hardwareConcurrency || 4;
49-
}
50-
51-
// Select model based on backend
52-
// WASM -> Quantized (INT8) for CPU speed
53-
// WebGL/WebGPU -> FP32 for GPU shader compatibility
54-
const modelPath = backend === 'wasm' ? QUANTIZED_MODEL_PATH : FP32_MODEL_PATH;
31+
// Initialization
32+
function init(backend = 'wasm') {
33+
webcamBtn.disabled = true;
34+
updateStatus(`Initializing ${backend}...`, 'loading');
5535

56-
updateStatus(`Loading Model (${backend})...`, 'loading');
36+
if (worker) {
37+
worker.terminate();
38+
}
5739

58-
// Release existing session if any
59-
if (session) {
60-
session = null;
40+
worker = new Worker('worker.js');
41+
42+
worker.onmessage = (e) => {
43+
const { type, data, backend: backendName, output, timings, error } = e.data;
44+
45+
if (type === 'init_complete') {
46+
console.log('Inference Session created with provider:', backendName);
47+
document.getElementById('backend-type').textContent = backendName;
48+
updateStatus('Ready', 'ready');
49+
webcamBtn.disabled = false;
50+
} else if (type === 'detect_complete') {
51+
handleDetectionResult(output, timings);
52+
} else if (type === 'error') {
53+
console.error(error);
54+
updateStatus(`Error: ${error}`, 'error');
6155
}
56+
};
6257

63-
session = await ort.InferenceSession.create(modelPath, option);
64-
65-
// Log the execution provider
66-
console.log('Inference Session created with provider:', session.handler.backendName);
67-
document.getElementById('backend-type').textContent = session.handler.backendName;
68-
69-
updateStatus('Ready', 'ready');
70-
webcamBtn.disabled = false;
71-
72-
// Warmup
73-
console.log('Warming up model...');
74-
const dummyInput = new Float32Array(1 * 3 * INPUT_SIZE * INPUT_SIZE).fill(0);
75-
const tensor = new ort.Tensor('float32', dummyInput, [1, 3, INPUT_SIZE, INPUT_SIZE]);
76-
await session.run({ input: tensor });
77-
console.log('Warmup complete');
78-
79-
} catch (e) {
80-
console.error(e);
81-
updateStatus(`Error: ${e.message}`, 'error');
82-
}
58+
worker.postMessage({ type: 'init', data: { backend } });
8359
}
8460

8561
// Handle Backend Change
@@ -89,53 +65,14 @@ backendSelect.addEventListener('change', (e) => {
8965

9066
// Start initialization immediately
9167
init(backendSelect.value);
92-
init();
9368

9469
// Helper: Update Status
9570
function updateStatus(text, type) {
9671
statusText.textContent = text;
9772
statusDot.className = `status-dot ${type}`;
9873
}
9974

100-
// Helper: Preprocess Image
101-
function preprocess(imageData) {
102-
const startTime = performance.now();
103-
104-
// 1. Resize to 384x384
105-
const tempCanvas = document.createElement('canvas');
106-
tempCanvas.width = INPUT_SIZE;
107-
tempCanvas.height = INPUT_SIZE;
108-
const tempCtx = tempCanvas.getContext('2d');
109-
tempCtx.drawImage(imageData, 0, 0, INPUT_SIZE, INPUT_SIZE);
110-
111-
const resizedData = tempCtx.getImageData(0, 0, INPUT_SIZE, INPUT_SIZE);
112-
const { data } = resizedData;
113-
114-
// 2. Normalize and HWC -> CHW
115-
const float32Data = new Float32Array(3 * INPUT_SIZE * INPUT_SIZE);
116-
117-
for (let i = 0; i < INPUT_SIZE * INPUT_SIZE; i++) {
118-
const r = data[i * 4] / 255.0;
119-
const g = data[i * 4 + 1] / 255.0;
120-
const b = data[i * 4 + 2] / 255.0;
121-
122-
// Normalize: (value - mean) / std
123-
float32Data[i] = (r - MEAN[0]) / STD[0]; // R
124-
float32Data[INPUT_SIZE * INPUT_SIZE + i] = (g - MEAN[1]) / STD[1]; // G
125-
float32Data[2 * INPUT_SIZE * INPUT_SIZE + i] = (b - MEAN[2]) / STD[2]; // B
126-
}
127-
128-
const tensor = new ort.Tensor('float32', float32Data, [1, 3, INPUT_SIZE, INPUT_SIZE]);
129-
130-
return {
131-
tensor,
132-
time: performance.now() - startTime
133-
};
134-
}
135-
136-
// === Pure JS Geometry Utils ===
137-
138-
// Find convex hull using Monotone Chain algorithm
75+
// === Pure JS Geometry Utils ===
// Find convex hull using Monotone Chain algorithm
13976
function convexHull(points) {
14077
points.sort((a, b) => a.x === b.x ? a.y - b.y : a.x - b.x);
14178

@@ -196,10 +133,10 @@ function findCorners(points) {
196133
}
197134

198135
// Helper: Postprocess
199-
function postprocess(outputTensor, originalWidth, originalHeight) {
136+
function postprocess(outputData, originalWidth, originalHeight) {
200137
const startTime = performance.now();
201138

202-
const data = outputTensor.data;
139+
const data = outputData;
203140
const size = INPUT_SIZE * INPUT_SIZE;
204141

205142
// Create mask array (0 or 1)
@@ -255,14 +192,11 @@ function postprocess(outputTensor, originalWidth, originalHeight) {
255192
};
256193
}
257194

258-
// Helper: Draw Results
259-
function drawResults(imageSource, mask, corners) {
195+
// Helper: Draw Overlay
196+
function drawOverlay(mask, corners) {
260197
const width = canvas.width;
261198
const height = canvas.height;
262199

263-
// Draw original image
264-
ctx.drawImage(imageSource, 0, 0, width, height);
265-
266200
const showMask = document.getElementById('show-mask').checked;
267201
const showBoundary = document.getElementById('show-boundary').checked;
268202

@@ -313,40 +247,44 @@ function drawResults(imageSource, mask, corners) {
313247
}
314248
}
315249

316-
// Main Processing Loop
317-
async function processFrame(imageSource) {
318-
if (isProcessing) return;
319-
isProcessing = true;
250+
function handleDetectionResult(output, timings) {
251+
// 3. Postprocess
252+
const postResult = postprocess(output, canvas.width, canvas.height);
253+
postprocessEl.textContent = `${postResult.time.toFixed(1)} ms`;
320254

321-
try {
322-
// 1. Preprocess
323-
const preResult = preprocess(imageSource);
324-
preprocessEl.textContent = `${preResult.time.toFixed(1)} ms`;
255+
// Update timings
256+
preprocessEl.textContent = `${timings.preprocess.toFixed(1)} ms`;
257+
inferenceEl.textContent = `${timings.inference.toFixed(1)} ms`;
325258

326-
// 2. Inference
327-
const startTime = performance.now();
328-
const feeds = { input: preResult.tensor };
329-
const results = await session.run(feeds);
330-
const output = results.output; // Assuming output name is 'output'
331-
const inferTime = performance.now() - startTime;
332-
inferenceEl.textContent = `${inferTime.toFixed(1)} ms`;
259+
// Total Time
260+
const totalTime = timings.preprocess + timings.inference + postResult.time;
261+
totalEl.textContent = `${totalTime.toFixed(1)} ms`;
333262

334-
// 3. Postprocess
335-
const postResult = postprocess(output, canvas.width, canvas.height);
336-
postprocessEl.textContent = `${postResult.time.toFixed(1)} ms`;
263+
// Update global state
264+
latestResult = postResult;
337265

338-
// Total Time
339-
const totalTime = preResult.time + inferTime + postResult.time;
340-
totalEl.textContent = `${totalTime.toFixed(1)} ms`;
266+
// If not webcam, we need to explicitly draw because there is no loop
267+
if (!isWebcamActive) {
268+
ctx.drawImage(sourceImage, 0, 0, canvas.width, canvas.height);
269+
drawOverlay(postResult.mask, postResult.corners);
270+
}
341271

342-
// Draw
343-
drawResults(imageSource, postResult.mask, postResult.corners);
272+
isProcessing = false;
273+
frameCount++;
274+
}
344275

276+
// Main Processing Loop
277+
async function processFrame(imageSource) {
278+
if (isProcessing) return;
279+
isProcessing = true;
280+
281+
try {
282+
// Create ImageBitmap to send to worker (transferable and efficient)
283+
const bitmap = await createImageBitmap(imageSource);
284+
worker.postMessage({ type: 'detect', data: { image: bitmap } }, [bitmap]);
345285
} catch (e) {
346286
console.error(e);
347-
} finally {
348287
isProcessing = false;
349-
frameCount++;
350288
}
351289
}
352290

@@ -394,6 +332,7 @@ async function startWebcam() {
394332
canvas.width = webcamVideo.videoWidth;
395333
canvas.height = webcamVideo.videoHeight;
396334
isWebcamActive = true;
335+
latestResult = null; // Reset result
397336
webcamBtn.textContent = 'Stop Webcam';
398337
webcamBtn.classList.replace('primary', 'secondary');
399338

@@ -427,7 +366,19 @@ function stopWebcam() {
427366
function webcamLoop() {
428367
if (!isWebcamActive) return;
429368

430-
processFrame(webcamVideo).then(() => {
431-
requestAnimationFrame(webcamLoop);
432-
});
369+
// 1. Render immediately
370+
const width = canvas.width;
371+
const height = canvas.height;
372+
ctx.drawImage(webcamVideo, 0, 0, width, height);
373+
374+
// 2. Draw overlay if available
375+
if (latestResult) {
376+
drawOverlay(latestResult.mask, latestResult.corners);
377+
}
378+
379+
// 3. Try to process frame (will skip if busy)
380+
processFrame(webcamVideo);
381+
382+
// 4. Loop
383+
requestAnimationFrame(webcamLoop);
433384
}

0 commit comments

Comments
 (0)