|
| 1 | +/** |
| 2 | + * useActionCamera — manages camera + YOLO inference + action detection |
| 3 | + * for Step 7 motor instruction verification. |
| 4 | + * |
| 5 | + * Reuses the existing inference.worker.ts in body-only mode (YOLO + TCN) |
| 6 | + * but only extracts keypoints for rule-based action detection. |
| 7 | + */ |
| 8 | + |
| 9 | +"use client"; |
| 10 | +import { useState, useEffect, useRef, useCallback } from "react"; |
| 11 | +import type { PipelineResult, WorkerOutMessage } from "../types/inference"; |
| 12 | +import { ActionTracker, type ActionId, type ActionResult } from "../lib/actions/actionDetector"; |
| 13 | + |
// COCO-17 skeleton: each entry is a pair of keypoint indices joined by one
// bone of the overlay (same connections as DetectorVideoCanvas).
const SKELETON: [number, number][] = [
  // Head
  [0, 1],
  [0, 2],
  [1, 3],
  [2, 4],
  // Arms
  [5, 7],
  [7, 9],
  [6, 8],
  [8, 10],
  // Torso
  [5, 6],
  [5, 11],
  [6, 12],
  [11, 12],
  // Legs
  [11, 13],
  [13, 15],
  [12, 14],
  [14, 16],
];
| 21 | + |
/** Everything `useActionCamera` exposes to the component that renders it. */
export interface UseActionCameraReturn {
  /** Attach to the `<video>` element; the camera stream is played through it and frames are captured from it. */
  videoRef: React.RefObject<HTMLVideoElement | null>;
  /** Attach to the `<canvas>` on which the skeleton overlay is drawn. */
  overlayRef: React.RefObject<HTMLCanvasElement | null>;
  /** True once the inference worker has reported that it is initialised. */
  isModelLoaded: boolean;
  /** True between a successful `startCamera` and the next `stopCamera`. */
  isActive: boolean;
  /** Message describing the latest camera or inference-worker failure, or `null`. */
  cameraError: string | null;
  /** Requests the user-facing camera and starts playback; failures are reported through `cameraError`. */
  startCamera: () => Promise<void>;
  /** Stops the frame loop and releases the camera stream. */
  stopCamera: () => void;
  /** Resets the tracker and previous results, then begins watching for `action`. */
  startDetecting: (action: ActionId) => void;
  /** Stops watching for the current target action. */
  stopDetecting: () => void;
  /** Most recent tracker output for the target action, or `null` if none has been produced since the last reset. */
  actionResult: ActionResult | null;
  /** True once the tracker has confirmed the target action. */
  actionDetected: boolean;
  /** Keypoints from the most recent inference result that contained a pose, or `null`. */
  keypoints: Float32Array | null;
  /** Per-keypoint confidences matching `keypoints`, or `null`. */
  confidence: Float32Array | null;
}
| 37 | + |
/** Width, in pixels, of the frames captured from the video and sent to the worker. */
const FRAME_WIDTH = 320;
/** Height, in pixels, of the frames captured from the video and sent to the worker. */
const FRAME_HEIGHT = 240;
/** Number of keypoints per pose; the flat keypoint array holds an (x, y) pair for each. */
const KEYPOINT_COUNT = 17;
/** Keypoints (and bones touching them) below this confidence are not drawn. */
const MIN_DRAW_CONFIDENCE = 0.3;

/** Stops every track of `stream` so the camera device is released. */
function stopStream(stream: MediaStream | null): void {
  if (!stream) return;
  stream.getTracks().forEach((track) => track.stop());
}

/**
 * React hook that owns the camera stream, the inference worker and the
 * action tracker used to verify a motor instruction.
 *
 * Lifecycle:
 * - On mount a worker is created, initialised and switched to body-only mode.
 * - `startCamera` acquires the user-facing camera; while the camera is active
 *   and the model is loaded, frames are captured at FRAME_WIDTH×FRAME_HEIGHT
 *   and posted to the worker one at a time (`busyRef` is the back-pressure flag).
 * - Each result's keypoints are drawn on the overlay canvas and, while
 *   detecting, fed to the `ActionTracker` for the target action.
 * - On unmount the worker is terminated and the camera is released.
 *
 * @returns Refs to attach to the `<video>` / overlay `<canvas>`, status flags,
 *          camera and detection controls, and the latest results.
 */
export function useActionCamera(): UseActionCameraReturn {
  const videoRef = useRef<HTMLVideoElement | null>(null);
  const overlayRef = useRef<HTMLCanvasElement | null>(null);
  const captureCanvasRef = useRef<HTMLCanvasElement | null>(null);

  const [isModelLoaded, setIsModelLoaded] = useState(false);
  const [isActive, setIsActive] = useState(false);
  const [cameraError, setCameraError] = useState<string | null>(null);
  const [actionResult, setActionResult] = useState<ActionResult | null>(null);
  const [actionDetected, setActionDetected] = useState(false);
  const [keypoints, setKeypoints] = useState<Float32Array | null>(null);
  const [confidence, setConfidence] = useState<Float32Array | null>(null);

  const workerRef = useRef<Worker | null>(null);
  // True while a frame is in flight to the worker; prevents queueing frames.
  const busyRef = useRef(false);
  const rafRef = useRef(0);
  const streamRef = useRef<MediaStream | null>(null);
  const trackerRef = useRef(new ActionTracker());
  const targetActionRef = useRef<ActionId | null>(null);
  const detectingRef = useRef(false);
  // Lets async work (getUserMedia) detect that the hook unmounted meanwhile.
  const mountedRef = useRef(true);

  // Wipe the skeleton overlay, if the canvas is mounted.
  const clearOverlay = useCallback(() => {
    const canvas = overlayRef.current;
    if (!canvas) return;
    const ctx = canvas.getContext("2d");
    if (!ctx) return;
    ctx.clearRect(0, 0, canvas.width, canvas.height);
  }, []);

  // Draw skeleton overlay
  const drawSkeleton = useCallback((kps: Float32Array, conf: Float32Array) => {
    const canvas = overlayRef.current;
    if (!canvas) return;
    const ctx = canvas.getContext("2d");
    if (!ctx) return;

    const w = canvas.width;
    const h = canvas.height;
    ctx.clearRect(0, 0, w, h);

    // Need a full pose: an (x, y) pair and a confidence for every keypoint.
    if (kps.length < KEYPOINT_COUNT * 2 || conf.length < KEYPOINT_COUNT) return;

    // Scale keypoints from the capture frame size to the canvas size
    const scaleX = w / FRAME_WIDTH;
    const scaleY = h / FRAME_HEIGHT;

    // Draw bones
    ctx.strokeStyle = "rgba(104, 159, 56, 0.8)";
    ctx.lineWidth = 2.5;
    for (const [a, b] of SKELETON) {
      if (conf[a] < MIN_DRAW_CONFIDENCE || conf[b] < MIN_DRAW_CONFIDENCE) continue;
      ctx.beginPath();
      ctx.moveTo(kps[a * 2] * scaleX, kps[a * 2 + 1] * scaleY);
      ctx.lineTo(kps[b * 2] * scaleX, kps[b * 2 + 1] * scaleY);
      ctx.stroke();
    }

    // Draw keypoints
    ctx.fillStyle = "rgba(104, 159, 56, 0.9)";
    for (let i = 0; i < KEYPOINT_COUNT; i++) {
      if (conf[i] < MIN_DRAW_CONFIDENCE) continue;
      ctx.beginPath();
      ctx.arc(kps[i * 2] * scaleX, kps[i * 2 + 1] * scaleY, 4, 0, Math.PI * 2);
      ctx.fill();
    }
  }, []);

  // Handle inference result
  const handleResult = useCallback(
    (result: PipelineResult) => {
      // A result that arrives after the camera was stopped must not redraw
      // the overlay that stopCamera just cleared.
      if (!streamRef.current) return;

      const kps = result.keypoints;
      const conf = result.confidence;
      if (!kps || !conf) {
        // No pose in this frame: do not leave a stale skeleton on screen.
        clearOverlay();
        return;
      }

      setKeypoints(kps);
      setConfidence(conf);
      drawSkeleton(kps, conf);

      if (detectingRef.current && targetActionRef.current) {
        const tracked = trackerRef.current.update(kps, conf, targetActionRef.current);
        setActionResult(tracked);
        if (tracked.confirmed) {
          setActionDetected(true);
          // Stop tracking once confirmed; startDetecting re-arms it.
          detectingRef.current = false;
        }
      }
    },
    [clearOverlay, drawSkeleton],
  );

  // Create & initialise worker on mount (handleResult is stable, so this runs once)
  useEffect(() => {
    let worker: Worker;
    try {
      worker = new Worker(
        new URL("../../workers/inference.worker.ts", import.meta.url),
        { type: "module" },
      );
    } catch (err) {
      setCameraError(`Failed to create inference worker: ${err instanceof Error ? err.message : String(err)}`);
      return;
    }
    workerRef.current = worker;

    worker.onmessage = (e: MessageEvent<WorkerOutMessage>) => {
      const msg = e.data;
      switch (msg.type) {
        case "initialized":
          setIsModelLoaded(true);
          // Set body-only mode
          worker.postMessage({ type: "setModality", modality: "body" });
          break;
        case "result":
          handleResult(msg.data);
          busyRef.current = false;
          break;
        case "error":
          busyRef.current = false;
          break;
      }
    };

    // An uncaught error inside the worker never produces a "result"/"error"
    // message, so release the busy flag here or the frame loop stalls forever.
    worker.onerror = (event: ErrorEvent) => {
      busyRef.current = false;
      setCameraError(`Inference worker error: ${event.message || "unknown error"}`);
    };

    worker.postMessage({ type: "init" });

    return () => {
      worker.terminate();
      workerRef.current = null;
      // Any in-flight frame will never be answered by a terminated worker.
      busyRef.current = false;
    };
  }, [handleResult]);

  // Frame capture loop
  const sendFrame = useCallback(() => {
    const worker = workerRef.current;
    const video = videoRef.current;

    const notReady =
      !worker ||
      !video ||
      !isActive ||
      !isModelLoaded ||
      busyRef.current ||
      video.paused ||
      // No decoded frame yet: drawImage would produce a blank image.
      video.readyState < HTMLMediaElement.HAVE_CURRENT_DATA;
    if (notReady || !worker || !video) {
      if (isActive) rafRef.current = requestAnimationFrame(sendFrame);
      return;
    }

    try {
      if (!captureCanvasRef.current) {
        captureCanvasRef.current = document.createElement("canvas");
        captureCanvasRef.current.width = FRAME_WIDTH;
        captureCanvasRef.current.height = FRAME_HEIGHT;
      }
      const ctx = captureCanvasRef.current.getContext("2d", { willReadFrequently: true });
      if (ctx) {
        ctx.drawImage(video, 0, 0, FRAME_WIDTH, FRAME_HEIGHT);
        const imageData = ctx.getImageData(0, 0, FRAME_WIDTH, FRAME_HEIGHT);

        busyRef.current = true;
        // Transfer the pixel buffer instead of copying it.
        worker.postMessage({ type: "processFrame", imageData }, [imageData.data.buffer]);
      }
    } catch {
      // Frame capture error — skip this frame, but release the busy flag:
      // nothing was (successfully) sent, so no reply will ever clear it.
      busyRef.current = false;
    }

    rafRef.current = requestAnimationFrame(sendFrame);
  }, [isActive, isModelLoaded]);

  // Start/stop frame loop when active changes
  useEffect(() => {
    if (isActive && isModelLoaded) {
      rafRef.current = requestAnimationFrame(sendFrame);
    }
    return () => cancelAnimationFrame(rafRef.current);
  }, [isActive, isModelLoaded, sendFrame]);

  const startCamera = useCallback(async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({
        video: { width: FRAME_WIDTH, height: FRAME_HEIGHT, facingMode: "user" },
      });

      // The hook unmounted while the permission prompt was open: release the
      // camera immediately, nothing will ever stop this stream otherwise.
      if (!mountedRef.current) {
        stopStream(stream);
        return;
      }

      // A repeated startCamera must not leak the previously acquired stream.
      if (streamRef.current && streamRef.current !== stream) {
        stopStream(streamRef.current);
      }
      streamRef.current = stream;

      if (videoRef.current) {
        videoRef.current.srcObject = stream;
        // Best effort: a rejected play() (e.g. autoplay policy) is not fatal;
        // the frame loop simply waits while the video is paused.
        await videoRef.current.play().catch(() => {});
      }
      setIsActive(true);
      setCameraError(null);
    } catch (err) {
      setCameraError(
        err instanceof Error ? err.message : "Camera access denied",
      );
    }
  }, []);

  const stopCamera = useCallback(() => {
    setIsActive(false);
    cancelAnimationFrame(rafRef.current);
    stopStream(streamRef.current);
    streamRef.current = null;
    if (videoRef.current) {
      videoRef.current.srcObject = null;
    }
    // Do not leave the last skeleton drawn over a stopped video.
    clearOverlay();
  }, [clearOverlay]);

  const startDetecting = useCallback((action: ActionId) => {
    targetActionRef.current = action;
    detectingRef.current = true;
    trackerRef.current.reset();
    setActionDetected(false);
    setActionResult(null);
  }, []);

  const stopDetecting = useCallback(() => {
    targetActionRef.current = null;
    detectingRef.current = false;
  }, []);

  // Track mount state and clean up on unmount
  useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      cancelAnimationFrame(rafRef.current);
      stopStream(streamRef.current);
      streamRef.current = null;
    };
  }, []);

  return {
    videoRef,
    overlayRef,
    isModelLoaded,
    isActive,
    cameraError,
    startCamera,
    stopCamera,
    startDetecting,
    stopDetecting,
    actionResult,
    actionDetected,
    keypoints,
    confidence,
  };
}
0 commit comments