software-mansion
diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx
Lines changed: 34 additions & 2 deletions
diff --git a/apps/computer-vision/components/vision_camera/tasks/ClassificationTask.tsx b/apps/computer-vision/components/vision_camera/tasks/ClassificationTask.tsx
Lines changed: 4 additions & 1 deletion
diff --git a/apps/computer-vision/components/vision_camera/tasks/OCRTask.tsx b/apps/computer-vision/components/vision_camera/tasks/OCRTask.tsx
Lines changed: 133 additions & 0 deletions
diff --git a/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx b/apps/computer-vision/components/vision_camera/tasks/SegmentationTask.tsx
Lines changed: 14 additions & 5 deletions
@@ -31,8 +31,15 @@ import ColorPalette from '../../colors';
 import ClassificationTask from '../../components/vision_camera/tasks/ClassificationTask';
 import ObjectDetectionTask from '../../components/vision_camera/tasks/ObjectDetectionTask';
 import SegmentationTask from '../../components/vision_camera/tasks/SegmentationTask';
+import OCRTask from '../../components/vision_camera/tasks/OCRTask';
+import StyleTransferTask from '../../components/vision_camera/tasks/StyleTransferTask';
 
-type TaskId = 'classification' | 'objectDetection' | 'segmentation';
+type TaskId =
+  | 'classification'
+  | 'objectDetection'
+  | 'segmentation'
+  | 'ocr'
+  | 'styleTransfer';
 type ModelId =
   | 'classification'
   | 'objectDetectionSsdlite'
@@ -43,7 +50,10 @@ type ModelId =
   | 'segmentationLraspp'
   | 'segmentationFcnResnet50'
   | 'segmentationFcnResnet101'
-  | 'segmentationSelfie';
+  | 'segmentationSelfie'
+  | 'ocr'
+  | 'styleTransferCandy'
+  | 'styleTransferMosaic';
 
 type TaskVariant = { id: ModelId; label: string };
 type Task = { id: TaskId; label: string; variants: TaskVariant[] };
@@ -75,6 +85,19 @@ const TASKS: Task[] = [
       { id: 'objectDetectionRfdetr', label: 'RF-DETR Nano' },
     ],
   },
+  {
+    id: 'ocr',
+    label: 'OCR',
+    variants: [{ id: 'ocr', label: 'English' }],
+  },
+  {
+    id: 'styleTransfer',
+    label: 'Style',
+    variants: [
+      { id: 'styleTransferCandy', label: 'Candy' },
+      { id: 'styleTransferMosaic', label: 'Mosaic' },
+    ],
+  },
 ];
 
 // Module-level consts so worklets in task components can always reference the same stable objects.
@@ -225,6 +248,15 @@ export default function VisionCameraScreen() {
           }
         />
       )}
+      {activeTask === 'ocr' && <OCRTask {...taskProps} />}
+      {activeTask === 'styleTransfer' && (
+        <StyleTransferTask
+          {...taskProps}
+          activeModel={
+            activeModel as 'styleTransferCandy' | 'styleTransferMosaic'
+          }
+        />
+      )}
 
       {!isReady && (
         <View style={styles.loadingOverlay}>
 
@@ -5,7 +5,10 @@ import { scheduleOnRN } from 'react-native-worklets';
 import { EFFICIENTNET_V2_S, useClassification } from 'react-native-executorch';
 import { TaskProps } from './types';
 
-type Props = Omit<TaskProps, 'activeModel' | 'canvasSize'>;
+type Props = Omit<
+  TaskProps,
+  'activeModel' | 'canvasSize' | 'cameraPositionSync'
+>;
 
 export default function ClassificationTask({
   frameKillSwitch,
 
@@ -0,0 +1,133 @@
+import React, { useCallback, useEffect, useRef, useState } from 'react';
+import { StyleSheet, View } from 'react-native';
+import { Frame, useFrameOutput } from 'react-native-vision-camera';
+import { scheduleOnRN } from 'react-native-worklets';
+import { OCR_ENGLISH, OCRDetection, useOCR } from 'react-native-executorch';
+import Svg, { Polygon, Text as SvgText } from 'react-native-svg';
+import { TaskProps } from './types';
+
+type Props = TaskProps & { activeModel: string }; // activeModel is declared here but is not destructured by OCRTask below
+
+export default function OCRTask({ // Runs OCR on camera frames and draws each detection's bbox polygon and text in an SVG overlay
+  canvasSize,
+  cameraPositionSync,
+  frameKillSwitch,
+  onFrameOutputChange,
+  onReadyChange,
+  onProgressChange,
+  onGeneratingChange,
+  onFpsChange,
+}: Props) {
+  const model = useOCR({ model: OCR_ENGLISH }); // OCR hook, always configured with OCR_ENGLISH
+  const [detections, setDetections] = useState<OCRDetection[]>([]); // latest results handed back from the frame callback
+  const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); // starts at 1x1; the scale math below divides by these values
+  const lastFrameTimeRef = useRef(Date.now()); // timestamp of the previous result, used for the FPS estimate
+
+  useEffect(() => { // forward model.isReady to the onReadyChange prop whenever it changes
+    onReadyChange(model.isReady);
+  }, [model.isReady, onReadyChange]);
+
+  useEffect(() => { // forward model.downloadProgress to the onProgressChange prop
+    onProgressChange(model.downloadProgress);
+  }, [model.downloadProgress, onProgressChange]);
+
+  useEffect(() => { // forward model.isGenerating to the onGeneratingChange prop
+    onGeneratingChange(model.isGenerating);
+  }, [model.isGenerating, onGeneratingChange]);
+
+  const ocrRof = model.runOnFrame; // local alias captured by the frame callback below
+
+  const updateDetections = useCallback( // invoked through scheduleOnRN from the frame callback: stores results and reports FPS
+    (p: { results: OCRDetection[]; frameW: number; frameH: number }) => {
+      setDetections(p.results);
+      setImageSize({ width: p.frameW, height: p.frameH });
+      const now = Date.now();
+      const diff = now - lastFrameTimeRef.current; // milliseconds since the previous result
+      if (diff > 0) onFpsChange(Math.round(1000 / diff), diff); // reports (fps, ms between results); skipped when no time has elapsed
+      lastFrameTimeRef.current = now;
+    },
+    [onFpsChange]
+  );
+
+  const frameOutput = useFrameOutput({
+    pixelFormat: 'rgb',
+    dropFramesWhileBusy: true,
+    enablePreviewSizedOutputBuffers: true,
+    onFrame: useCallback(
+      (frame: Frame) => {
+        'worklet';
+        if (frameKillSwitch.getDirty()) { // kill switch set: dispose the frame without running OCR
+          frame.dispose();
+          return;
+        }
+        try {
+          if (!ocrRof) return; // no runOnFrame available; the finally block still disposes the frame
+          const isMirrored = cameraPositionSync.getDirty() === 'front'; // 'front' camera position is passed on as the mirrored flag
+          const result = ocrRof(frame, isMirrored);
+          if (result) {
+            scheduleOnRN(updateDetections, {
+              results: result,
+              frameW: frame.height, // width/height are swapped relative to the frame — presumably the buffer is rotated vs. the display; TODO confirm
+              frameH: frame.width,
+            });
+          }
+        } catch {
+          // swallowed: any error thrown above is dropped; the frame is still disposed in finally
+        } finally {
+          frame.dispose();
+        }
+      },
+      [cameraPositionSync, frameKillSwitch, ocrRof, updateDetections]
+    ),
+  });
+
+  useEffect(() => { // hand the current frame output object to the onFrameOutputChange prop
+    onFrameOutputChange(frameOutput);
+  }, [frameOutput, onFrameOutputChange]);
+
+  const scale = Math.max( // larger of the two ratios, so the scaled image is at least as big as the canvas on both axes
+    canvasSize.width / imageSize.width,
+    canvasSize.height / imageSize.height
+  );
+  const offsetX = (canvasSize.width - imageSize.width * scale) / 2; // centers the scaled image horizontally (zero or negative)
+  const offsetY = (canvasSize.height - imageSize.height * scale) / 2; // centers the scaled image vertically (zero or negative)
+
+  if (!detections.length) return null; // render nothing until there is at least one detection
+
+  return (
+    <View style={StyleSheet.absoluteFill} pointerEvents="none">
+      <Svg
+        width={canvasSize.width}
+        height={canvasSize.height}
+        style={StyleSheet.absoluteFill}
+      >
+        {detections.map((det, i) => {
+          const pts = det.bbox // bbox points mapped from image to canvas coordinates, formatted as "x,y x,y ..."
+            .map((p) => `${p.x * scale + offsetX},${p.y * scale + offsetY}`)
+            .join(' ');
+          const labelX = det.bbox[0]!.x * scale + offsetX; // label is anchored at the first bbox point
+          const labelY = det.bbox[0]!.y * scale + offsetY - 4; // shifted by -4 on the y axis from that point
+          return (
+            <React.Fragment key={i}>
+              <Polygon
+                points={pts}
+                fill="none"
+                stroke="cyan"
+                strokeWidth={2}
+              />
+              <SvgText
+                x={labelX}
+                y={labelY}
+                fill="white"
+                fontSize={12}
+                fontWeight="bold"
+              >
+                {det.text}
+              </SvgText>
+            </React.Fragment>
+          );
+        })}
+      </Svg>
+    </View>
+  );
+}
@@ -153,8 +153,17 @@ export default function SegmentationTask({
           const result = segRof(frame, isMirrored, [], false);
           if (result?.ARGMAX) {
             const argmax: Int32Array = result.ARGMAX;
-            const side = Math.round(Math.sqrt(argmax.length));
-            const pixels = new Uint8Array(side * side * 4);
+            const screenW = frame.height;
+            const screenH = frame.width;
+            const maskW =
+              argmax.length === screenW * screenH
+                ? screenW
+                : Math.round(Math.sqrt(argmax.length));
+            const maskH =
+              argmax.length === screenW * screenH
+                ? screenH
+                : Math.round(Math.sqrt(argmax.length));
+            const pixels = new Uint8Array(maskW * maskH * 4);
             for (let i = 0; i < argmax.length; i++) {
               const color = colors[argmax[i]!] ?? [0, 0, 0, 0];
               pixels[i * 4] = color[0]!;
@@ -165,13 +174,13 @@ export default function SegmentationTask({
             const skData = Skia.Data.fromBytes(pixels);
             const img = Skia.Image.MakeImage(
               {
-                width: side,
-                height: side,
+                width: maskW,
+                height: maskH,
                 alphaType: AlphaType.Unpremul,
                 colorType: ColorType.RGBA_8888,
               },
               skData,
-              side * 4
+              maskW * 4
             );
             if (img) scheduleOnRN(updateMask, img);
           }