Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
4341137
feat: initial version
NorbertKlockiewicz Mar 17, 2026
5cd87ed
feat: handle most common cases
NorbertKlockiewicz Mar 17, 2026
cd65250
feat: working rotations on both ios and android
NorbertKlockiewicz Mar 18, 2026
3da656a
fix: correctly transform bounding boxes
NorbertKlockiewicz Mar 18, 2026
21e8edf
feat: model sees the same thing as user approach
NorbertKlockiewicz Mar 18, 2026
eed5a75
feat: remove the cameraPosition argument
NorbertKlockiewicz Mar 18, 2026
b800b72
feat: this is the way
NorbertKlockiewicz Mar 18, 2026
2b7497c
feat: support all vision models
NorbertKlockiewicz Mar 19, 2026
606d984
refactor: remove unused code
NorbertKlockiewicz Mar 19, 2026
150daf9
refactor: changes suggested by claude
NorbertKlockiewicz Mar 19, 2026
4a3683a
tests: add tests for new utils
NorbertKlockiewicz Mar 19, 2026
db536d1
refactor: rename isMirrored to isFrontCamera
NorbertKlockiewicz Mar 19, 2026
093f5c5
docs: update vision camera docs
NorbertKlockiewicz Mar 19, 2026
bde4a49
refactor: add comments
NorbertKlockiewicz Mar 19, 2026
506cdc2
refactor: remove outputWidth/Height
NorbertKlockiewicz Mar 19, 2026
7bea029
refactor: add missing comments
NorbertKlockiewicz Mar 19, 2026
a172209
refactor: fixes after rebase + requested changes
NorbertKlockiewicz Mar 20, 2026
456cd83
feat: apply orientation handling to instance segmentation
NorbertKlockiewicz Mar 20, 2026
a92fae1
chore: Document isMirrored property logic in VisionModule
NorbertKlockiewicz Mar 20, 2026
dae70ed
Update packages/react-native-executorch/src/modules/computer_vision/V…
NorbertKlockiewicz Mar 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 40 additions & 6 deletions apps/computer-vision/app/vision_camera/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,16 @@ import ClassificationTask from '../../components/vision_camera/tasks/Classificat
import ObjectDetectionTask from '../../components/vision_camera/tasks/ObjectDetectionTask';
import SegmentationTask from '../../components/vision_camera/tasks/SegmentationTask';
import InstanceSegmentationTask from '../../components/vision_camera/tasks/InstanceSegmentationTask';
import OCRTask from '../../components/vision_camera/tasks/OCRTask';
import StyleTransferTask from '../../components/vision_camera/tasks/StyleTransferTask';

type TaskId =
| 'classification'
| 'objectDetection'
| 'segmentation'
| 'instanceSegmentation';
| 'instanceSegmentation'
| 'ocr'
| 'styleTransfer';
type ModelId =
| 'classification'
| 'objectDetectionSsdlite'
Expand All @@ -50,7 +54,10 @@ type ModelId =
| 'segmentationFcnResnet101'
| 'segmentationSelfie'
| 'instanceSegmentationYolo26n'
| 'instanceSegmentationRfdetr';
| 'instanceSegmentationRfdetr'
| 'ocr'
| 'styleTransferCandy'
| 'styleTransferMosaic';

type TaskVariant = { id: ModelId; label: string };
type Task = { id: TaskId; label: string; variants: TaskVariant[] };
Expand Down Expand Up @@ -90,11 +97,25 @@ const TASKS: Task[] = [
{ id: 'objectDetectionRfdetr', label: 'RF-DETR Nano' },
],
},
{
id: 'ocr',
label: 'OCR',
variants: [{ id: 'ocr', label: 'English' }],
},
{
id: 'styleTransfer',
label: 'Style',
variants: [
{ id: 'styleTransferCandy', label: 'Candy' },
{ id: 'styleTransferMosaic', label: 'Mosaic' },
],
},
];

// Module-level const so worklets in task components can always reference the same stable object.
// Module-level consts so worklets in task components can always reference the same stable objects.
// Never replaced — only mutated via setBlocking to avoid closure staleness.
const frameKillSwitch = createSynchronizable(false);
const cameraPositionSync = createSynchronizable<'front' | 'back'>('back');

export default function VisionCameraScreen() {
const insets = useSafeAreaInsets();
Expand All @@ -121,7 +142,7 @@ export default function VisionCameraScreen() {
const format = useMemo(() => {
if (device == null) return undefined;
try {
return getCameraFormat(device, Templates.FrameProcessing);
return getCameraFormat(device, { ...Templates.FrameProcessing });
} catch {
return undefined;
}
Expand All @@ -135,6 +156,10 @@ export default function VisionCameraScreen() {
return () => clearTimeout(id);
}, [activeModel]);

useEffect(() => {
cameraPositionSync.setBlocking(cameraPosition);
}, [cameraPosition]);

const handleFpsChange = useCallback((newFps: number, newMs: number) => {
setFps(newFps);
setFrameMs(newMs);
Expand Down Expand Up @@ -177,7 +202,7 @@ export default function VisionCameraScreen() {
const taskProps = {
activeModel,
canvasSize,
cameraPosition,
cameraPositionSync,
frameKillSwitch,
onFrameOutputChange: setFrameOutput,
onReadyChange: setIsReady,
Expand All @@ -196,7 +221,7 @@ export default function VisionCameraScreen() {
outputs={frameOutput ? [frameOutput] : []}
isActive={isFocused}
format={format}
orientationSource="interface"
orientationSource="device"
/>

{/* Layout sentinel — measures the full-screen area for bbox/canvas sizing */}
Expand Down Expand Up @@ -245,6 +270,15 @@ export default function VisionCameraScreen() {
}
/>
)}
{activeTask === 'ocr' && <OCRTask {...taskProps} />}
{activeTask === 'styleTransfer' && (
<StyleTransferTask
{...taskProps}
activeModel={
activeModel as 'styleTransferCandy' | 'styleTransferMosaic'
}
/>
)}

{!isReady && (
<View style={styles.loadingOverlay}>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ import { scheduleOnRN } from 'react-native-worklets';
import { EFFICIENTNET_V2_S, useClassification } from 'react-native-executorch';
import { TaskProps } from './types';

type Props = Omit<TaskProps, 'activeModel' | 'canvasSize' | 'cameraPosition'>;
type Props = Omit<
TaskProps,
'activeModel' | 'canvasSize' | 'cameraPositionSync'
>;

export default function ClassificationTask({
frameKillSwitch,
Expand Down Expand Up @@ -47,6 +50,7 @@ export default function ClassificationTask({
const frameOutput = useFrameOutput({
pixelFormat: 'rgb',
dropFramesWhileBusy: true,
enablePreviewSizedOutputBuffers: true,
onFrame: useCallback(
(frame: Frame) => {
'worklet';
Expand All @@ -71,7 +75,7 @@ export default function ClassificationTask({
scheduleOnRN(updateClass, { label: bestLabel, score: bestScore });
}
} catch {
// ignore
// Frame may be disposed before processing completes — transient, safe to ignore.
} finally {
frame.dispose();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ type Props = TaskProps & { activeModel: InstSegModelId };
export default function InstanceSegmentationTask({
activeModel,
canvasSize,
cameraPosition,
cameraPositionSync,
frameKillSwitch,
onFrameOutputChange,
onReadyChange,
Expand Down Expand Up @@ -96,6 +96,7 @@ export default function InstanceSegmentationTask({
const frameOutput = useFrameOutput({
pixelFormat: 'rgb',
dropFramesWhileBusy: true,
enablePreviewSizedOutputBuffers: true,
onFrame: useCallback(
(frame: Frame) => {
'worklet';
Expand All @@ -105,9 +106,10 @@ export default function InstanceSegmentationTask({
}
try {
if (!instSegRof) return;
const isFrontCamera = cameraPositionSync.getDirty() === 'front';
const iw = frame.width > frame.height ? frame.height : frame.width;
const ih = frame.width > frame.height ? frame.width : frame.height;
const result = instSegRof(frame, {
const result = instSegRof(frame, isFrontCamera, {
confidenceThreshold: 0.5,
iouThreshold: 0.5,
maxInstances: 5,
Expand All @@ -129,7 +131,13 @@ export default function InstanceSegmentationTask({
frame.dispose();
}
},
[instSegRof, frameKillSwitch, updateInstances, activeModel]
[
instSegRof,
frameKillSwitch,
updateInstances,
activeModel,
cameraPositionSync,
]
),
});

Expand All @@ -145,13 +153,7 @@ export default function InstanceSegmentationTask({
const offsetY = (canvasSize.height - imageSize.height * scale) / 2;

return (
<View
style={[
StyleSheet.absoluteFill,
cameraPosition === 'front' && { transform: [{ scaleX: -1 }] },
]}
pointerEvents="none"
>
<View style={StyleSheet.absoluteFill} pointerEvents="none">
{/* Render masks */}
<Canvas style={StyleSheet.absoluteFill} pointerEvents="none">
{instances.map((inst, i) => {
Expand Down Expand Up @@ -197,7 +199,6 @@ export default function InstanceSegmentationTask({
style={[
styles.bboxLabel,
{ backgroundColor: labelColorBg(label) },
cameraPosition === 'front' && { transform: [{ scaleX: -1 }] },
]}
>
<Text style={styles.bboxLabelText}>
Expand Down
130 changes: 130 additions & 0 deletions apps/computer-vision/components/vision_camera/tasks/OCRTask.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import React, { useCallback, useEffect, useRef, useState } from 'react';
import { StyleSheet, View } from 'react-native';
import { Frame, useFrameOutput } from 'react-native-vision-camera';
import { scheduleOnRN } from 'react-native-worklets';
import { OCR_ENGLISH, OCRDetection, useOCR } from 'react-native-executorch';
import Svg, { Polygon, Text as SvgText } from 'react-native-svg';
import { TaskProps } from './types';

// Shared task wiring minus `activeModel` — OCR has only a single model variant.
type Props = Omit<TaskProps, 'activeModel'>;

/**
 * Live OCR demo task: streams camera frames through the English OCR model
 * and overlays the recognized text regions (quadrilateral outlines plus the
 * recognized string) on top of the camera preview.
 *
 * Receives the shared screen wiring (canvas size, kill switch, status
 * callbacks) via {@link Props}; OCR has no model variants, so `activeModel`
 * is omitted.
 */
export default function OCRTask({
  canvasSize,
  cameraPositionSync,
  frameKillSwitch,
  onFrameOutputChange,
  onReadyChange,
  onProgressChange,
  onGeneratingChange,
  onFpsChange,
}: Props) {
  const model = useOCR({ model: OCR_ENGLISH });
  const [detections, setDetections] = useState<OCRDetection[]>([]);
  // Latest processed frame's portrait-oriented dimensions; 1x1 initially so
  // the scale math below never divides by zero before the first frame.
  const [imageSize, setImageSize] = useState({ width: 1, height: 1 });
  const lastFrameTimeRef = useRef(Date.now());

  // Surface model lifecycle state to the shared screen chrome.
  useEffect(() => {
    onReadyChange(model.isReady);
  }, [model.isReady, onReadyChange]);

  useEffect(() => {
    onProgressChange(model.downloadProgress);
  }, [model.downloadProgress, onProgressChange]);

  useEffect(() => {
    onGeneratingChange(model.isGenerating);
  }, [model.isGenerating, onGeneratingChange]);

  const ocrRof = model.runOnFrame;

  // Runs on the RN thread (via scheduleOnRN): stores the newest results and
  // derives an FPS figure from the interval between consecutive updates.
  const updateDetections = useCallback(
    (payload: { results: OCRDetection[]; frameW: number; frameH: number }) => {
      setDetections(payload.results);
      setImageSize({ width: payload.frameW, height: payload.frameH });
      const timestamp = Date.now();
      const elapsed = timestamp - lastFrameTimeRef.current;
      if (elapsed > 0) onFpsChange(Math.round(1000 / elapsed), elapsed);
      lastFrameTimeRef.current = timestamp;
    },
    [onFpsChange]
  );

  const frameOutput = useFrameOutput({
    pixelFormat: 'rgb',
    dropFramesWhileBusy: true,
    enablePreviewSizedOutputBuffers: true,
    onFrame: useCallback(
      (frame: Frame) => {
        'worklet';
        if (frameKillSwitch.getDirty()) {
          frame.dispose();
          return;
        }
        try {
          if (!ocrRof) return;
          const frontFacing = cameraPositionSync.getDirty() === 'front';
          const result = ocrRof(frame, frontFacing);
          if (result) {
            // Sensor frames are landscape-native, so width/height are swapped
            // relative to portrait screen orientation.
            scheduleOnRN(updateDetections, {
              results: result,
              frameW: frame.height,
              frameH: frame.width,
            });
          }
        } catch {
          // Frame may be disposed before processing completes — transient, safe to ignore.
        } finally {
          frame.dispose();
        }
      },
      [cameraPositionSync, frameKillSwitch, ocrRof, updateDetections]
    ),
  });

  useEffect(() => {
    onFrameOutputChange(frameOutput);
  }, [frameOutput, onFrameOutputChange]);

  // "Cover" mapping from frame coordinates to canvas coordinates: scale so
  // the frame fills the canvas, then center the overflow on each axis.
  const coverScale = Math.max(
    canvasSize.width / imageSize.width,
    canvasSize.height / imageSize.height
  );
  const shiftX = (canvasSize.width - imageSize.width * coverScale) / 2;
  const shiftY = (canvasSize.height - imageSize.height * coverScale) / 2;

  if (!detections.length) return null;

  // Project a single frame-space point into canvas space.
  const project = (p: { x: number; y: number }) => ({
    x: p.x * coverScale + shiftX,
    y: p.y * coverScale + shiftY,
  });

  return (
    <View style={StyleSheet.absoluteFill} pointerEvents="none">
      <Svg
        width={canvasSize.width}
        height={canvasSize.height}
        style={StyleSheet.absoluteFill}
      >
        {detections.map((detection, index) => {
          const corners = detection.bbox.map(project);
          const points = corners.map((c) => `${c.x},${c.y}`).join(' ');
          // Anchor the label just above the first bbox corner.
          const anchor = corners[0]!;
          return (
            <React.Fragment key={index}>
              <Polygon
                points={points}
                fill="none"
                stroke="cyan"
                strokeWidth={2}
              />
              <SvgText
                x={anchor.x}
                y={anchor.y - 4}
                fill="white"
                fontSize={12}
                fontWeight="bold"
              >
                {detection.text}
              </SvgText>
            </React.Fragment>
          );
        })}
      </Svg>
    </View>
  );
}
Loading
Loading