Skip to content

Commit 1fc061c

Browse files
authored
feat: Instance Segmentation API (#893)
## Description This PR introduces the Instance Segmentation module. The provided API supports two predefined models (RF-DETR and the YOLO26-Seg family) and allows plugging in custom models through `fromCustomConfig`. In addition to the new API, this PR adds common CV utilities on the C++ side and migrates Object Detection to use them. ### Introduces a breaking change? - [ ] Yes - [x] No ### Type of change - [ ] Bug fix (change which fixes an issue) - [x] New feature (change which adds functionality) - [ ] Documentation update (improves or adds clarity to existing documentation) - [ ] Other (chores, tests, code style improvements, etc.) ### Tested on - [x] iOS - [x] Android ### Testing instructions - [x] Run the test suite - [x] Try the new features by running the demo app for Instance Segmentation - [x] Confirm ObjectDetection works as expected by running the respective demo apps ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues <!-- Link related issues here using #issue-number --> #825 - [ ] I have performed a self-review of my code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [ ] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. --> ---------
1 parent 3863425 commit 1fc061c

File tree

44 files changed

+3130
-109
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+3130
-109
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,4 @@ packages/react-native-executorch/common/rnexecutorch/tests/integration/assets/mo
102102
*.tgz
103103
Makefile
104104
*.pte
105+

apps/computer-vision/app/_layout.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,14 @@ export default function _layout() {
8484
headerTitleStyle: { color: ColorPalette.primary },
8585
}}
8686
/>
87+
<Drawer.Screen
88+
name="instance_segmentation/index"
89+
options={{
90+
drawerLabel: 'Instance Segmentation',
91+
title: 'Instance Segmentation',
92+
headerTitleStyle: { color: ColorPalette.primary },
93+
}}
94+
/>
8795
<Drawer.Screen
8896
name="object_detection/index"
8997
options={{

apps/computer-vision/app/index.tsx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ export default function Home() {
3535
>
3636
<Text style={styles.buttonText}>Object Detection</Text>
3737
</TouchableOpacity>
38+
<TouchableOpacity
39+
style={styles.button}
40+
onPress={() => router.navigate('instance_segmentation/')}
41+
>
42+
<Text style={styles.buttonText}>Instance Segmentation</Text>
43+
</TouchableOpacity>
3844
<TouchableOpacity
3945
style={styles.button}
4046
onPress={() => router.navigate('ocr/')}

apps/computer-vision/app/instance_segmentation/index.tsx

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
import Spinner from '../../components/Spinner';
2+
import { BottomBar } from '../../components/BottomBar';
3+
import { getImage } from '../../utils';
4+
import { useInstanceSegmentation, YOLO26N_SEG } from 'react-native-executorch';
5+
import {
6+
View,
7+
StyleSheet,
8+
ScrollView,
9+
Text,
10+
TouchableOpacity,
11+
} from 'react-native';
12+
import React, { useContext, useEffect, useState } from 'react';
13+
import { GeneratingContext } from '../../context';
14+
import ScreenWrapper from '../../ScreenWrapper';
15+
import ImageWithMasks, {
16+
buildDisplayInstances,
17+
DisplayInstance,
18+
} from '../../components/ImageWithMasks';
19+
20+
export default function InstanceSegmentationScreen() {
21+
const { setGlobalGenerating } = useContext(GeneratingContext);
22+
23+
const {
24+
isReady,
25+
isGenerating,
26+
downloadProgress,
27+
forward,
28+
error,
29+
getAvailableInputSizes,
30+
} = useInstanceSegmentation({
31+
model: YOLO26N_SEG,
32+
});
33+
34+
const [imageUri, setImageUri] = useState('');
35+
const [imageSize, setImageSize] = useState({ width: 0, height: 0 });
36+
const [instances, setInstances] = useState<DisplayInstance[]>([]);
37+
const [selectedInputSize, setSelectedInputSize] = useState<number | null>(
38+
null
39+
);
40+
41+
const availableInputSizes = getAvailableInputSizes();
42+
43+
useEffect(() => {
44+
setGlobalGenerating(isGenerating);
45+
}, [isGenerating, setGlobalGenerating]);
46+
47+
// Set default input size when model is ready
48+
useEffect(() => {
49+
if (isReady && availableInputSizes && availableInputSizes.length > 0) {
50+
setSelectedInputSize(availableInputSizes[0]);
51+
}
52+
}, [isReady, availableInputSizes]);
53+
54+
const handleCameraPress = async (isCamera: boolean) => {
55+
const image = await getImage(isCamera);
56+
if (!image?.uri) return;
57+
setImageUri(image.uri);
58+
setImageSize({
59+
width: image.width ?? 0,
60+
height: image.height ?? 0,
61+
});
62+
setInstances([]);
63+
};
64+
65+
const runForward = async () => {
66+
if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return;
67+
68+
try {
69+
const output = await forward(imageUri, {
70+
confidenceThreshold: 0.5,
71+
iouThreshold: 0.55,
72+
maxInstances: 20,
73+
returnMaskAtOriginalResolution: true,
74+
inputSize: selectedInputSize ?? undefined,
75+
});
76+
77+
// Convert raw masks → small Skia images immediately.
78+
// Raw Uint8Array mask buffers (backed by native OwningArrayBuffer)
79+
// go out of scope here and become eligible for GC right away.
80+
setInstances(buildDisplayInstances(output));
81+
} catch (e) {
82+
console.error(e);
83+
}
84+
};
85+
86+
if (!isReady && error) {
87+
return (
88+
<ScreenWrapper>
89+
<View style={styles.errorContainer}>
90+
<Text style={styles.errorTitle}>Error Loading Model</Text>
91+
<Text style={styles.errorText}>
92+
{error?.message || 'Unknown error occurred'}
93+
</Text>
94+
<Text style={styles.errorCode}>Code: {error?.code || 'N/A'}</Text>
95+
</View>
96+
</ScreenWrapper>
97+
);
98+
}
99+
100+
if (!isReady) {
101+
return (
102+
<Spinner
103+
visible={!isReady}
104+
textContent={`Loading the model ${(downloadProgress * 100).toFixed(0)} %`}
105+
/>
106+
);
107+
}
108+
109+
return (
110+
<ScreenWrapper>
111+
<View style={styles.container}>
112+
<View style={styles.imageContainer}>
113+
<ImageWithMasks
114+
imageUri={imageUri}
115+
instances={instances}
116+
imageWidth={imageSize.width}
117+
imageHeight={imageSize.height}
118+
/>
119+
</View>
120+
121+
{imageUri && availableInputSizes && availableInputSizes.length > 0 && (
122+
<View style={styles.inputSizeContainer}>
123+
<Text style={styles.inputSizeLabel}>Input Size:</Text>
124+
<ScrollView
125+
horizontal
126+
showsHorizontalScrollIndicator={false}
127+
style={styles.inputSizeScroll}
128+
>
129+
{availableInputSizes.map((size) => (
130+
<TouchableOpacity
131+
key={size}
132+
style={[
133+
styles.sizeButton,
134+
selectedInputSize === size && styles.sizeButtonActive,
135+
]}
136+
onPress={() => setSelectedInputSize(size)}
137+
>
138+
<Text
139+
style={[
140+
styles.sizeButtonText,
141+
selectedInputSize === size && styles.sizeButtonTextActive,
142+
]}
143+
>
144+
{size}
145+
</Text>
146+
</TouchableOpacity>
147+
))}
148+
</ScrollView>
149+
</View>
150+
)}
151+
152+
{instances.length > 0 && (
153+
<View style={styles.resultsContainer}>
154+
<Text style={styles.resultsHeader}>
155+
Detected {instances.length} instance(s)
156+
</Text>
157+
<ScrollView style={styles.resultsList}>
158+
{instances.map((instance, idx) => (
159+
<View key={idx} style={styles.resultRow}>
160+
<Text style={styles.resultText}>
161+
{instance.label || 'Unknown'} (
162+
{(instance.score * 100).toFixed(1)}%)
163+
</Text>
164+
</View>
165+
))}
166+
</ScrollView>
167+
</View>
168+
)}
169+
</View>
170+
171+
<BottomBar
172+
handleCameraPress={handleCameraPress}
173+
runForward={runForward}
174+
/>
175+
</ScreenWrapper>
176+
);
177+
}
178+
179+
const styles = StyleSheet.create({
180+
container: {
181+
flex: 6,
182+
width: '100%',
183+
},
184+
imageContainer: {
185+
flex: 1,
186+
width: '100%',
187+
padding: 16,
188+
},
189+
inputSizeContainer: {
190+
paddingHorizontal: 16,
191+
paddingVertical: 12,
192+
backgroundColor: '#fff',
193+
borderTopWidth: 1,
194+
borderTopColor: '#e0e0e0',
195+
},
196+
inputSizeLabel: {
197+
fontSize: 14,
198+
fontWeight: '600',
199+
color: '#333',
200+
marginBottom: 8,
201+
},
202+
inputSizeScroll: {
203+
flexDirection: 'row',
204+
},
205+
sizeButton: {
206+
paddingHorizontal: 16,
207+
paddingVertical: 8,
208+
marginRight: 8,
209+
borderRadius: 6,
210+
backgroundColor: '#f0f0f0',
211+
},
212+
sizeButtonActive: {
213+
backgroundColor: '#007AFF',
214+
},
215+
sizeButtonText: {
216+
fontSize: 14,
217+
fontWeight: '600',
218+
color: '#666',
219+
},
220+
sizeButtonTextActive: {
221+
color: '#fff',
222+
},
223+
resultsContainer: {
224+
maxHeight: 200,
225+
paddingHorizontal: 16,
226+
paddingVertical: 12,
227+
backgroundColor: '#fff',
228+
borderTopWidth: 1,
229+
borderTopColor: '#e0e0e0',
230+
},
231+
resultsHeader: {
232+
fontSize: 16,
233+
fontWeight: '600',
234+
marginBottom: 8,
235+
color: '#333',
236+
},
237+
resultsList: {
238+
flex: 1,
239+
},
240+
resultRow: {
241+
flexDirection: 'row',
242+
alignItems: 'center',
243+
paddingVertical: 6,
244+
paddingHorizontal: 8,
245+
marginBottom: 4,
246+
backgroundColor: '#f9f9f9',
247+
borderRadius: 6,
248+
},
249+
resultText: {
250+
fontSize: 14,
251+
fontWeight: '500',
252+
color: '#333',
253+
},
254+
errorContainer: {
255+
flex: 1,
256+
justifyContent: 'center',
257+
alignItems: 'center',
258+
padding: 32,
259+
},
260+
errorTitle: {
261+
fontSize: 20,
262+
fontWeight: '700',
263+
color: '#e74c3c',
264+
marginBottom: 12,
265+
},
266+
errorText: {
267+
fontSize: 14,
268+
color: '#555',
269+
textAlign: 'center',
270+
marginBottom: 8,
271+
},
272+
errorCode: {
273+
fontSize: 12,
274+
color: '#999',
275+
fontFamily: 'Courier',
276+
},
277+
});

apps/computer-vision/app/vision_camera/index.tsx

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,13 @@ import ColorPalette from '../../colors';
3131
import ClassificationTask from '../../components/vision_camera/tasks/ClassificationTask';
3232
import ObjectDetectionTask from '../../components/vision_camera/tasks/ObjectDetectionTask';
3333
import SegmentationTask from '../../components/vision_camera/tasks/SegmentationTask';
34+
import InstanceSegmentationTask from '../../components/vision_camera/tasks/InstanceSegmentationTask';
3435

35-
type TaskId = 'classification' | 'objectDetection' | 'segmentation';
36+
type TaskId =
37+
| 'classification'
38+
| 'objectDetection'
39+
| 'segmentation'
40+
| 'instanceSegmentation';
3641
type ModelId =
3742
| 'classification'
3843
| 'objectDetectionSsdlite'
@@ -43,7 +48,9 @@ type ModelId =
4348
| 'segmentationLraspp'
4449
| 'segmentationFcnResnet50'
4550
| 'segmentationFcnResnet101'
46-
| 'segmentationSelfie';
51+
| 'segmentationSelfie'
52+
| 'instanceSegmentationYolo26n'
53+
| 'instanceSegmentationRfdetr';
4754

4855
type TaskVariant = { id: ModelId; label: string };
4956
type Task = { id: TaskId; label: string; variants: TaskVariant[] };
@@ -67,6 +74,14 @@ const TASKS: Task[] = [
6774
{ id: 'segmentationSelfie', label: 'Selfie' },
6875
],
6976
},
77+
{
78+
id: 'instanceSegmentation',
79+
label: 'Inst Seg',
80+
variants: [
81+
{ id: 'instanceSegmentationYolo26n', label: 'YOLO26N Seg' },
82+
{ id: 'instanceSegmentationRfdetr', label: 'RF-DETR Nano Seg' },
83+
],
84+
},
7085
{
7186
id: 'objectDetection',
7287
label: 'Detect',
@@ -220,6 +235,16 @@ export default function VisionCameraScreen() {
220235
}
221236
/>
222237
)}
238+
{activeTask === 'instanceSegmentation' && (
239+
<InstanceSegmentationTask
240+
{...taskProps}
241+
activeModel={
242+
activeModel as
243+
| 'instanceSegmentationYolo26n'
244+
| 'instanceSegmentationRfdetr'
245+
}
246+
/>
247+
)}
223248

224249
{!isReady && (
225250
<View style={styles.loadingOverlay}>

0 commit comments

Comments
 (0)