Skip to content

Commit bdb3616

Browse files
chmjkb authored and msluszniak committed
chore!: resize the image segmentation output to the input size by default, change param name (#801)
## Description This PR changes the param name from `resize` to `resizeToInput` in image segmentation APIs. It also defaults to true now, as the performance impact is acceptable. ### Introduces a breaking change? - [x] Yes - [ ] No ### Type of change - [ ] Bug fix (change which fixes an issue) - [ ] New feature (change which adds functionality) - [ ] Documentation update (improves or adds clarity to existing documentation) - [x] Other (chores, tests, code style improvements etc.) ### Tested on - [ ] iOS - [ ] Android ### Testing instructions <!-- Provide step-by-step instructions on how to test your changes. Include setup details if necessary. --> ### Screenshots <!-- Add screenshots here, if applicable --> ### Related issues <!-- Link related issues here using #issue-number --> ### Checklist - [ ] I have performed a self-review of my code - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have updated the documentation accordingly - [ ] My changes generate no new warnings ### Additional notes <!-- Include any additional information, assumptions, or context that reviewers might need to understand this PR. --> --------- Co-authored-by: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com>
1 parent c4f63e8 commit bdb3616

7 files changed

Lines changed: 77 additions & 76 deletions

File tree

apps/computer-vision/app/image_segmentation/index.tsx

Lines changed: 59 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,13 @@ import {
1212
Skia,
1313
AlphaType,
1414
ColorType,
15+
SkImage,
1516
} from '@shopify/react-native-skia';
1617
import { View, StyleSheet, Image } from 'react-native';
1718
import React, { useContext, useEffect, useState } from 'react';
1819
import { GeneratingContext } from '../../context';
1920
import ScreenWrapper from '../../ScreenWrapper';
2021

21-
const width = 224;
22-
const height = 224;
23-
24-
let pixels = new Uint8Array(width * height * 4);
25-
pixels.fill(255);
26-
27-
let data = Skia.Data.fromBytes(pixels);
28-
let img = Skia.Image.MakeImage(
29-
{
30-
width: width,
31-
height: height,
32-
alphaType: AlphaType.Opaque,
33-
colorType: ColorType.RGBA_8888,
34-
},
35-
data,
36-
width * 4
37-
);
38-
3922
const numberToColor: number[][] = [
4023
[255, 87, 51], // 0 Red
4124
[51, 255, 87], // 1 Green
@@ -67,48 +50,58 @@ export default function ImageSegmentationScreen() {
6750
setGlobalGenerating(model.isGenerating);
6851
}, [model.isGenerating, setGlobalGenerating]);
6952
const [imageUri, setImageUri] = useState('');
53+
const [imageSize, setImageSize] = useState({ width: 0, height: 0 });
54+
const [segImage, setSegImage] = useState<SkImage | null>(null);
55+
const [canvasSize, setCanvasSize] = useState({ width: 0, height: 0 });
7056

7157
const handleCameraPress = async (isCamera: boolean) => {
7258
const image = await getImage(isCamera);
73-
const uri = image?.uri;
74-
setImageUri(uri as string);
59+
if (!image?.uri) return;
60+
setImageUri(image.uri);
61+
setImageSize({
62+
width: image.width ?? 0,
63+
height: image.height ?? 0,
64+
});
65+
setSegImage(null);
7566
};
7667

77-
const [resultPresent, setResultPresent] = useState(false);
78-
7968
const runForward = async () => {
80-
if (imageUri) {
81-
try {
82-
const output = await model.forward(imageUri);
83-
pixels = new Uint8Array(width * height * 4);
69+
if (!imageUri || imageSize.width === 0 || imageSize.height === 0) return;
70+
try {
71+
const { width, height } = imageSize;
72+
const output = await model.forward(imageUri, [DeeplabLabel.ARGMAX]);
73+
const argmax = output[DeeplabLabel.ARGMAX] || [];
74+
const uniqueValues = new Set<number>();
75+
for (let i = 0; i < argmax.length; i++) {
76+
uniqueValues.add(argmax[i]);
77+
}
78+
const pixels = new Uint8Array(width * height * 4);
8479

85-
for (let x = 0; x < width; x++) {
86-
for (let y = 0; y < height; y++) {
87-
for (let i = 0; i < 3; i++) {
88-
pixels[(x * height + y) * 4 + i] =
89-
numberToColor[
90-
(output[DeeplabLabel.ARGMAX] || [])[x * height + y]
91-
][i];
92-
}
93-
pixels[(x * height + y) * 4 + 3] = 255;
94-
}
80+
for (let row = 0; row < height; row++) {
81+
for (let col = 0; col < width; col++) {
82+
const idx = row * width + col;
83+
const color = numberToColor[argmax[idx]] || [0, 0, 0];
84+
pixels[idx * 4] = color[0];
85+
pixels[idx * 4 + 1] = color[1];
86+
pixels[idx * 4 + 2] = color[2];
87+
pixels[idx * 4 + 3] = 255;
9588
}
96-
97-
data = Skia.Data.fromBytes(pixels);
98-
img = Skia.Image.MakeImage(
99-
{
100-
width: width,
101-
height: height,
102-
alphaType: AlphaType.Opaque,
103-
colorType: ColorType.RGBA_8888,
104-
},
105-
data,
106-
width * 4
107-
);
108-
setResultPresent(true);
109-
} catch (e) {
110-
console.error(e);
11189
}
90+
91+
const data = Skia.Data.fromBytes(pixels);
92+
const img = Skia.Image.MakeImage(
93+
{
94+
width,
95+
height,
96+
alphaType: AlphaType.Opaque,
97+
colorType: ColorType.RGBA_8888,
98+
},
99+
data,
100+
width * 4
101+
);
102+
setSegImage(img);
103+
} catch (e) {
104+
console.error(e);
112105
}
113106
};
114107

@@ -135,16 +128,24 @@ export default function ImageSegmentationScreen() {
135128
}
136129
/>
137130
</View>
138-
{resultPresent && (
139-
<View style={styles.canvasContainer}>
131+
{segImage && (
132+
<View
133+
style={styles.canvasContainer}
134+
onLayout={(e) =>
135+
setCanvasSize({
136+
width: e.nativeEvent.layout.width,
137+
height: e.nativeEvent.layout.height,
138+
})
139+
}
140+
>
140141
<Canvas style={styles.canvas}>
141142
<SkiaImage
142-
image={img}
143+
image={segImage}
143144
fit="contain"
144145
x={0}
145146
y={0}
146-
width={width}
147-
height={height}
147+
width={canvasSize.width}
148+
height={canvasSize.height}
148149
/>
149150
</Canvas>
150151
</View>
@@ -181,7 +182,7 @@ const styles = StyleSheet.create({
181182
padding: 4,
182183
},
183184
canvas: {
184-
width: width,
185-
height: height,
185+
width: '100%',
186+
height: '100%',
186187
},
187188
});

docs/docs/03-hooks/02-computer-vision/useImageSegmentation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ To run the model, you can use the [`forward`](../../06-api-reference/interfaces/
5555

5656
- The image can be a remote URL, a local file URI, or a base64-encoded image.
5757
- The [`classesOfInterest`](../../06-api-reference/interfaces/ImageSegmentationType.md#classesofinterest) list contains classes for which to output the full results. By default the list is empty, and only the most probable classes are returned (essentially an arg max for each pixel). Look at [`DeeplabLabel`](../../06-api-reference/enumerations/DeeplabLabel.md) enum for possible classes.
58-
- The [`resize`](../../06-api-reference/interfaces/ImageSegmentationType.md#resize) flag says whether the output will be rescaled back to the size of the image you put in. The default is `false`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image.
58+
- The [`resizeToInput`](../../06-api-reference/interfaces/ImageSegmentationType.md#resizeToInput) flag specifies whether the output will be rescaled back to the size of the input image. The default is `true`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image.
5959

6060
:::warning
6161
Setting `resizeToInput` to true will make `forward` slower.

docs/docs/04-typescript-api/02-computer-vision/ImageSegmentationModule.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ To run the model, you can use the [`forward`](../../06-api-reference/classes/Ima
5252

5353
- The image can be a remote URL, a local file URI, or a base64-encoded image.
5454
- The [`classesOfInterest`](../../06-api-reference/classes/ImageSegmentationModule.md#classesofinterest) list contains classes for which to output the full results. By default the list is empty, and only the most probable classes are returned (essentially an arg max for each pixel). Look at [`DeeplabLabel`](../../06-api-reference/enumerations/DeeplabLabel.md) enum for possible classes.
55-
- The [`resize`](../../06-api-reference/classes/ImageSegmentationModule.md#resize) flag says whether the output will be rescaled back to the size of the image you put in. The default is `false`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for the `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image.
55+
- The [`resizeToInput`](../../06-api-reference/classes/ImageSegmentationModule.md#resizeToInput) flag specifies whether the output will be rescaled back to the size of the input image. The default is `true`. The model runs inference on a scaled (probably smaller) version of your image (224x224 for the `DEEPLAB_V3_RESNET50`). If you choose to resize, the output will be `number[]` of size `width * height` of your original image.
5656

5757
:::warning
5858
Setting `resizeToInput` to true will make `forward` slower.

docs/docs/06-api-reference/classes/ImageSegmentationModule.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Class: ImageSegmentationModule
22

3-
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:13](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L13)
3+
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:13](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L13)
44

55
Module for image segmentation tasks.
66

@@ -28,7 +28,7 @@ Module for image segmentation tasks.
2828

2929
> **nativeModule**: `any` = `null`
3030
31-
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L8)
31+
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:8](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L8)
3232

3333
Native module instance
3434

@@ -42,7 +42,7 @@ Native module instance
4242

4343
> **delete**(): `void`
4444
45-
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L41)
45+
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:41](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L41)
4646

4747
Unloads the model from memory.
4848

@@ -58,9 +58,9 @@ Unloads the model from memory.
5858

5959
### forward()
6060

61-
> **forward**(`imageSource`, `classesOfInterest?`, `resize?`): `Promise`\<`Partial`\<`Record`\<[`DeeplabLabel`](../enumerations/DeeplabLabel.md), `number`[]\>\>\>
61+
> **forward**(`imageSource`, `classesOfInterest?`, `resizeToInput?`): `Promise`\<`Partial`\<`Record`\<[`DeeplabLabel`](../enumerations/DeeplabLabel.md), `number`[]\>\>\>
6262
63-
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L46)
63+
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:46](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L46)
6464

6565
Executes the model's forward pass
6666

@@ -78,11 +78,11 @@ a fetchable resource or a Base64-encoded string.
7878

7979
an optional list of DeeplabLabel used to indicate additional arrays of probabilities to output (see section "Running the model"). The default is an empty list.
8080

81-
##### resize?
81+
##### resizeToInput?
8282

8383
`boolean`
8484

85-
an optional boolean to indicate whether the output should be resized to the original image dimensions, or left in the size of the model (see section "Running the model"). The default is `false`.
85+
an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`.
8686

8787
#### Returns
8888

@@ -96,7 +96,7 @@ A dictionary where keys are `DeeplabLabel` and values are arrays of probabilitie
9696

9797
> `protected` **forwardET**(`inputTensor`): `Promise`\<[`TensorPtr`](../interfaces/TensorPtr.md)[]\>
9898
99-
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L23)
99+
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:23](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L23)
100100

101101
Runs the model's forward method with the given input tensors.
102102
It returns the output tensors that mimic the structure of output from ExecuTorch.
@@ -125,7 +125,7 @@ Array of output tensors.
125125

126126
> **getInputShape**(`methodName`, `index`): `Promise`\<`number`[]\>
127127
128-
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/BaseModule.ts#L34)
128+
Defined in: [packages/react-native-executorch/src/modules/BaseModule.ts:34](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/BaseModule.ts#L34)
129129

130130
Gets the input shape for a given method and index.
131131

@@ -159,7 +159,7 @@ The input shape as an array of numbers.
159159

160160
> **load**(`model`, `onDownloadProgressCallback`): `Promise`\<`void`\>
161161
162-
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:21](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L21)
162+
Defined in: [packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts:21](https://github.com/software-mansion/react-native-executorch/blob/b5006f04ed89e0ab316675cb5fc7fabdaa345c32/packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts#L21)
163163

164164
Loads the model, where `modelSource` is a string that specifies the location of the model binary.
165165
To track the download progress, supply a callback function `onDownloadProgressCallback`.

docs/docs/06-api-reference/functions/useImageSegmentation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
> **useImageSegmentation**(`ImageSegmentationProps`): [`ImageSegmentationType`](../interfaces/ImageSegmentationType.md)
44
5-
Defined in: [packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts:15](https://github.com/software-mansion/react-native-executorch/blob/326d6344894d75625c600d5988666e215a32d466/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts#L15)
5+
Defined in: [packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts:15](https://github.com/software-mansion/react-native-executorch/blob/9e79b9bf2a34159a71071fbfdaed3ddd9393702f/packages/react-native-executorch/src/hooks/computer_vision/useImageSegmentation.ts#L15)
66

77
React hook for managing an Image Segmentation model instance.
88

packages/react-native-executorch/src/modules/computer_vision/ImageSegmentationModule.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@ export class ImageSegmentationModule extends BaseModule {
4040
*
4141
* @param imageSource - a fetchable resource or a Base64-encoded string.
4242
* @param classesOfInterest - an optional list of DeeplabLabel used to indicate additional arrays of probabilities to output (see section "Running the model"). The default is an empty list.
43-
* @param resize - an optional boolean to indicate whether the output should be resized to the original image dimensions, or left in the size of the model (see section "Running the model"). The default is `false`.
43+
* @param resizeToInput - an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`.
4444
* @returns A dictionary where keys are `DeeplabLabel` and values are arrays of probabilities for each pixel belonging to the corresponding class.
4545
*/
4646
async forward(
4747
imageSource: string,
4848
classesOfInterest?: DeeplabLabel[],
49-
resize?: boolean
49+
resizeToInput?: boolean
5050
): Promise<Partial<Record<DeeplabLabel, number[]>>> {
5151
if (this.nativeModule == null) {
5252
throw new RnExecutorchError(
@@ -58,7 +58,7 @@ export class ImageSegmentationModule extends BaseModule {
5858
const stringDict = await this.nativeModule.generate(
5959
imageSource,
6060
(classesOfInterest || []).map((label) => DeeplabLabel[label]),
61-
resize || false
61+
resizeToInput ?? true
6262
);
6363

6464
let enumDict: { [key in DeeplabLabel]?: number[] } = {};

packages/react-native-executorch/src/types/imageSegmentation.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,13 @@ export interface ImageSegmentationType {
7676
* Executes the model's forward pass to perform semantic segmentation on the provided image.
7777
* @param imageSource - A string representing the image source (e.g., a file path, URI, or base64 string) to be processed.
7878
* @param classesOfInterest - An optional array of `DeeplabLabel` enums. If provided, the model will only return segmentation masks for these specific classes.
79-
* @param resize - An optional boolean indicating whether the output segmentation masks should be resized to match the original image dimensions. Defaults to standard model behavior if undefined.
79+
* @param resizeToInput - an optional boolean to indicate whether the output should be resized to the original input image dimensions. If `false`, returns the model output without any resizing (see section "Running the model"). Defaults to `true`.
8080
* @returns A Promise that resolves to an object mapping each detected `DeeplabLabel` to its corresponding segmentation mask (represented as a flattened array of numbers).
8181
* @throws {RnExecutorchError} If the model is not loaded or is currently processing another image.
8282
*/
8383
forward: (
8484
imageSource: string,
8585
classesOfInterest?: DeeplabLabel[],
86-
resize?: boolean
86+
resizeToInput?: boolean
8787
) => Promise<Partial<Record<DeeplabLabel, number[]>>>;
8888
}

0 commit comments

Comments (0)