software-mansion
diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx
Lines changed: 33 additions & 28 deletions
diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json
Lines changed: 6 additions & 3 deletions
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
Lines changed: 19 additions & 0 deletions
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
Lines changed: 60 additions & 2 deletions
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp
Lines changed: 38 additions & 31 deletions
@@ -13,7 +13,26 @@ import ScreenWrapper from '../../ScreenWrapper';
 import ColorPalette from '../../colors';
 import { Images } from 'react-native-nitro-image';
 
-// Helper function to convert image URI to raw pixel data using NitroImage
+/**
+ * Converts a tightly packed BGRA pixel buffer into a tightly packed RGB buffer.
+ *
+ * Each source pixel is read as 4 bytes `[B, G, R, A]`; the alpha byte is
+ * dropped and the remaining channels are written out as `[R, G, B]`.
+ *
+ * @param buffer Source pixels, 4 bytes per pixel.
+ * @param width Image width in pixels.
+ * @param height Image height in pixels.
+ * @returns A new ArrayBuffer of `width * height * 3` bytes in RGB order.
+ * @throws RangeError If `buffer` holds fewer than `width * height * 4` bytes.
+ */
+function convertBGRAtoRGB(
+  buffer: ArrayBuffer,
+  width: number,
+  height: number
+): ArrayBuffer {
+  const pixelCount = width * height;
+  const source = new Uint8Array(buffer);
+
+  // Reading past the end of a typed array yields `undefined`, which is stored
+  // as 0. Fail loudly instead of returning a silently corrupted image when the
+  // buffer does not match the requested dimensions.
+  if (source.length < pixelCount * 4) {
+    throw new RangeError(
+      `BGRA buffer too small: expected at least ${pixelCount * 4} bytes ` +
+        `for ${width}x${height}, got ${source.length}`
+    );
+  }
+
+  const rgb = new Uint8Array(pixelCount * 3);
+
+  // Walk both buffers in lockstep: 4 source bytes in, 3 destination bytes out.
+  for (let src = 0, dst = 0; dst < rgb.length; src += 4, dst += 3) {
+    rgb[dst] = source[src + 2]; // R
+    rgb[dst + 1] = source[src + 1]; // G
+    rgb[dst + 2] = source[src]; // B
+  }
+
+  return rgb.buffer;
+}
+
+// Helper function to convert image URI to raw RGB pixel data
+
+// Helper function to convert image URI to raw RGB pixel data
 async function imageUriToPixelData(
   uri: string,
   targetWidth: number,
@@ -29,32 +48,19 @@ async function imageUriToPixelData(
     const image = await Images.loadFromFileAsync(uri);
     const resized = image.resize(targetWidth, targetHeight);
 
-    // Get pixel data as ArrayBuffer (RGBA format)
-    const pixelData = resized.toRawPixelData();
+    // Get pixel data as ArrayBuffer (BGRA format from NitroImage)
+    const rawPixelData = resized.toRawPixelData();
     const buffer =
-      pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer;
-
-    // Calculate actual buffer dimensions (accounts for device pixel ratio)
-    const bufferSize = buffer?.byteLength || 0;
-    const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel
-    const aspectRatio = targetWidth / targetHeight;
-    const actualHeight = Math.sqrt(totalPixels / aspectRatio);
-    const actualWidth = totalPixels / actualHeight;
+      rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer;
 
-    console.log('Requested:', targetWidth, 'x', targetHeight);
-    console.log('Buffer size:', bufferSize);
-    console.log(
-      'Actual dimensions:',
-      Math.round(actualWidth),
-      'x',
-      Math.round(actualHeight)
-    );
+    // Convert BGRA to RGB as required by the native API
+    const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight);
 
     return {
-      data: buffer,
-      width: Math.round(actualWidth),
-      height: Math.round(actualHeight),
-      channels: 4, // RGBA
+      data: rgbBuffer,
+      width: targetWidth,
+      height: targetHeight,
+      channels: 3, // RGB
     };
   } catch (error) {
     console.error('Error loading image with NitroImage:', error);
@@ -106,12 +112,11 @@ export default function ObjectDetectionScreen() {
     if (imageUri && imageDimensions) {
       try {
         console.log('Converting image to pixel data...');
-        // Resize to 640x640 to avoid memory issues
-        const intermediateSize = 640;
+        // Use original dimensions - let the model resize internally
         const pixelData = await imageUriToPixelData(
           imageUri,
-          intermediateSize,
-          intermediateSize
+          imageDimensions.width,
+          imageDimensions.height
         );
 
         console.log('Running forward with pixel data...', {
@@ -122,7 +127,7 @@ export default function ObjectDetectionScreen() {
         });
 
         // Run inference using unified forward() API
-        const output = await ssdLite.forward(pixelData, 0.5);
+        const output = await ssdLite.forward(pixelData, 0.3);
         console.log('Pixel data result:', output.length, 'detections');
         setResults(output);
       } catch (e) {
 
@@ -17,6 +17,7 @@
     "@react-navigation/native": "^7.1.6",
     "@shopify/react-native-skia": "2.2.12",
     "expo": "^54.0.27",
+    "expo-build-properties": "~1.0.10",
     "expo-constants": "~18.0.11",
     "expo-font": "~14.0.10",
     "expo-linking": "~8.0.10",
@@ -30,17 +31,19 @@
     "react-native-gesture-handler": "~2.28.0",
     "react-native-image-picker": "^7.2.2",
     "react-native-loading-spinner-overlay": "^3.0.1",
-    "react-native-reanimated": "~4.1.1",
+    "react-native-nitro-image": "0.10.2",
+    "react-native-nitro-modules": "0.33.4",
+    "react-native-reanimated": "~4.2.1",
     "react-native-safe-area-context": "~5.6.0",
     "react-native-screens": "~4.16.0",
     "react-native-svg": "15.12.1",
     "react-native-svg-transformer": "^1.5.0",
-    "react-native-worklets": "0.5.1"
+    "react-native-worklets": "^0.7.2"
   },
   "devDependencies": {
     "@babel/core": "^7.25.2",
     "@types/pngjs": "^6.0.5",
-    "@types/react": "~19.1.10"
+    "@types/react": "~19.2.0"
   },
   "private": true
 }
@@ -360,6 +360,25 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) {
   return {runtime, bigInt};
 }
 
+// Converts a vector of unsigned 64-bit integers into a JS array of numbers.
+// Every element is cast to double, so values above 2^53 are not represented
+// exactly on the JS side.
+inline jsi::Value getJsiValue(const std::vector<uint64_t> &vec,
+                              jsi::Runtime &runtime) {
+  const size_t length = vec.size();
+  jsi::Array result(runtime, length);
+  size_t index = 0;
+  for (const uint64_t element : vec) {
+    const auto asDouble = static_cast<double>(element);
+    result.setValueAtIndex(runtime, index, jsi::Value(asDouble));
+    ++index;
+  }
+  return {runtime, result};
+}
+
+// Converts a vector of signed 64-bit integers into a JS array of numbers by
+// casting every element to double.
+inline jsi::Value getJsiValue(const std::vector<int64_t> &vec,
+                              jsi::Runtime &runtime) {
+  const size_t length = vec.size();
+  jsi::Array result(runtime, length);
+  size_t index = 0;
+  for (const int64_t element : vec) {
+    const auto asDouble = static_cast<double>(element);
+    result.setValueAtIndex(runtime, index, jsi::Value(asDouble));
+    ++index;
+  }
+  return {runtime, result};
+}
+
+// Wraps a plain int as a jsi::Value built from the runtime and the integer.
 inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) {
   return {runtime, val};
 }
 
@@ -158,14 +158,12 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
                                        "stream"));
     }
 
-    // Register generateFromFrame for all VisionModel subclasses
     if constexpr (meta::DerivedFromOrSameAs<Model, models::VisionModel>) {
       addFunctions(JSI_EXPORT_FUNCTION(
           ModelHostObject<Model>, synchronousHostFunction<&Model::streamStop>,
           "streamStop"));
     }
 
-    // Register generateFromPixels for models that support it
     if constexpr (meta::HasGenerateFromPixels<Model>) {
       addFunctions(
           JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
@@ -221,6 +219,66 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
     }
   }
 
+  // Host function for model methods that are invoked as
+  // (runtime, args[0], <remaining JS args converted to C++ types>...).
+  // args[0] is forwarded untouched; args[1..] are converted by
+  // meta::createArgsTupleFromJsi using the signature exposed through
+  // meta::TailSignature<decltype(FnPtr)>. A void return yields JS undefined;
+  // any other result is converted with jsi_conversion::getJsiValue.
+  // An argument-count mismatch throws jsi::JSError directly, and any
+  // std::exception raised while parsing or calling is rethrown as jsi::JSError.
+  template <auto FnPtr> JSI_HOST_FUNCTION(visionHostFunction) {
+    // 1. Check Argument Count
+    // (We rely on our new FunctionTraits)
+    constexpr std::size_t cppArgCount =
+        meta::FunctionTraits<decltype(FnPtr)>::arity;
+
+    // The expected number of JS args is the C++ arity minus 1.
+    // NOTE(review): this comment previously said two args (Runtime + Value)
+    // are injected, which disagrees with the `- 1` below. Presumably only the
+    // runtime is injected and the pixel-data value arrives as JS args[0] --
+    // confirm against meta::FunctionTraits.
+    constexpr std::size_t expectedJsArgs = cppArgCount - 1;
+    // NOTE(review): debug log of both counts; looks like leftover diagnostics.
+    log(LOG_LEVEL::Debug, cppArgCount, count);
+    if (count != expectedJsArgs) {
+      throw jsi::JSError(runtime, "Argument count mismatch in vision function");
+    }
+
+    try {
+      // 2. The Magic Trick
+      // We get a pointer to a dummy function: void dummy(Rest...) {}
+      // This function has exactly the signature of the arguments we want to
+      // parse.
+      auto dummyFuncPtr = &meta::TailSignature<decltype(FnPtr)>::dummy;
+
+      // 3. Let existing helpers do the work
+      // We pass the dummy pointer. The helper inspects its arguments (Rest...)
+      // and converts args[1]...args[N] accordingly.
+      // Note: We pass (args + 1) because JS args[0] is the PixelData, which we
+      // handle manually.
+      // NOTE(review): no element count is passed to the helper; presumably it
+      // derives how many values to read from the dummy signature -- confirm.
+      auto tailArgsTuple =
+          meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime);
+
+      // 4. Invoke
+      using ReturnType =
+          typename meta::FunctionTraits<decltype(FnPtr)>::return_type;
+
+      if constexpr (std::is_void_v<ReturnType>) {
+        // Void methods: call for side effects only and hand back undefined.
+        std::apply(
+            [&](auto &&...tailArgs) {
+              (model.get()->*FnPtr)(
+                  runtime,
+                  args[0], // 1. PixelData (Manually passed)
+                  std::forward<decltype(tailArgs)>(
+                      tailArgs)...); // 2. The rest (Auto parsed)
+            },
+            std::move(tailArgsTuple));
+        return jsi::Value::undefined();
+      } else {
+        // Non-void methods: capture the result and convert it back to JSI.
+        auto result = std::apply(
+            [&](auto &&...tailArgs) {
+              return (model.get()->*FnPtr)(
+                  runtime, args[0],
+                  std::forward<decltype(tailArgs)>(tailArgs)...);
+            },
+            std::move(tailArgsTuple));
+
+        return jsi_conversion::getJsiValue(std::move(result), runtime);
+      }
+    } catch (const std::exception &e) {
+      // Surface native failures to JS with the original message text.
+      throw jsi::JSError(runtime, e.what());
+    }
+  }
+
   // A generic host function that resolves a promise with a result of a
   // function. JSI arguments are converted to the types provided in the function
   // signature, and the return value is converted back to JSI before resolving.
 
@@ -1,4 +1,9 @@
 #include "VisionModel.h"
+#include <rnexecutorch/Error.h>
+#include <rnexecutorch/ErrorCodes.h>
+#include <rnexecutorch/Log.h>
+#include <rnexecutorch/host_objects/JSTensorViewIn.h>
+#include <rnexecutorch/host_objects/JsiConversions.h>
 #include <rnexecutorch/utils/FrameProcessor.h>
 
 namespace rnexecutorch {
@@ -18,45 +23,47 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime,
 
+// Builds an 8-bit, 3-channel cv::Mat from a JS pixel-data object.
+// The object is read as a JSTensorViewIn (dataPtr, sizes, scalarType) whose
+// sizes are interpreted as [height, width, channels].
+// Throws RnExecutorchError(InvalidUserInput) when sizes does not have exactly
+// 3 elements, when channels is not 3, or when scalarType is not Byte.
+// Returns a clone of the matrix, so the result does not alias dataPtr.
 cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime,
                                        const jsi::Object &pixelData) const {
-  // Extract width, height, and channels
-  if (!pixelData.hasProperty(runtime, "width") ||
-      !pixelData.hasProperty(runtime, "height") ||
-      !pixelData.hasProperty(runtime, "channels") ||
-      !pixelData.hasProperty(runtime, "data")) {
-    throw std::runtime_error(
-        "Invalid pixel data: must contain width, height, channels, and data");
-  }
-
-  int width = pixelData.getProperty(runtime, "width").asNumber();
-  int height = pixelData.getProperty(runtime, "height").asNumber();
-  int channels = pixelData.getProperty(runtime, "channels").asNumber();
+  // PixelData follows TensorPtr structure (dataPtr, sizes, scalarType)
+  // Use JSI conversion helper to extract the data
+  auto tensorView = jsi::fromHostObject<JSTensorViewIn>(runtime, pixelData);
 
-  // Get the ArrayBuffer
-  auto dataValue = pixelData.getProperty(runtime, "data");
-  if (!dataValue.isObject() ||
-      !dataValue.asObject(runtime).isArrayBuffer(runtime)) {
-    throw std::runtime_error(
-        "pixel data 'data' property must be an ArrayBuffer");
+  // Validate dimensions: sizes must be [height, width, channels]
+  if (tensorView.sizes.size() != 3) {
+    char errorMessage[100];
+    std::snprintf(errorMessage, sizeof(errorMessage),
+                  "Invalid pixel data: sizes must have 3 elements "
+                  "[height, width, channels], got %zu",
+                  tensorView.sizes.size());
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
+                            errorMessage);
   }
 
-  auto arrayBuffer = dataValue.asObject(runtime).getArrayBuffer(runtime);
-  size_t expectedSize = width * height * channels;
+  // NOTE(review): the element type of sizes is not visible here; these
+  // assignments narrow to int if it is wider -- confirm.
+  int height = tensorView.sizes[0];
+  int width = tensorView.sizes[1];
+  int channels = tensorView.sizes[2];
 
-  if (arrayBuffer.size(runtime) != expectedSize) {
-    throw std::runtime_error(
-        "ArrayBuffer size does not match width * height * channels");
+  // Pixel data must be RGB (3 channels) and BYTE type
+  if (channels != 3) {
+    char errorMessage[100];
+    std::snprintf(errorMessage, sizeof(errorMessage),
+                  "Invalid pixel data: expected 3 channels (RGB), got %d",
+                  channels);
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
+                            errorMessage);
   }
 
-  // Create cv::Mat and copy the data
-  // OpenCV uses BGR/BGRA format internally, but we'll create as-is and let
-  // preprocessFrame handle conversion
-  int cvType = (channels == 3) ? CV_8UC3 : CV_8UC4;
-  cv::Mat image(height, width, cvType);
+  if (tensorView.scalarType != ScalarType::Byte) {
+    throw RnExecutorchError(
+        RnExecutorchErrorCode::InvalidUserInput,
+        "Invalid pixel data: scalarType must be BYTE (Uint8Array)");
+  }
 
-  // Copy data from ArrayBuffer to cv::Mat
-  std::memcpy(image.data, arrayBuffer.data(runtime), expectedSize);
+  // Create cv::Mat directly from dataPtr (zero-copy view)
+  // NOTE(review): unlike the removed implementation, nothing here checks that
+  // the memory behind dataPtr holds height * width * 3 bytes -- confirm the
+  // JSTensorViewIn conversion guarantees this.
+  uint8_t *dataPtr = static_cast<uint8_t *>(tensorView.dataPtr);
+  cv::Mat image(height, width, CV_8UC3, dataPtr);
 
-  return image;
+  // Clone to own the data, since JS memory may be GC'd
+  return image.clone();
 }
 
 } // namespace models