software-mansion
diff --git a/apps/computer-vision/app/camera_object_detection/index.tsx b/apps/computer-vision/app/camera_object_detection/index.tsx
Lines changed: 384 additions & 265 deletions
diff --git a/apps/computer-vision/app/index.tsx b/apps/computer-vision/app/index.tsx
Lines changed: 2 additions & 2 deletions
diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx
Lines changed: 33 additions & 28 deletions
diff --git a/apps/computer-vision/package.json b/apps/computer-vision/package.json
Lines changed: 3 additions & 4 deletions
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
Lines changed: 0 additions & 5 deletions
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h
Lines changed: 1 addition & 3 deletions
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp b/packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp
Lines changed: 38 additions & 31 deletions
@@ -53,12 +53,12 @@ export default function Home() {
         >
           <Text style={styles.buttonText}>Image Generation</Text>
         </TouchableOpacity>
-        <TouchableOpacity
+        {/* <TouchableOpacity
           style={[styles.button, styles.cameraButton]}
           onPress={() => router.navigate('camera_object_detection/')}
         >
           <Text style={styles.buttonText}>🎥 Camera Object Detection</Text>
-        </TouchableOpacity>
+        </TouchableOpacity> */}
       </View>
     </View>
   );
 
@@ -13,7 +13,26 @@ import ScreenWrapper from '../../ScreenWrapper';
 import ColorPalette from '../../colors';
 import { Images } from 'react-native-nitro-image';
 
-// Helper function to convert image URI to raw pixel data using NitroImage
+// Repacks BGRA bytes (4 per pixel) into a newly allocated, tightly packed RGB buffer (3 per pixel); alpha is dropped.
+function convertBGRAtoRGB(
+  buffer: ArrayBuffer, // source bytes, read 4 per pixel in [B, G, R, A] order
+  width: number, // pixel columns; only the product width * height is used below
+  height: number // pixel rows
+): ArrayBuffer {
+  const source = new Uint8Array(buffer); // byte view over the input (no copy)
+  const rgb = new Uint8Array(width * height * 3); // zero-filled output, 3 bytes per pixel
+
+  for (let i = 0; i < width * height; i++) { // NOTE(review): buffer length is never checked; reads past its end store 0
+    // BGRA format: [B, G, R, A] → RGB: [R, G, B]
+    rgb[i * 3 + 0] = source[i * 4 + 2]; // R
+    rgb[i * 3 + 1] = source[i * 4 + 1]; // G
+    rgb[i * 3 + 2] = source[i * 4 + 0]; // B
+  }
+
+  return rgb.buffer; // the underlying ArrayBuffer of the RGB array, not the typed-array view
+}
+
+// Helper function to convert image URI to raw RGB pixel data
 async function imageUriToPixelData(
   uri: string,
   targetWidth: number,
@@ -29,32 +48,19 @@ async function imageUriToPixelData(
     const image = await Images.loadFromFileAsync(uri);
     const resized = image.resize(targetWidth, targetHeight);
 
-    // Get pixel data as ArrayBuffer (RGBA format)
-    const pixelData = resized.toRawPixelData();
+    // Get pixel data as ArrayBuffer (BGRA format from NitroImage)
+    const rawPixelData = resized.toRawPixelData();
     const buffer =
-      pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer;
-
-    // Calculate actual buffer dimensions (accounts for device pixel ratio)
-    const bufferSize = buffer?.byteLength || 0;
-    const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel
-    const aspectRatio = targetWidth / targetHeight;
-    const actualHeight = Math.sqrt(totalPixels / aspectRatio);
-    const actualWidth = totalPixels / actualHeight;
+      rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer;
 
-    console.log('Requested:', targetWidth, 'x', targetHeight);
-    console.log('Buffer size:', bufferSize);
-    console.log(
-      'Actual dimensions:',
-      Math.round(actualWidth),
-      'x',
-      Math.round(actualHeight)
-    );
+    // Convert BGRA to RGB as required by the native API
+    const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight);
 
     return {
-      data: buffer,
-      width: Math.round(actualWidth),
-      height: Math.round(actualHeight),
-      channels: 4, // RGBA
+      data: rgbBuffer,
+      width: targetWidth,
+      height: targetHeight,
+      channels: 3, // RGB
     };
   } catch (error) {
     console.error('Error loading image with NitroImage:', error);
@@ -106,12 +112,11 @@ export default function ObjectDetectionScreen() {
     if (imageUri && imageDimensions) {
       try {
         console.log('Converting image to pixel data...');
-        // Resize to 640x640 to avoid memory issues
-        const intermediateSize = 640;
+        // Use original dimensions - let the model resize internally
         const pixelData = await imageUriToPixelData(
           imageUri,
-          intermediateSize,
-          intermediateSize
+          imageDimensions.width,
+          imageDimensions.height
         );
 
         console.log('Running forward with pixel data...', {
@@ -122,7 +127,7 @@ export default function ObjectDetectionScreen() {
         });
 
         // Run inference using unified forward() API
-        const output = await ssdLite.forward(pixelData, 0.5);
+        const output = await ssdLite.forward(pixelData, 0.3);
         console.log('Pixel data result:', output.length, 'detections');
         setResults(output);
       } catch (e) {
 
@@ -31,15 +31,14 @@
     "react-native-gesture-handler": "~2.28.0",
     "react-native-image-picker": "^7.2.2",
     "react-native-loading-spinner-overlay": "^3.0.1",
+    "react-native-nitro-image": "0.10.2",
+    "react-native-nitro-modules": "0.33.4",
     "react-native-reanimated": "~4.2.1",
     "react-native-safe-area-context": "~5.6.0",
     "react-native-screens": "~4.16.0",
     "react-native-svg": "15.12.1",
     "react-native-svg-transformer": "^1.5.0",
-    "react-native-vision-camera": "4.7.3",
-    "react-native-worklets": "^0.7.2",
-    "react-native-worklets-core": "^1.6.2",
-    "vision-camera-resize-plugin": "^3.2.0"
+    "react-native-worklets": "^0.7.2"
   },
   "devDependencies": {
     "@babel/core": "^7.25.2",
 
@@ -223,7 +223,6 @@ getValue<std::vector<int64_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
   return getArrayAsVector<int64_t>(val, runtime);
 }
 
-// ✅ Fix: Add support for uint64_t vectors (fixes Undefined Symbol error)
 template <>
 inline std::vector<uint64_t>
 getValue<std::vector<uint64_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
@@ -285,7 +284,6 @@ inline std::span<int64_t> getValue<std::span<int64_t>>(const jsi::Value &val,
   return getTypedArrayAsSpan<int64_t>(val, runtime);
 }
 
-// ✅ Fix: Add support for uint64_t spans (fixes Undefined Symbol error)
 template <>
 inline std::span<uint64_t>
 getValue<std::span<uint64_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
@@ -328,8 +326,6 @@ inline jsi::Value getJsiValue(const std::vector<char> &vec,
   return {runtime, array};
 }
 
-// ✅ Fix: Add support for uint64_t (unsigned long long) vectors
-// This fixes the error in TokenizerModule::encode/decode
 inline jsi::Value getJsiValue(const std::vector<uint64_t> &vec,
                               jsi::Runtime &runtime) {
   jsi::Array array(runtime, vec.size());
@@ -340,7 +336,6 @@ inline jsi::Value getJsiValue(const std::vector<uint64_t> &vec,
   return {runtime, array};
 }
 
-// ✅ Fix: Add support for int64_t vectors
 inline jsi::Value getJsiValue(const std::vector<int64_t> &vec,
                               jsi::Runtime &runtime) {
   jsi::Array array(runtime, vec.size());
 
@@ -158,14 +158,12 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
       //     "setFixedModel"));
     }
 
-    // Register generateFromFrame for all VisionModel subclasses
     if constexpr (meta::DerivedFromOrSameAs<Model, models::VisionModel>) {
       addFunctions(JSI_EXPORT_FUNCTION(
           ModelHostObject<Model>, visionHostFunction<&Model::generateFromFrame>,
           "generateFromFrame"));
     }
 
-    // Register generateFromPixels for models that support it
     if constexpr (meta::HasGenerateFromPixels<Model>) {
       addFunctions(
           JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
@@ -235,7 +233,7 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
     }
 
     try {
-      // 2. The Magic Trick 🪄
+      // 2. The Magic Trick
       // We get a pointer to a dummy function: void dummy(Rest...) {}
       // This function has exactly the signature of the arguments we want to
       // parse.
 
@@ -1,4 +1,9 @@
 #include "VisionModel.h"
+#include <rnexecutorch/Error.h>
+#include <rnexecutorch/ErrorCodes.h>
+#include <rnexecutorch/Log.h>
+#include <rnexecutorch/host_objects/JSTensorViewIn.h>
+#include <rnexecutorch/host_objects/JsiConversions.h>
 #include <rnexecutorch/utils/FrameProcessor.h>
 
 namespace rnexecutorch {
@@ -18,45 +23,47 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime,
 
 cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime,
                                        const jsi::Object &pixelData) const {
-  // Extract width, height, and channels
-  if (!pixelData.hasProperty(runtime, "width") ||
-      !pixelData.hasProperty(runtime, "height") ||
-      !pixelData.hasProperty(runtime, "channels") ||
-      !pixelData.hasProperty(runtime, "data")) {
-    throw std::runtime_error(
-        "Invalid pixel data: must contain width, height, channels, and data");
-  }
-
-  int width = pixelData.getProperty(runtime, "width").asNumber();
-  int height = pixelData.getProperty(runtime, "height").asNumber();
-  int channels = pixelData.getProperty(runtime, "channels").asNumber();
+  // PixelData follows TensorPtr structure (dataPtr, sizes, scalarType)
+  // Use JSI conversion helper to extract the data
+  auto tensorView = jsi::fromHostObject<JSTensorViewIn>(runtime, pixelData);
 
-  // Get the ArrayBuffer
-  auto dataValue = pixelData.getProperty(runtime, "data");
-  if (!dataValue.isObject() ||
-      !dataValue.asObject(runtime).isArrayBuffer(runtime)) {
-    throw std::runtime_error(
-        "pixel data 'data' property must be an ArrayBuffer");
+  // Validate dimensions: sizes must be [height, width, channels]
+  if (tensorView.sizes.size() != 3) {
+    char errorMessage[100];
+    std::snprintf(errorMessage, sizeof(errorMessage),
+                  "Invalid pixel data: sizes must have 3 elements "
+                  "[height, width, channels], got %zu",
+                  tensorView.sizes.size());
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
+                            errorMessage);
   }
 
-  auto arrayBuffer = dataValue.asObject(runtime).getArrayBuffer(runtime);
-  size_t expectedSize = width * height * channels;
+  int height = tensorView.sizes[0]; // NOTE(review): height and width are not checked for being positive
+  int width = tensorView.sizes[1];
+  int channels = tensorView.sizes[2];
 
-  if (arrayBuffer.size(runtime) != expectedSize) {
-    throw std::runtime_error(
-        "ArrayBuffer size does not match width * height * channels");
+  // Pixel data must be RGB (3 channels) and BYTE type
+  if (channels != 3) {
+    char errorMessage[100];
+    std::snprintf(errorMessage, sizeof(errorMessage),
+                  "Invalid pixel data: expected 3 channels (RGB), got %d",
+                  channels);
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
+                            errorMessage);
   }
 
-  // Create cv::Mat and copy the data
-  // OpenCV uses BGR/BGRA format internally, but we'll create as-is and let
-  // preprocessFrame handle conversion
-  int cvType = (channels == 3) ? CV_8UC3 : CV_8UC4;
-  cv::Mat image(height, width, cvType);
+  if (tensorView.scalarType != ScalarType::Byte) {
+    throw RnExecutorchError(
+        RnExecutorchErrorCode::InvalidUserInput,
+        "Invalid pixel data: scalarType must be BYTE (Uint8Array)");
+  }
 
-  // Copy data from ArrayBuffer to cv::Mat
-  std::memcpy(image.data, arrayBuffer.data(runtime), expectedSize);
+  // Create cv::Mat directly from dataPtr (zero-copy view)
+  uint8_t *dataPtr = static_cast<uint8_t *>(tensorView.dataPtr); // NOTE(review): the removed ArrayBuffer size check has no replacement here
+  cv::Mat image(height, width, CV_8UC3, dataPtr); // assumes dataPtr spans height * width * 3 bytes — TODO confirm
 
-  return image;
+  // Clone to own the data, since JS memory may be GC'd
+  return image.clone();
 }
 
 } // namespace models