software-mansion
diff --git a/‎.cspell-wordlist.txt‎
Lines changed: 2 additions & 1 deletion b/‎.cspell-wordlist.txt‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎apps/computer-vision/app/object_detection/index.tsx‎
Lines changed: 159 additions & 8 deletions b/‎apps/computer-vision/app/object_detection/index.tsx‎
Lines changed: 159 additions & 8 deletions
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h‎
Lines changed: 10 additions & 2 deletions b/‎packages/react-native-executorch/common/rnexecutorch/RnExecutorchInstaller.h‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h‎
Lines changed: 17 additions & 4 deletions b/‎packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h‎
Lines changed: 17 additions & 4 deletions
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h‎
Lines changed: 7 additions & 2 deletions b/‎packages/react-native-executorch/common/rnexecutorch/metaprogramming/TypeConcepts.h‎
Lines changed: 7 additions & 2 deletions
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp‎
Lines changed: 45 additions & 2 deletions b/‎packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp‎
Lines changed: 45 additions & 2 deletions
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h‎
Lines changed: 39 additions & 3 deletions b/‎packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h‎
Lines changed: 39 additions & 3 deletions
@@ -111,4 +111,5 @@ logprob
 RNFS
 pogodin
 kesha
-antonov
+antonov
+worklet
@@ -1,16 +1,66 @@
 import Spinner from '../../components/Spinner';
-import { BottomBar } from '../../components/BottomBar';
 import { getImage } from '../../utils';
 import {
   Detection,
   useObjectDetection,
   SSDLITE_320_MOBILENET_V3_LARGE,
 } from 'react-native-executorch';
-import { View, StyleSheet, Image } from 'react-native';
+import { View, StyleSheet, Image, TouchableOpacity, Text } from 'react-native';
 import ImageWithBboxes from '../../components/ImageWithBboxes';
 import React, { useContext, useEffect, useState } from 'react';
 import { GeneratingContext } from '../../context';
 import ScreenWrapper from '../../ScreenWrapper';
+import ColorPalette from '../../colors';
+import { Images } from 'react-native-nitro-image';
+
+/**
+ * Loads the image at `uri` with NitroImage, resizes it to the requested size
+ * and returns its raw pixels as an RGBA buffer.
+ *
+ * The returned `width`/`height` are derived from the byte length of the pixel
+ * buffer (4 bytes per pixel, requested aspect ratio) instead of being copied
+ * from the arguments, because the buffer produced after resizing may not
+ * match the requested size (the original author attributes this to the device
+ * pixel ratio).
+ *
+ * @param uri - Location of the image to load.
+ * @param targetWidth - Requested width passed to `resize`.
+ * @param targetHeight - Requested height passed to `resize`.
+ * @returns The pixel buffer together with dimensions that satisfy
+ *   `width * height * channels === data.byteLength`.
+ * @throws Error when the pixel buffer is missing or empty, or when no integer
+ *   dimensions with the requested aspect ratio describe it. Every failure is
+ *   logged and then rethrown to the caller.
+ */
+async function imageUriToPixelData(
+  uri: string,
+  targetWidth: number,
+  targetHeight: number
+): Promise<{
+  data: ArrayBuffer;
+  width: number;
+  height: number;
+  channels: number;
+}> {
+  const BYTES_PER_PIXEL = 4; // RGBA
+
+  try {
+    // Load image and resize to target dimensions
+    const image = await Images.loadFromFileAsync(uri);
+    const resized = image.resize(targetWidth, targetHeight);
+
+    // Get pixel data as ArrayBuffer (RGBA format)
+    const pixelData = resized.toRawPixelData();
+    const buffer =
+      pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer;
+
+    // Fail here with a clear message instead of handing `undefined` data or
+    // NaN dimensions (0 / 0) to the native side.
+    const byteLength = buffer?.byteLength ?? 0;
+    if (!buffer || byteLength === 0 || byteLength % BYTES_PER_PIXEL !== 0) {
+      throw new Error(`Unexpected raw pixel buffer size: ${byteLength} bytes`);
+    }
+
+    // Recover the real dimensions from the pixel count and the aspect ratio.
+    const totalPixels = byteLength / BYTES_PER_PIXEL;
+    const aspectRatio = targetWidth / targetHeight;
+    const height = Math.round(Math.sqrt(totalPixels / aspectRatio));
+    const width = Math.round(totalPixels / height);
+
+    // Rounding can produce dimensions that no longer multiply back to the
+    // pixel count; the consumer requires an exact match, so reject those.
+    if (
+      !Number.isInteger(width) ||
+      !Number.isInteger(height) ||
+      width * height !== totalPixels
+    ) {
+      throw new Error(
+        `Cannot derive ${targetWidth}:${targetHeight} dimensions from ${totalPixels} pixels`
+      );
+    }
+
+    return {
+      data: buffer,
+      width,
+      height,
+      channels: BYTES_PER_PIXEL,
+    };
+  } catch (error) {
+    console.error('Error loading image with NitroImage:', error);
+    throw error;
+  }
+}
 
 export default function ObjectDetectionScreen() {
   const [imageUri, setImageUri] = useState('');
@@ -42,10 +92,41 @@ export default function ObjectDetectionScreen() {
   const runForward = async () => {
     if (imageUri) {
       try {
-        const output = await ssdLite.forward(imageUri);
+        console.log('Running forward with string URI...');
+        const output = await ssdLite.forward(imageUri, 0.5);
+        console.log('String URI result:', output.length, 'detections');
         setResults(output);
       } catch (e) {
-        console.error(e);
+        console.error('Error in runForward:', e);
+      }
+    }
+  };
+
+  // Runs object detection on raw pixel data instead of a file URI: converts
+  // the selected image to a pixel buffer, passes it to ssdLite.forward() with
+  // a 0.5 threshold argument and stores the detections via setResults().
+  // Does nothing unless both imageUri and imageDimensions are set; failures
+  // are logged and not rethrown.
+  // NOTE(review): the "Run (Pixels)" button is disabled only on !imageUri, so
+  // a press while imageDimensions is unset is silently ignored - confirm.
+  const runForwardPixels = async () => {
+    if (imageUri && imageDimensions) {
+      try {
+        console.log('Converting image to pixel data...');
+        // Resize to 640x640 to avoid memory issues
+        const intermediateSize = 640;
+        const pixelData = await imageUriToPixelData(
+          imageUri,
+          intermediateSize,
+          intermediateSize
+        );
+
+        // Log the buffer metadata (not the pixels) before inference.
+        console.log('Running forward with pixel data...', {
+          width: pixelData.width,
+          height: pixelData.height,
+          channels: pixelData.channels,
+          dataSize: pixelData.data.byteLength,
+        });
+
+        // Run inference using unified forward() API
+        const output = await ssdLite.forward(pixelData, 0.5);
+        console.log('Pixel data result:', output.length, 'detections');
+        setResults(output);
+      } catch (e) {
+        console.error('Error in runForwardPixels:', e);
       }
     }
   };
@@ -81,10 +162,41 @@ export default function ObjectDetectionScreen() {
           )}
         </View>
       </View>
-      <BottomBar
-        handleCameraPress={handleCameraPress}
-        runForward={runForward}
-      />
+
+      {/* Custom bottom bar with two buttons */}
+      <View style={styles.bottomContainer}>
+        <View style={styles.bottomIconsContainer}>
+          <TouchableOpacity onPress={() => handleCameraPress(false)}>
+            <Text style={styles.iconText}>📷 Gallery</Text>
+          </TouchableOpacity>
+        </View>
+
+        <View style={styles.buttonsRow}>
+          <TouchableOpacity
+            style={[
+              styles.button,
+              styles.halfButton,
+              !imageUri && styles.buttonDisabled,
+            ]}
+            onPress={runForward}
+            disabled={!imageUri}
+          >
+            <Text style={styles.buttonText}>Run (String)</Text>
+          </TouchableOpacity>
+
+          <TouchableOpacity
+            style={[
+              styles.button,
+              styles.halfButton,
+              !imageUri && styles.buttonDisabled,
+            ]}
+            onPress={runForwardPixels}
+            disabled={!imageUri}
+          >
+            <Text style={styles.buttonText}>Run (Pixels)</Text>
+          </TouchableOpacity>
+        </View>
+      </View>
     </ScreenWrapper>
   );
 }
@@ -129,4 +241,43 @@ const styles = StyleSheet.create({
     width: '100%',
     height: '100%',
   },
+  bottomContainer: {
+    width: '100%',
+    gap: 15,
+    alignItems: 'center',
+    padding: 16,
+    flex: 1,
+  },
+  bottomIconsContainer: {
+    flexDirection: 'row',
+    justifyContent: 'center',
+    width: '100%',
+  },
+  iconText: {
+    fontSize: 16,
+    color: ColorPalette.primary,
+  },
+  buttonsRow: {
+    flexDirection: 'row',
+    width: '100%',
+    gap: 10,
+  },
+  button: {
+    height: 50,
+    justifyContent: 'center',
+    alignItems: 'center',
+    backgroundColor: ColorPalette.primary,
+    color: '#fff',
+    borderRadius: 8,
+  },
+  halfButton: {
+    flex: 1,
+  },
+  buttonDisabled: {
+    opacity: 0.5,
+  },
+  buttonText: {
+    color: '#fff',
+    fontSize: 16,
+  },
 });
@@ -54,8 +54,16 @@ class RnExecutorchInstaller {
                 meta::createConstructorArgsWithCallInvoker<ModelT>(
                     args, runtime, jsCallInvoker);
 
-            auto modelImplementationPtr = std::make_shared<ModelT>(
-                std::make_from_tuple<ModelT>(constructorArgs));
+            // This unpacks the tuple and calls the constructor directly inside
+            // make_shared. It avoids creating a temporary object, so no
+            // move/copy is required.
+            auto modelImplementationPtr = std::apply(
+                [](auto &&...unpackedArgs) {
+                  return std::make_shared<ModelT>(
+                      std::forward<decltype(unpackedArgs)>(unpackedArgs)...);
+                },
+                std::move(constructorArgs));
+
             auto modelHostObject = std::make_shared<ModelHostObject<ModelT>>(
                 modelImplementationPtr, jsCallInvoker);
 
 
@@ -45,10 +45,11 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
           "getInputShape"));
     }
 
-    if constexpr (meta::HasGenerate<Model>) {
-      addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
-                                       promiseHostFunction<&Model::generate>,
-                                       "generate"));
+    if constexpr (meta::HasGenerateFromString<Model>) {
+      addFunctions(
+          JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
+                              promiseHostFunction<&Model::generateFromString>,
+                              "generateFromString"));
     }
 
     if constexpr (meta::HasEncode<Model>) {
@@ -155,10 +156,22 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
       addFunctions(JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
                                        promiseHostFunction<&Model::stream>,
                                        "stream"));
+    }
+
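+    // NOTE(review): for VisionModel subclasses this branch registers
+    // "streamStop"; no generateFromFrame is registered here, and the stream
+    // branch above no longer registers streamStop - confirm this is intended.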
+    if constexpr (meta::DerivedFromOrSameAs<Model, models::VisionModel>) {
       addFunctions(JSI_EXPORT_FUNCTION(
           ModelHostObject<Model>, synchronousHostFunction<&Model::streamStop>,
           "streamStop"));
     }
+
+    // Register generateFromPixels for models that support it
+    if constexpr (meta::HasGenerateFromPixels<Model>) {
+      addFunctions(
+          JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
+                              visionHostFunction<&Model::generateFromPixels>,
+                              "generateFromPixels"));
+    }
   }
 
   // A generic host function that runs synchronously, works analogously to the
 
@@ -12,8 +12,13 @@ template <typename T, typename Base>
 concept SameAs = std::is_same_v<Base, T>;
 
 template <typename T>
-concept HasGenerate = requires(T t) {
-  { &T::generate };
+concept HasGenerateFromString = requires(T t) {
+  { &T::generateFromString };
+};
+
+// Satisfied when `&T::generateFromPixels` is a valid expression, i.e. T has a
+// member named generateFromPixels whose address can be taken.
+template <typename T>
+concept HasGenerateFromPixels = requires(T t) {
+  { &T::generateFromPixels };
 };
 
 template <typename T>
 
@@ -6,8 +6,8 @@ namespace models {
 
 using namespace facebook;
 
-cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime,
-                                          const jsi::Value &frameData) const {
+cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime,
+                                      const jsi::Value &frameData) const {
   // Extract frame using FrameProcessor utility
   auto frameObj = frameData.asObject(runtime);
   cv::Mat frame = utils::FrameProcessor::extractFrame(runtime, frameObj);
@@ -16,5 +16,48 @@ cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime,
   return preprocessFrame(frame);
 }
 
+/**
+ * Builds a cv::Mat from a JS pixel-data object of the form
+ * { data: ArrayBuffer, width, height, channels }.
+ *
+ * The bytes are copied as-is into a newly allocated matrix; no colour
+ * conversion or resizing is done here. 3 channels map to CV_8UC3 and
+ * 4 channels to CV_8UC4.
+ *
+ * @param runtime   JSI runtime used to read the object.
+ * @param pixelData JS object carrying the pixel buffer and its dimensions.
+ * @return Matrix of height x width holding a copy of the pixel bytes.
+ * @throws std::runtime_error if a property is missing, if width, height or
+ *         channels is not a positive integer, if channels is neither 3 nor 4,
+ *         if data is not an ArrayBuffer, or if its size differs from
+ *         width * height * channels.
+ */
+cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime,
+                                       const jsi::Object &pixelData) const {
+  if (!pixelData.hasProperty(runtime, "width") ||
+      !pixelData.hasProperty(runtime, "height") ||
+      !pixelData.hasProperty(runtime, "channels") ||
+      !pixelData.hasProperty(runtime, "data")) {
+    throw std::runtime_error(
+        "Invalid pixel data: must contain width, height, channels, and data");
+  }
+
+  // JS numbers are doubles. Validate before narrowing, because converting NaN
+  // or an out-of-range double to int is undefined behaviour, and zero or
+  // negative sizes are meaningless for an image.
+  const auto readPositiveInt = [&](const char *name) -> int {
+    constexpr double kMaxValue = 2147483647.0; // largest 32-bit signed int
+    const double value = pixelData.getProperty(runtime, name).asNumber();
+    // Written as a negated range check so that NaN is rejected as well.
+    if (!(value >= 1.0 && value <= kMaxValue) ||
+        static_cast<double>(static_cast<int>(value)) != value) {
+      throw std::runtime_error("Invalid pixel data: width, height and "
+                               "channels must be positive integers");
+    }
+    return static_cast<int>(value);
+  };
+
+  const int width = readPositiveInt("width");
+  const int height = readPositiveInt("height");
+  const int channels = readPositiveInt("channels");
+
+  // Only RGB and RGBA are supported. The matrix below holds at most 4 bytes
+  // per pixel, so a larger channel count would make the copy write past its
+  // end, and a smaller one would leave part of it uninitialised.
+  if (channels != 3 && channels != 4) {
+    throw std::runtime_error(
+        "Invalid pixel data: channels must be 3 (RGB) or 4 (RGBA)");
+  }
+
+  // Get the ArrayBuffer
+  auto dataValue = pixelData.getProperty(runtime, "data");
+  if (!dataValue.isObject() ||
+      !dataValue.asObject(runtime).isArrayBuffer(runtime)) {
+    throw std::runtime_error(
+        "pixel data 'data' property must be an ArrayBuffer");
+  }
+
+  auto arrayBuffer = dataValue.asObject(runtime).getArrayBuffer(runtime);
+
+  // Multiply in 64 bits: the product of three ints can overflow int.
+  const unsigned long long expectedSize =
+      static_cast<unsigned long long>(width) *
+      static_cast<unsigned long long>(height) *
+      static_cast<unsigned long long>(channels);
+
+  if (static_cast<unsigned long long>(arrayBuffer.size(runtime)) !=
+      expectedSize) {
+    throw std::runtime_error(
+        "ArrayBuffer size does not match width * height * channels");
+  }
+
+  const int cvType = (channels == 3) ? CV_8UC3 : CV_8UC4;
+  cv::Mat image(height, width, cvType);
+
+  // expectedSize equals the buffer size here, so it fits in size_t and matches
+  // the number of bytes allocated for the matrix.
+  std::memcpy(image.data, arrayBuffer.data(runtime),
+              static_cast<size_t>(expectedSize));
+
+  return image;
+}
+
 } // namespace models
 } // namespace rnexecutorch
@@ -121,12 +121,48 @@ class VisionModel : public BaseModel {
    * responsible
    * @note Typical usage:
    * @code
-   *   cv::Mat preprocessed = extractAndPreprocess(runtime, frameData);
+   *   cv::Mat preprocessed = extractFromFrame(runtime, frameData);
    *   auto tensor = image_processing::getTensorFromMatrix(dims, preprocessed);
    * @endcode
    */
-  cv::Mat extractAndPreprocess(jsi::Runtime &runtime,
-                               const jsi::Value &frameData) const;
+  cv::Mat extractFromFrame(jsi::Runtime &runtime,
+                           const jsi::Value &frameData) const;
+
+  /**
+   * @brief Extract cv::Mat from raw pixel data (ArrayBuffer) sent from
+   * JavaScript
+   *
+   * This method enables users to run inference on raw pixel data without file
+   * I/O. Useful for processing images already in memory (e.g., from canvas,
+   * image library).
+   *
+   * @param runtime JSI runtime
+   * @param pixelData JSI object containing:
+   *                  - data: ArrayBuffer with raw pixel values
+   *                  - width: number - image width
+   *                  - height: number - image height
+   *                  - channels: number - number of channels (3 for RGB, 4 for
+   * RGBA)
+   *
+   * @return cv::Mat containing the pixel data
+   *
+   * @throws std::runtime_error if pixelData format is invalid
+   *
+   * @note The returned cv::Mat owns a copy of the data
+   * @note Expected pixel format: RGB or RGBA, row-major order
+   * @note Typical usage from JS:
+   * @code
+   *   const pixels = new Uint8Array([...]);  // Raw pixel data
+   *   const result = model.generateFromPixels({
+   *     data: pixels.buffer,
+   *     width: 640,
+   *     height: 480,
+   *     channels: 3
+   *   }, 0.5);
+   * @endcode
+   */
+  cv::Mat extractFromPixels(jsi::Runtime &runtime,
+                            const jsi::Object &pixelData) const;
 };
 
 } // namespace models
-Original file line number
+Diff line change
 RNFS
 pogodin
 kesha
 -antonov
 +antonov
 +worklet