software-mansion
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp‎
Lines changed: 20 additions & 0 deletions b/‎packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h‎
Lines changed: 139 additions & 0 deletions b/‎packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h‎
Lines changed: 139 additions & 0 deletions
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp‎
Lines changed: 1 addition & 1 deletion b/‎packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp‎
Lines changed: 142 additions & 0 deletions b/‎packages/react-native-executorch/common/rnexecutorch/utils/FrameProcessor.cpp‎
Lines changed: 142 additions & 0 deletions
@@ -0,0 +1,20 @@
+#include "VisionModel.h"
+#include <rnexecutorch/utils/FrameProcessor.h>
+
+namespace rnexecutorch {
+namespace models {
+
+using namespace facebook;
+
+/**
+ * Pulls a cv::Mat out of a VisionCamera frame value and hands it to the
+ * subclass-provided preprocessFrame(). No locking is performed here.
+ *
+ * @param runtime JSI runtime that owns frameData
+ * @param frameData JSI value that must be convertible to an object
+ * @return Whatever preprocessFrame() returns for the extracted frame
+ */
+cv::Mat VisionModel::extractAndPreprocess(jsi::Runtime &runtime,
+                                          const jsi::Value &frameData) const {
+  // Step 1: turn the JSI value into an object and let FrameProcessor pick the
+  // pixel source
+  const jsi::Object frameObject = frameData.asObject(runtime);
+  const cv::Mat rawFrame =
+      utils::FrameProcessor::extractFrame(runtime, frameObject);
+
+  // Step 2: delegate to the model-specific hook
+  return preprocessFrame(rawFrame);
+}
+
+} // namespace models
+} // namespace rnexecutorch
@@ -0,0 +1,139 @@
+#pragma once
+
+#include <jsi/jsi.h>
+#include <mutex>
+#include <opencv2/opencv.hpp>
+#include <rnexecutorch/metaprogramming/ConstructorHelpers.h>
+#include <rnexecutorch/models/BaseModel.h>
+
+namespace rnexecutorch {
+namespace models {
+
+/**
+ * @brief Base class for computer vision models that support real-time camera
+ * input
+ *
+ * VisionModel extends BaseModel with two building blocks for camera-driven
+ * inference: a shared inference mutex and a helper that extracts a cv::Mat
+ * from a VisionCamera frame and preprocesses it. The class itself never locks
+ * the mutex; subclasses decide where and how to lock.
+ *
+ * Thread safety (convention for subclasses):
+ * - Guard every forward() call with inference_mutex_
+ * - Per-frame entry points (e.g. generateFromFrame()) should use try_lock()
+ *   and skip the frame when the model is busy, so the camera thread is never
+ *   blocked
+ * - Blocking entry points called from JS should use std::lock_guard
+ *
+ * Usage:
+ * Subclasses should:
+ * 1. Inherit from VisionModel instead of BaseModel
+ * 2. Implement preprocessFrame() with model-specific preprocessing
+ * 3. Use inference_mutex_ when calling forward() in custom generate methods
+ * 4. Use lock_guard for blocking operations (JS API)
+ * 5. Use try_lock() for non-blocking operations (camera API)
+ *
+ * Example:
+ * @code
+ * class Classification : public VisionModel {
+ * public:
+ *   std::unordered_map<std::string_view, float>
+ *   generateFromFrame(jsi::Runtime& runtime, const jsi::Value& frameValue) {
+ *     auto frameObject = frameValue.asObject(runtime);
+ *     cv::Mat frame =
+ *         utils::FrameProcessor::extractFrame(runtime, frameObject);
+ *
+ *     // Lock before inference; skip the frame if the model is busy
+ *     if (!inference_mutex_.try_lock()) {
+ *       return {};
+ *     }
+ *     std::lock_guard<std::mutex> lock(inference_mutex_, std::adopt_lock);
+ *
+ *     auto preprocessed = preprocessFrame(frame);
+ *     // ... run inference
+ *   }
+ * };
+ * @endcode
+ */
+class VisionModel : public BaseModel {
+public:
+  /**
+   * @brief Construct a VisionModel with the same parameters as BaseModel
+   *
+   * Both arguments are forwarded to the BaseModel constructor unchanged; the
+   * inference mutex is default-constructed.
+   *
+   * @param modelSource Passed through to BaseModel
+   * @param callInvoker Passed through to BaseModel
+   */
+  VisionModel(const std::string &modelSource,
+              std::shared_ptr<react::CallInvoker> callInvoker)
+      : BaseModel(modelSource, callInvoker) {}
+
+  /**
+   * @brief Virtual destructor for proper cleanup in derived classes
+   */
+  virtual ~VisionModel() = default;
+
+protected:
+  /**
+   * @brief Mutex intended to serialize inference
+   *
+   * It exists to prevent races when:
+   * - generateFromFrame() is called from the VisionCamera worklet thread
+   * - generate() is called from the JavaScript thread at the same time
+   *
+   * Nothing in VisionModel locks it; subclasses must do so themselves.
+   *
+   * Usage guidelines:
+   * - Use std::lock_guard for blocking operations (JS API can wait)
+   * - Use try_lock() for non-blocking operations (camera should skip frames)
+   *
+   * @note Marked mutable to allow locking in const methods if needed
+   */
+  mutable std::mutex inference_mutex_;
+
+  /**
+   * @brief Preprocess a camera frame for model input
+   *
+   * Pure virtual: every concrete model supplies its own implementation.
+   * Typical steps are:
+   * - Resizing to the model's expected input size
+   * - Color space conversion (e.g., BGR to RGB)
+   * - Normalization
+   * - Any other model-specific transformations
+   *
+   * @param frame Frame as returned by utils::FrameProcessor::extractFrame()
+   * @return Preprocessed cv::Mat ready for tensor conversion
+   *
+   * @note Do not assume a fixed channel count or orientation without checking:
+   * the ArrayBuffer extraction path can produce 3- or 4-channel frames, and
+   * the nativeBuffer path depends on FrameExtractor (TODO confirm its output
+   * format and rotation)
+   * @note extractAndPreprocess() calls this without taking inference_mutex_;
+   * callers that need mutual exclusion must lock beforehand
+   */
+  virtual cv::Mat preprocessFrame(const cv::Mat &frame) const = 0;
+
+  /**
+   * @brief Extract and preprocess frame from VisionCamera in one call
+   *
+   * Convenience method: converts frameData to an object, runs
+   * utils::FrameProcessor::extractFrame() on it (which chooses between the
+   * nativeBuffer and ArrayBuffer sources) and passes the result to
+   * preprocessFrame().
+   *
+   * @param runtime JSI runtime
+   * @param frameData JSI value containing frame data from VisionCamera
+   *
+   * @return Preprocessed cv::Mat ready for tensor conversion
+   *
+   * @throws std::runtime_error if frame extraction fails
+   *
+   * @note This method does NOT acquire the inference mutex - caller is
+   * responsible
+   * @note Typical usage:
+   * @code
+   *   cv::Mat preprocessed = extractAndPreprocess(runtime, frameData);
+   *   auto tensor = image_processing::getTensorFromMatrix(dims, preprocessed);
+   * @endcode
+   */
+  cv::Mat extractAndPreprocess(jsi::Runtime &runtime,
+                               const jsi::Value &frameData) const;
+};
+
+} // namespace models
+// Register the constructor signature of VisionModel (model source string plus
+// CallInvoker). VisionModel is abstract and cannot be instantiated itself; the
+// metaprogramming system needs the signature for classes deriving from it.
+REGISTER_CONSTRUCTOR(models::VisionModel, std::string,
+                     std::shared_ptr<react::CallInvoker>);
+
+} // namespace rnexecutorch
@@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) {
   return probs;
 }
 
-} // namespace rnexecutorch::models::classification
+} // namespace rnexecutorch::models::classification
@@ -0,0 +1,142 @@
+#include "FrameProcessor.h"
+#include "FrameExtractor.h"
+#include <rnexecutorch/Log.h>
+#include <stdexcept>
+
+namespace rnexecutorch {
+namespace utils {
+
+/**
+ * @brief Extract a cv::Mat from a VisionCamera frame object.
+ *
+ * Reads the frame dimensions, then tries the available pixel sources in order:
+ * 1. "nativeBuffer" - any std::exception raised on this path is logged and
+ *    the function falls through to the next source
+ * 2. "data" (ArrayBuffer)
+ *
+ * @param runtime JSI runtime that owns frameData
+ * @param frameData Frame object exposing "width", "height" and at least one of
+ * "nativeBuffer" / "data"
+ * @return The extracted frame
+ *
+ * @throws std::runtime_error if width/height are missing or not positive, or
+ * if neither pixel source is present
+ */
+cv::Mat FrameProcessor::extractFrame(jsi::Runtime &runtime,
+                                     const jsi::Object &frameData) {
+  // Reuse getFrameSize() so missing dimensions are reported the same way
+  // everywhere instead of surfacing as a JSI type error
+  const cv::Size frameSize = getFrameSize(runtime, frameData);
+  const int width = frameSize.width;
+  const int height = frameSize.height;
+
+  // Zero or negative sizes would lead to a division by zero and invalid
+  // cv::Mat shapes further down, so reject them up front
+  if (width <= 0 || height <= 0) {
+    throw std::runtime_error("FrameProcessor: Invalid frame dimensions " +
+                             std::to_string(width) + "x" +
+                             std::to_string(height));
+  }
+
+  // Try zero-copy path first (nativeBuffer)
+  if (hasNativeBuffer(runtime, frameData)) {
+    // Log the chosen path only once to avoid flooding the log per frame
+    static bool loggedPath = false;
+    if (!loggedPath) {
+      log(LOG_LEVEL::Debug, "FrameProcessor: Using zero-copy nativeBuffer");
+      loggedPath = true;
+    }
+
+    try {
+      return extractFromNativeBuffer(runtime, frameData, width, height);
+    } catch (const std::exception &e) {
+      // Best effort: a failing native path must not prevent the ArrayBuffer
+      // fallback below
+      log(LOG_LEVEL::Debug,
+          "FrameProcessor: nativeBuffer extraction failed: ", e.what());
+      log(LOG_LEVEL::Debug, "FrameProcessor: Falling back to ArrayBuffer");
+    }
+  }
+
+  // Fallback to ArrayBuffer path (with copy)
+  if (frameData.hasProperty(runtime, "data")) {
+    static bool loggedPath = false;
+    if (!loggedPath) {
+      log(LOG_LEVEL::Debug, "FrameProcessor: Using ArrayBuffer (with copy)");
+      loggedPath = true;
+    }
+
+    return extractFromArrayBuffer(runtime, frameData, width, height);
+  }
+
+  // No valid frame data source
+  throw std::runtime_error(
+      "FrameProcessor: No valid frame data (neither nativeBuffer nor data "
+      "property found)");
+}
+
+/**
+ * @brief Read the "width" and "height" properties of a frame object.
+ *
+ * @param runtime JSI runtime that owns frameData
+ * @param frameData Frame object to inspect
+ * @return cv::Size built from both values truncated to int
+ *
+ * @throws std::runtime_error if either property is absent
+ */
+cv::Size FrameProcessor::getFrameSize(jsi::Runtime &runtime,
+                                      const jsi::Object &frameData) {
+  // "height" is only looked up when "width" is present
+  const bool hasBothDimensions = frameData.hasProperty(runtime, "width") &&
+                                 frameData.hasProperty(runtime, "height");
+  if (!hasBothDimensions) {
+    throw std::runtime_error(
+        "FrameProcessor: Frame data missing width or height property");
+  }
+
+  // Fetches one numeric property and truncates it to int
+  const auto readDimension = [&runtime, &frameData](const char *key) {
+    return static_cast<int>(frameData.getProperty(runtime, key).asNumber());
+  };
+
+  const int frameWidth = readDimension("width");
+  const int frameHeight = readDimension("height");
+  return cv::Size(frameWidth, frameHeight);
+}
+
+/**
+ * @brief Report whether the frame object carries a "nativeBuffer" property.
+ *
+ * Only the presence of the property is checked, not its value.
+ */
+bool FrameProcessor::hasNativeBuffer(jsi::Runtime &runtime,
+                                     const jsi::Object &frameData) {
+  const bool propertyPresent = frameData.hasProperty(runtime, "nativeBuffer");
+  return propertyPresent;
+}
+
+/**
+ * @brief Build a cv::Mat from the platform buffer referenced by the
+ * "nativeBuffer" property.
+ *
+ * The property is read as a BigInt holding the buffer handle, which is then
+ * passed to FrameExtractor::extractFromNativeBuffer().
+ *
+ * @param runtime JSI runtime that owns frameData
+ * @param frameData Frame object with a "nativeBuffer" BigInt property
+ * @param width Expected frame width, used only for a diagnostic comparison
+ * @param height Expected frame height, used only for a diagnostic comparison
+ * @return The frame produced by FrameExtractor
+ *
+ * @throws std::runtime_error if the handle is null or the extracted frame is
+ * empty, so that extractFrame() can fall back to the ArrayBuffer source
+ */
+cv::Mat FrameProcessor::extractFromNativeBuffer(jsi::Runtime &runtime,
+                                                const jsi::Object &frameData,
+                                                int width, int height) {
+  const jsi::Value nativeBufferValue =
+      frameData.getProperty(runtime, "nativeBuffer");
+
+  // Handle bigint pointer value from JavaScript
+  const uint64_t bufferPtr =
+      nativeBufferValue.asBigInt(runtime).asUint64(runtime);
+  if (bufferPtr == 0) {
+    throw std::runtime_error("FrameProcessor: nativeBuffer pointer is null");
+  }
+
+  // Use FrameExtractor to get cv::Mat from platform-specific buffer
+  cv::Mat frame = FrameExtractor::extractFromNativeBuffer(bufferPtr);
+  if (frame.empty()) {
+    throw std::runtime_error(
+        "FrameProcessor: nativeBuffer extraction produced an empty frame");
+  }
+
+  // A size mismatch is only reported, not treated as an error
+  // NOTE(review): presumably the extractor may rotate the frame - confirm
+  if (frame.cols != width || frame.rows != height) {
+    log(LOG_LEVEL::Debug, "FrameProcessor: Dimension mismatch - expected ",
+        width, "x", height, " but got ", frame.cols, "x", frame.rows);
+  }
+
+  return frame;
+}
+
+/**
+ * @brief Build a cv::Mat from the ArrayBuffer stored in the "data" property.
+ *
+ * The pixel format is not transmitted explicitly; it is inferred from the
+ * number of bytes per row (buffer size divided by height):
+ * - at least width * 4 bytes per row: 4-channel (RGBA), extra bytes are
+ *   treated as row padding
+ * - at least width * 3 bytes per row: 3-channel (RGB)
+ * A 3-channel buffer padded to width * 4 bytes or more per row cannot be told
+ * apart from RGBA by this heuristic.
+ *
+ * @param runtime JSI runtime that owns frameData
+ * @param frameData Frame object with a "data" ArrayBuffer property
+ * @param width Frame width in pixels, must be positive
+ * @param height Frame height in pixels, must be positive
+ * @return A cv::Mat (CV_8UC4 or CV_8UC3) owning its own copy of the pixels
+ *
+ * @throws std::runtime_error if the dimensions are not positive, "data" is
+ * not a non-empty ArrayBuffer, or the buffer is too small for an RGB frame
+ */
+cv::Mat FrameProcessor::extractFromArrayBuffer(jsi::Runtime &runtime,
+                                               const jsi::Object &frameData,
+                                               int width, int height) {
+  // Guards the division below and the cv::Mat shape
+  if (width <= 0 || height <= 0) {
+    throw std::runtime_error("FrameProcessor: Invalid frame dimensions " +
+                             std::to_string(width) + "x" +
+                             std::to_string(height));
+  }
+
+  const jsi::Object pixelObject =
+      frameData.getProperty(runtime, "data").asObject(runtime);
+  // getArrayBuffer() only asserts the type, so check it explicitly
+  if (!pixelObject.isArrayBuffer(runtime)) {
+    throw std::runtime_error(
+        "FrameProcessor: 'data' property is not an ArrayBuffer");
+  }
+
+  auto arrayBuffer = pixelObject.getArrayBuffer(runtime);
+  uint8_t *data = arrayBuffer.data(runtime);
+  const size_t bufferSize = arrayBuffer.size(runtime);
+  if (data == nullptr || bufferSize == 0) {
+    throw std::runtime_error("FrameProcessor: ArrayBuffer is empty");
+  }
+
+  // Determine format based on buffer size; all arithmetic is done in size_t
+  // to avoid signed overflow and signed/unsigned comparisons
+  const size_t stride = bufferSize / static_cast<size_t>(height);
+  const size_t expectedRGBAStride = static_cast<size_t>(width) * 4;
+  const size_t expectedRGBStride = static_cast<size_t>(width) * 3;
+
+  int pixelType = 0;
+
+  if (stride >= expectedRGBAStride) {
+    // RGBA format with potential padding
+    pixelType = CV_8UC4;
+
+    static bool loggedFormat = false;
+    if (!loggedFormat) {
+      log(LOG_LEVEL::Debug,
+          "FrameProcessor: ArrayBuffer format is RGBA, "
+          "stride: ",
+          stride);
+      loggedFormat = true;
+    }
+  } else if (stride >= expectedRGBStride) {
+    // RGB format
+    pixelType = CV_8UC3;
+
+    static bool loggedFormat = false;
+    if (!loggedFormat) {
+      log(LOG_LEVEL::Debug,
+          "FrameProcessor: ArrayBuffer format is RGB, stride: ", stride);
+      loggedFormat = true;
+    }
+  } else {
+    throw std::runtime_error(
+        "FrameProcessor: Unexpected buffer size - expected " +
+        std::to_string(expectedRGBStride) + " or " +
+        std::to_string(expectedRGBAStride) + " bytes per row, got " +
+        std::to_string(stride));
+  }
+
+  // The cv::Mat constructor taking a data pointer does not copy, so the
+  // result would alias memory owned by the JS ArrayBuffer. Clone it so the
+  // returned frame stays valid independently of that buffer.
+  const cv::Mat borrowedView(height, width, pixelType, data, stride);
+  return borrowedView.clone();
+}
+
+} // namespace utils
+} // namespace rnexecutorch
Original file line number	Diff line number	Diff line change
`@@ -73,4 +73,4 @@ Classification::postprocess(const Tensor &tensor) {`
`73`	`73`	`return probs;`
`74`	`74`	`}`
`75`	`75`
`76`		`-} // namespace rnexecutorch::models::classification`
	`76`	`+} // namespace rnexecutorch::models::classification`