Skip to content

Commit 42aaf02

Browse files
committed
feat: Add multimethod handling to ObjectDetection
1 parent 992d04b commit 42aaf02

File tree

9 files changed

+504
-78
lines changed

9 files changed

+504
-78
lines changed

apps/computer-vision/app/object_detection/index.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import {
77
useObjectDetection,
88
RF_DETR_NANO,
99
SSDLITE_320_MOBILENET_V3_LARGE,
10+
YOLO26N,
1011
ObjectDetectionModelSources,
1112
} from 'react-native-executorch';
1213
import { View, StyleSheet, Image } from 'react-native';
@@ -18,6 +19,7 @@ import ScreenWrapper from '../../ScreenWrapper';
1819
const MODELS: ModelOption<ObjectDetectionModelSources>[] = [
1920
{ label: 'RF-DeTR Nano', value: RF_DETR_NANO },
2021
{ label: 'SSDLite MobileNet', value: SSDLITE_320_MOBILENET_V3_LARGE },
22+
{ label: 'YOLO26N', value: YOLO26N },
2123
];
2224

2325
export default function ObjectDetectionScreen() {

apps/computer-vision/app/vision_camera/index.tsx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ type ModelId =
4646
| 'classification'
4747
| 'objectDetectionSsdlite'
4848
| 'objectDetectionRfdetr'
49+
| 'objectDetectionYolo26n'
4950
| 'segmentationDeeplabResnet50'
5051
| 'segmentationDeeplabResnet101'
5152
| 'segmentationDeeplabMobilenet'
@@ -95,6 +96,7 @@ const TASKS: Task[] = [
9596
variants: [
9697
{ id: 'objectDetectionSsdlite', label: 'SSDLite MobileNet' },
9798
{ id: 'objectDetectionRfdetr', label: 'RF-DETR Nano' },
99+
{ id: 'objectDetectionYolo26n', label: 'YOLO26N' },
98100
],
99101
},
100102
{
@@ -241,7 +243,10 @@ export default function VisionCameraScreen() {
241243
<ObjectDetectionTask
242244
{...taskProps}
243245
activeModel={
244-
activeModel as 'objectDetectionSsdlite' | 'objectDetectionRfdetr'
246+
activeModel as
247+
| 'objectDetectionSsdlite'
248+
| 'objectDetectionRfdetr'
249+
| 'objectDetectionYolo26n'
245250
}
246251
/>
247252
)}

apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,16 @@ import {
66
Detection,
77
RF_DETR_NANO,
88
SSDLITE_320_MOBILENET_V3_LARGE,
9+
YOLO26N,
910
useObjectDetection,
1011
} from 'react-native-executorch';
1112
import { labelColor, labelColorBg } from '../utils/colors';
1213
import { TaskProps } from './types';
1314

14-
type ObjModelId = 'objectDetectionSsdlite' | 'objectDetectionRfdetr';
15+
type ObjModelId =
16+
| 'objectDetectionSsdlite'
17+
| 'objectDetectionRfdetr'
18+
| 'objectDetectionYolo26n';
1519

1620
type Props = TaskProps & { activeModel: ObjModelId };
1721

@@ -34,8 +38,17 @@ export default function ObjectDetectionTask({
3438
model: RF_DETR_NANO,
3539
preventLoad: activeModel !== 'objectDetectionRfdetr',
3640
});
41+
const yolo26n = useObjectDetection({
42+
model: YOLO26N,
43+
preventLoad: activeModel !== 'objectDetectionYolo26n',
44+
});
3745

38-
const active = activeModel === 'objectDetectionSsdlite' ? ssdlite : rfdetr;
46+
const active =
47+
activeModel === 'objectDetectionSsdlite'
48+
? ssdlite
49+
: activeModel === 'objectDetectionRfdetr'
50+
? rfdetr
51+
: yolo26n;
3952

4053
const [detections, setDetections] = useState<Detection[]>([]);
4154
const [imageSize, setImageSize] = useState({ width: 1, height: 1 });
Lines changed: 113 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include "ObjectDetection.h"
22
#include "Constants.h"
33

4+
#include <set>
5+
46
#include <rnexecutorch/Error.h>
57
#include <rnexecutorch/ErrorCodes.h>
68
#include <rnexecutorch/Log.h>
@@ -18,21 +20,6 @@ ObjectDetection::ObjectDetection(
1820
std::shared_ptr<react::CallInvoker> callInvoker)
1921
: VisionModel(modelSource, callInvoker),
2022
labelNames_(std::move(labelNames)) {
21-
auto inputTensors = getAllInputShapes();
22-
if (inputTensors.empty()) {
23-
throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs,
24-
"Model seems to not take any input tensors.");
25-
}
26-
modelInputShape_ = inputTensors[0];
27-
if (modelInputShape_.size() < 2) {
28-
char errorMessage[100];
29-
std::snprintf(errorMessage, sizeof(errorMessage),
30-
"Unexpected model input size, expected at least 2 dimensions "
31-
"but got: %zu.",
32-
modelInputShape_.size());
33-
throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs,
34-
errorMessage);
35-
}
3623
if (normMean.size() == 3) {
3724
normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]);
3825
} else if (!normMean.empty()) {
@@ -47,14 +34,65 @@ ObjectDetection::ObjectDetection(
4734
}
4835
}
4936

37+
/// Reports the spatial input size of the currently loaded method.
///
/// Falls back to the base-class notion of input size when no method has
/// been loaded yet, or when the method's first input shape cannot be
/// interpreted (missing, or fewer than 2 dimensions).
///
/// NOTE(review): this packs the last two dimensions as
/// cv::Size{dims[rank-2], dims[rank-1]}; cv::Size is (width, height), so for
/// an NCHW input this yields (H, W) — harmless for square inputs, but worth
/// confirming against VisionModel's convention for non-square models.
cv::Size ObjectDetection::modelInputSize() const {
  if (!currentlyLoadedMethod_.empty()) {
    const auto shapes = getAllInputShapes(currentlyLoadedMethod_);
    if (!shapes.empty() && shapes[0].size() >= 2) {
      const auto &dims = shapes[0];
      const auto rank = dims.size();
      return {static_cast<int>(dims[rank - 2]),
              static_cast<int>(dims[rank - 1])};
    }
  }
  // No method loaded, or shape unusable: defer to the base implementation.
  return VisionModel::modelInputSize();
}
49+
50+
void ObjectDetection::ensureMethodLoaded(const std::string &methodName) {
51+
if (methodName.empty()) {
52+
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
53+
"methodName cannot be empty");
54+
}
55+
if (currentlyLoadedMethod_ == methodName) {
56+
return;
57+
}
58+
if (!module_) {
59+
throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
60+
"Model module is not loaded");
61+
}
62+
if (!currentlyLoadedMethod_.empty()) {
63+
module_->unload_method(currentlyLoadedMethod_);
64+
}
65+
auto loadResult = module_->load_method(methodName);
66+
if (loadResult != executorch::runtime::Error::Ok) {
67+
throw RnExecutorchError(
68+
loadResult, "Failed to load method '" + methodName +
69+
"'. Ensure the method exists in the exported model.");
70+
}
71+
currentlyLoadedMethod_ = methodName;
72+
}
73+
74+
std::set<int32_t> ObjectDetection::prepareAllowedClasses(
75+
const std::vector<int32_t> &classIndices) const {
76+
std::set<int32_t> allowedClasses;
77+
if (!classIndices.empty()) {
78+
allowedClasses.insert(classIndices.begin(), classIndices.end());
79+
}
80+
return allowedClasses;
81+
}
82+
5083
std::vector<types::Detection>
5184
ObjectDetection::postprocess(const std::vector<EValue> &tensors,
52-
cv::Size originalSize, double detectionThreshold) {
85+
cv::Size originalSize, double detectionThreshold,
86+
double iouThreshold,
87+
const std::vector<int32_t> &classIndices) {
5388
const cv::Size inputSize = modelInputSize();
5489
float widthRatio = static_cast<float>(originalSize.width) / inputSize.width;
5590
float heightRatio =
5691
static_cast<float>(originalSize.height) / inputSize.height;
5792

93+
// Prepare allowed classes set for filtering
94+
auto allowedClasses = prepareAllowedClasses(classIndices);
95+
5896
std::vector<types::Detection> detections;
5997
auto bboxTensor = tensors.at(0).toTensor();
6098
std::span<const float> bboxes(
@@ -75,36 +113,62 @@ ObjectDetection::postprocess(const std::vector<EValue> &tensors,
75113
if (scores[i] < detectionThreshold) {
76114
continue;
77115
}
116+
117+
auto labelIdx = static_cast<int32_t>(labels[i]);
118+
119+
// Filter by class if classesOfInterest is specified
120+
if (!allowedClasses.empty() &&
121+
allowedClasses.find(labelIdx) == allowedClasses.end()) {
122+
continue;
123+
}
124+
78125
float x1 = bboxes[i * 4] * widthRatio;
79126
float y1 = bboxes[i * 4 + 1] * heightRatio;
80127
float x2 = bboxes[i * 4 + 2] * widthRatio;
81128
float y2 = bboxes[i * 4 + 3] * heightRatio;
82-
auto labelIdx = static_cast<std::size_t>(labels[i]);
83-
if (labelIdx >= labelNames_.size()) {
129+
130+
if (static_cast<std::size_t>(labelIdx) >= labelNames_.size()) {
84131
throw RnExecutorchError(
85132
RnExecutorchErrorCode::InvalidConfig,
86133
"Model output class index " + std::to_string(labelIdx) +
87134
" exceeds labelNames size " + std::to_string(labelNames_.size()) +
88135
". Ensure the labelMap covers all model output classes.");
89136
}
90137
detections.emplace_back(utils::computer_vision::BBox{x1, y1, x2, y2},
91-
labelNames_[labelIdx],
92-
static_cast<int32_t>(labelIdx), scores[i]);
138+
labelNames_[labelIdx], labelIdx, scores[i]);
93139
}
94140

95-
return utils::computer_vision::nonMaxSuppression(detections,
96-
constants::IOU_THRESHOLD);
141+
return utils::computer_vision::nonMaxSuppression(detections, iouThreshold);
97142
}
98143

99-
std::vector<types::Detection>
100-
ObjectDetection::runInference(cv::Mat image, double detectionThreshold) {
144+
std::vector<types::Detection> ObjectDetection::runInference(
145+
cv::Mat image, double detectionThreshold, double iouThreshold,
146+
const std::vector<int32_t> &classIndices, const std::string &methodName) {
101147
if (detectionThreshold < 0.0 || detectionThreshold > 1.0) {
102148
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
103149
"detectionThreshold must be in range [0, 1]");
104150
}
151+
if (iouThreshold < 0.0 || iouThreshold > 1.0) {
152+
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
153+
"iouThreshold must be in range [0, 1]");
154+
}
155+
105156
std::scoped_lock lock(inference_mutex_);
106157

158+
// Ensure the correct method is loaded
159+
ensureMethodLoaded(methodName);
160+
107161
cv::Size originalSize = image.size();
162+
163+
// Query input shapes for the currently loaded method
164+
auto inputShapes = getAllInputShapes(methodName);
165+
if (inputShapes.empty() || inputShapes[0].size() < 2) {
166+
throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs,
167+
"Could not determine input shape for method: " +
168+
methodName);
169+
}
170+
modelInputShape_ = inputShapes[0];
171+
108172
cv::Mat preprocessed = preprocess(image);
109173

110174
auto inputTensor =
@@ -114,46 +178,50 @@ ObjectDetection::runInference(cv::Mat image, double detectionThreshold) {
114178
: image_processing::getTensorFromMatrix(modelInputShape_,
115179
preprocessed);
116180

117-
auto forwardResult = BaseModel::forward(inputTensor);
118-
if (!forwardResult.ok()) {
119-
throw RnExecutorchError(forwardResult.error(),
120-
"The model's forward function did not succeed. "
121-
"Ensure the model input is correct.");
181+
auto executeResult = execute(methodName, {inputTensor});
182+
if (!executeResult.ok()) {
183+
throw RnExecutorchError(executeResult.error(),
184+
"The model's " + methodName +
185+
" method did not succeed. "
186+
"Ensure the model input is correct.");
122187
}
123188

124-
return postprocess(forwardResult.get(), originalSize, detectionThreshold);
189+
return postprocess(executeResult.get(), originalSize, detectionThreshold,
190+
iouThreshold, classIndices);
125191
}
126192

127-
std::vector<types::Detection>
128-
ObjectDetection::generateFromString(std::string imageSource,
129-
double detectionThreshold) {
193+
std::vector<types::Detection> ObjectDetection::generateFromString(
194+
std::string imageSource, double detectionThreshold, double iouThreshold,
195+
std::vector<int32_t> classIndices, std::string methodName) {
130196
cv::Mat imageBGR = image_processing::readImage(imageSource);
131197

132198
cv::Mat imageRGB;
133199
cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB);
134200

135-
return runInference(imageRGB, detectionThreshold);
201+
return runInference(imageRGB, detectionThreshold, iouThreshold, classIndices,
202+
methodName);
136203
}
137204

138-
std::vector<types::Detection>
139-
ObjectDetection::generateFromFrame(jsi::Runtime &runtime,
140-
const jsi::Value &frameData,
141-
double detectionThreshold) {
142-
auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData);
205+
/// Runs object detection on a camera frame delivered via JSI.
///
/// The frame is rotated to the model's expected orientation before
/// inference, and the resulting boxes are rotated back into the original
/// frame's coordinate space afterwards.
///
/// @param runtime JSI runtime the frame data lives in.
/// @param frameData JS value describing the camera frame.
/// @param detectionThreshold minimum score for a detection to be kept.
/// @param iouThreshold IoU threshold used for non-max suppression.
/// @param classIndices class indices to keep; empty means keep all.
/// @param methodName exported model method to run.
/// @return detections mapped back to the original frame orientation.
std::vector<types::Detection> ObjectDetection::generateFromFrame(
    jsi::Runtime &runtime, const jsi::Value &frameData,
    double detectionThreshold, double iouThreshold,
    std::vector<int32_t> classIndices, std::string methodName) {
  // Read the device orientation attached to the frame so the output boxes
  // can be mapped back after inference. These two locals were dropped in a
  // refactor while the inverseRotateBbox loop below still referenced them,
  // which does not compile; they are restored here.
  auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData);
  cv::Mat frame = extractFromFrame(runtime, frameData);
  cv::Mat rotated = ::rnexecutorch::utils::rotateFrameForModel(frame, orient);

  auto detections = runInference(rotated, detectionThreshold, iouThreshold,
                                 classIndices, methodName);

  // Boxes are produced in the rotated (model-input) frame; rotate them back
  // into the original frame's coordinate space.
  for (auto &det : detections) {
    ::rnexecutorch::utils::inverseRotateBbox(det.bbox, orient, rotated.size());
  }
  return detections;
}
151218

152-
std::vector<types::Detection>
153-
ObjectDetection::generateFromPixels(JSTensorViewIn pixelData,
154-
double detectionThreshold) {
219+
/// Runs object detection on a raw pixel buffer handed over from JS.
///
/// @param pixelData tensor view over the caller's pixel buffer.
/// @param detectionThreshold minimum score for a detection to be kept.
/// @param iouThreshold IoU threshold used for non-max suppression.
/// @param classIndices class indices to keep; empty means keep all.
/// @param methodName exported model method to run.
/// @return detections in the buffer's original coordinate space.
std::vector<types::Detection> ObjectDetection::generateFromPixels(
    JSTensorViewIn pixelData, double detectionThreshold, double iouThreshold,
    std::vector<int32_t> classIndices, std::string methodName) {
  // No colour conversion here, unlike generateFromString — presumably the
  // pixel buffer already arrives in the channel order the model expects.
  cv::Mat image = extractFromPixels(pixelData);
  return runInference(image, detectionThreshold, iouThreshold, classIndices,
                      methodName);
}
159227
} // namespace rnexecutorch::models::object_detection

0 commit comments

Comments (0)