Skip to content

Commit 34e9377

Browse files
Comments
1 parent 102d9fa commit 34e9377

6 files changed

Lines changed: 38 additions & 27 deletions

File tree

.ci/scripts/test_yolo26.sh

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,14 @@ prepare_artifacts_upload() {
167167

168168

169169
# Export model.
170-
EXPORTED_MODEL_NAME="${MODEL_NAME}_fp32_${MODE}.pte"
171-
echo "Exporting ${EXPORTED_MODEL_NAME}"
172170
EXPORT_ARGS="--model_name=${MODEL_NAME} --backend=${MODE}"
171+
if [[ -n "${PT2E_QUANTIZE}" ]]; then
172+
EXPORTED_MODEL_NAME="${MODEL_NAME}_int8_${MODE}.pte"
173+
EXPORT_ARGS="${EXPORT_ARGS} --quantize --video_path=${VIDEO_PATH}"
174+
else
175+
EXPORTED_MODEL_NAME="${MODEL_NAME}_fp32_${MODE}.pte"
176+
fi
177+
echo "Exporting ${EXPORTED_MODEL_NAME}"
173178

174179
# Add dynamically linked library location
175180
cmake_install_executorch_libraries

backends/openvino/quantizer/quantizer.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,14 +192,18 @@ def set_ignored_scope(
192192
:param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match
193193
in the model graph.
194194
"""
195+
subgraphs_ = []
195196
if subgraphs:
196-
subgraphs = [nncf.Subgraph(inputs=subgraph[0], outputs=subgraph[1]) for subgraph in subgraphs]
197+
subgraphs_ = [
198+
nncf.Subgraph(inputs=subgraph[0], outputs=subgraph[1])
199+
for subgraph in subgraphs
200+
]
197201
self._algo.set_ignored_scope(
198202
nncf.IgnoredScope(
199203
names=names or [],
200204
patterns=patterns or [],
201205
types=types or [],
202-
subgraphs=subgraphs or [],
206+
subgraphs=subgraphs_,
203207
validate=validate,
204208
)
205209
)

docs/source/success-stories.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ Automate PyTorch model deployment to iOS, Android, and edge devices with ExecuTo
156156

157157
- **LoRA adapter** - Export two LoRA adapters that share a single foundation weight file, saving memory and disk space. [Try →](https://github.com/meta-pytorch/executorch-examples/tree/main/program-data-separation/cpp/lora_example)
158158

159-
- **OpenVINO from Intel** - Deploy [Yolo12](https://github.com/pytorch/executorch/tree/main/examples/models/yolo12), [Llama](https://github.com/pytorch/executorch/tree/main/examples/openvino/llama), and [Stable Diffusion](https://github.com/pytorch/executorch/tree/main/examples/openvino/stable_diffusion) on [OpenVINO from Intel](https://www.intel.com/content/www/us/en/developer/articles/community/optimizing-executorch-on-ai-pcs.html).
159+
- **OpenVINO from Intel** - Deploy [Yolo26](https://github.com/pytorch/executorch/tree/main/examples/models/yolo26), [Llama](https://github.com/pytorch/executorch/tree/main/examples/openvino/llama), and [Stable Diffusion](https://github.com/pytorch/executorch/tree/main/examples/openvino/stable_diffusion) on [OpenVINO from Intel](https://www.intel.com/content/www/us/en/developer/articles/community/optimizing-executorch-on-ai-pcs.html).
160160

161161
- **Audio Generation** - Generate audio from text prompts using Stable Audio Open Small on Arm CPUs with XNNPACK and KleidiAI. [Try →](https://github.com/Arm-Examples/ML-examples/tree/main/kleidiai-examples/audiogen-et)[Video →](https://www.youtube.com/watch?v=q2P0ESVxhAY) <!-- @lint-ignore -->
162162

examples/models/yolo26/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ XNNPACK:
4646
python export_and_validate.py --model_name yolo26s --input_dims=[1920,1080] --backend xnnpack
4747
```
4848

49-
> **_NOTE:_** Quantization for XNNPACK backend is WIP. Please refer to <https://github.com/pytorch/executorch/issues/11523> for more details.
50-
5149
Exported model could be validated using the `--validate` key:
5250

5351
```bash

examples/models/yolo26/export_and_validate.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,10 @@ def lower_to_openvino(
7676
subset_size: int,
7777
quantize: bool,
7878
) -> ExecutorchProgramManager:
79+
import nncf
7980
from executorch.backends.openvino.partitioner import OpenvinoPartitioner
8081
from executorch.backends.openvino.quantizer import OpenVINOQuantizer
8182
from executorch.backends.openvino.quantizer.quantizer import QuantizationMode
82-
import nncf
8383
from nncf.experimental.torch.fx import quantize_pt2e
8484

8585
if quantize:
@@ -269,7 +269,7 @@ def transform_fn(frame):
269269
if val_dataset_yaml_path is not None:
270270
if input_dims != [640, 640]:
271271
raise NotImplementedError(
272-
f"Validation with the custom input shape {input_dims} is not implmenented."
272+
f"Validation with the custom input shape {input_dims} is not implemented."
273273
" Please use the default --input_dims=[640,640] for the validation."
274274
)
275275
stats = validate_yolo(model, exec_prog, val_dataset_yaml_path)
@@ -288,7 +288,6 @@ def _prepare_validation(
288288
} # highest priority args on the right
289289

290290
validator = model._smart_load("validator")(args=args, _callbacks=model.callbacks)
291-
validator.device = torch.device("cpu")
292291
stride = 32 # default stride
293292
validator.stride = stride # used in get_dataloader() for padding
294293
validator.data = check_det_dataset(dataset_yaml_path)

examples/models/yolo26/inference.h

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ struct DetectionConfig {
3232
};
3333

3434
cv::Mat scale_with_padding(
35-
cv::Mat& source,
35+
const cv::Mat& source,
3636
int* pad_x,
3737
int* pad_y,
3838
float* scale,
@@ -41,13 +41,16 @@ cv::Mat scale_with_padding(
4141
int row = source.rows;
4242
int m_inputWidth = img_dims.width;
4343
int m_inputHeight = img_dims.height;
44-
if (col == m_inputWidth and row == m_inputHeight) {
44+
if (col == m_inputWidth && row == m_inputHeight) {
45+
*pad_x = 0;
46+
*pad_y = 0;
47+
*scale = 1.f;
4548
return source;
4649
}
4750

48-
*scale = std::min(m_inputWidth / (float)col, m_inputHeight / (float)row);
49-
int resized_w = col * *scale;
50-
int resized_h = row * *scale;
51+
*scale = std::min(m_inputWidth / static_cast<float>(col), m_inputHeight / static_cast<float>(row));
52+
int resized_w = static_cast<int>(col * *scale);
53+
int resized_h = static_cast<int>(row * *scale);
5154
*pad_x = (m_inputWidth - resized_w) / 2;
5255
*pad_y = (m_inputHeight - resized_h) / 2;
5356

@@ -63,7 +66,7 @@ std::vector<Detection> infer_yolo_once(
6366
Module& module,
6467
cv::Mat input,
6568
cv::Size img_dims,
66-
const DetectionConfig yolo_config) {
69+
const DetectionConfig& yolo_config) {
6770
int pad_x, pad_y;
6871
float scale;
6972
input = scale_with_padding(input, &pad_x, &pad_y, &scale, img_dims);
@@ -72,15 +75,15 @@ std::vector<Detection> infer_yolo_once(
7275
cv::dnn::blobFromImage(
7376
input, blob, 1.0 / 255.0, img_dims, cv::Scalar(), true, false);
7477
const auto t_input = from_blob(
75-
(void*)blob.data,
78+
static_cast<void*>(blob.data),
7679
std::vector<int>(blob.size.p, blob.size.p + blob.dims),
7780
ScalarType::Float);
7881
const auto result = module.forward(t_input);
7982

8083
ET_CHECK_MSG(
8184
result.ok(),
8285
"Execution of method forward failed with status 0x%" PRIx32,
83-
(uint32_t)result.error());
86+
static_cast<uint32_t>(result.error()));
8487

8588
// Yolo26 end-to-end (post-NMS) output format: [1, N, 6]
8689
// Each detection row: [x1, y1, x2, y2, confidence, class_id]
@@ -89,12 +92,11 @@ std::vector<Detection> infer_yolo_once(
8992
t.dim() == 3 && t.sizes()[2] == 6,
9093
"Unexpected output shape: expected [1, N, 6] (end-to-end post-NMS format)");
9194

92-
const int num_detections = t.sizes()[1];
95+
const int64_t num_detections = t.sizes()[1];
9396
const int num_classes = static_cast<int>(yolo_config.classes.size());
9497
const float* data = static_cast<const float*>(t.const_data_ptr());
95-
9698
std::vector<Detection> detections;
97-
for (int i = 0; i < num_detections; ++i) {
99+
for (int64_t i = 0; i < num_detections; ++i) {
98100
const float* det = data + i * 6;
99101
const float x1 = det[0];
100102
const float y1 = det[1];
@@ -106,18 +108,21 @@ std::vector<Detection> infer_yolo_once(
106108
if (confidence <= yolo_config.modelScoreThreshold)
107109
continue;
108110

111+
if (class_id < 0 || class_id >= num_classes)
112+
continue;
113+
109114
// Map coordinates back to original image space
110115
const int left = static_cast<int>((x1 - pad_x) / scale);
111116
const int top = static_cast<int>((y1 - pad_y) / scale);
112117
const int width = static_cast<int>((x2 - x1) / scale);
113118
const int height = static_cast<int>((y2 - y1) / scale);
114119

115-
Detection result;
116-
result.class_id = class_id;
117-
result.confidence = confidence;
118-
result.className = yolo_config.classes[class_id];
119-
result.box = cv::Rect(left, top, width, height);
120-
detections.push_back(result);
120+
Detection detection;
121+
detection.class_id = class_id;
122+
detection.confidence = confidence;
123+
detection.className = yolo_config.classes[class_id];
124+
detection.box = cv::Rect(left, top, width, height);
125+
detections.push_back(detection);
121126
}
122127

123128
return detections;

0 commit comments

Comments
 (0)