
Commit 90a02ef

feat: support sam/sam2 with qnn (microsoft#315)
Co-authored-by: ziyuanguo <ziyuanguo@microsoft.com>
1 parent ad62f41 commit 90a02ef

40 files changed: 3206 additions & 11 deletions

.aitk/configs/checks.json

Lines changed: 9 additions & 9 deletions
@@ -1,16 +1,16 @@
 {
-    "configCheck": 165,
-    "copyCheck": 180,
+    "configCheck": 167,
+    "copyCheck": 182,
     "extensionCheck": 2,
-    "gitignoreCheck": 42,
+    "gitignoreCheck": 44,
     "inferenceModelCheck": 25,
-    "ipynbCheck": 42,
-    "licenseCheck": 39,
-    "modelProjectCheck": 44,
+    "ipynbCheck": 44,
+    "licenseCheck": 41,
+    "modelProjectCheck": 46,
     "oliveCheck": 60,
-    "oliveJsonCheck": 165,
-    "pathCheck": 1397,
+    "oliveJsonCheck": 167,
+    "pathCheck": 1423,
     "requirementsCheck": 37,
     "templateCheck": 3,
-    "venvRequirementsCheck": 16
+    "venvRequirementsCheck": 17
 }

.aitk/configs/model_list.json

Lines changed: 33 additions & 0 deletions
@@ -471,6 +471,38 @@
             "text-generation"
         ]
     },
+    {
+        "displayName": "facebook/sam-vit-base",
+        "icon": "meta",
+        "modelLink": "https://huggingface.co/facebook/sam-vit-base",
+        "id": "huggingface/facebook/sam-vit-base",
+        "runtimes": [
+            "QNN"
+        ],
+        "architecture": "Transformer",
+        "status": "Hide",
+        "relativePath": "sam-vit-base/aitk",
+        "version": 1,
+        "pipeline_tags": [
+            "fill-mask"
+        ]
+    },
+    {
+        "displayName": "facebook/sam2.1-hiera-small",
+        "icon": "meta",
+        "modelLink": "https://huggingface.co/facebook/sam2.1-hiera-small",
+        "id": "huggingface/facebook/sam2.1-hiera-small",
+        "runtimes": [
+            "QNN"
+        ],
+        "architecture": "Transformer",
+        "status": "Hide",
+        "relativePath": "sam2.1-hiera-small/aitk",
+        "version": 1,
+        "pipeline_tags": [
+            "fill-mask"
+        ]
+    },
     {
         "displayName": "meta-llama/Llama-3.1-8B-Instruct",
         "icon": "meta",
@@ -925,6 +957,7 @@
         "AIMClab-RUC/COCO-CN": "https://huggingface.co/datasets/AIMClab-RUC/COCO-CN",
         "librispeech_asr": "https://huggingface.co/datasets/openslr/librispeech_asr",
         "phiyodr/coco2017": "https://huggingface.co/datasets/phiyodr/coco2017",
+        "nielsr/coco-panoptic-val2017": "https://huggingface.co/datasets/nielsr/coco-panoptic-val2017",
         "pileval_for_awq_benchmark": "https://huggingface.co/datasets/mit-han-lab/pile-val-backup"
     },
     "LoginRequiredDatasets": [
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+onnxsim==0.6.2
+sam2==1.1.0
+transformers==4.56.2

.aitk/scripts/project_processor.py

Lines changed: 1 addition & 0 deletions
@@ -49,6 +49,7 @@ def fetch_pipeline_tags(model_link: str) -> Optional[List[str]]:
     "google": IconEnum.Gemini,
     "deepseek-ai": IconEnum.DeepSeek,
     "Qwen": IconEnum.qwen,
+    "facebook": IconEnum.Meta,
     "meta-llama": IconEnum.Meta,
     "mistralai": IconEnum.mistralai,
     # TODO add
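
The new `"facebook": IconEnum.Meta` entry extends the org-to-icon table used when building model metadata. A hypothetical sketch of how such a prefix table is typically consumed (the `ICON_MAP` dict and `icon_for` helper are illustrative, not the repository's code):

```python
# Hypothetical mirror of the table in project_processor.py: the Hugging Face
# org prefix (the part of the model id before "/") selects the display icon.
ICON_MAP = {
    "facebook": "meta",       # added by this commit (IconEnum.Meta upstream)
    "meta-llama": "meta",
    "deepseek-ai": "deepseek",
}

def icon_for(model_id: str, default: str = "generic") -> str:
    """Pick an icon by the org prefix of an id like 'facebook/sam-vit-base'."""
    return ICON_MAP.get(model_id.split("/", 1)[0], default)

assert icon_for("facebook/sam-vit-base") == "meta"
```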

sam-vit-base/QNN/sam_mask_generator.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ def main():
     parser.add_argument("--image_path", required=True, help="Path to input image")
     parser.add_argument("--output_path", default="mask_output.png", help="Path to save the output mask image")
     parser.add_argument("--box_x", type=int, default=40, help="Top-Left X coordinate of input box")
-    parser.add_argument("--box_y", type=int, default=235, help="To-Left Y coordinate of input box")
+    parser.add_argument("--box_y", type=int, default=235, help="Top-Left Y coordinate of input box")
     parser.add_argument("--box_w", type=int, default=940, help="Width of input box")
     parser.add_argument("--box_h", type=int, default=490, help="Height of input box")
     args = parser.parse_args()
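
Note that this CLI describes the box as top-left corner plus width and height, whereas SAM's `input_boxes` tensor (shape `(1, 4)` in the decoder config later in this commit) expects `[x1, y1, x2, y2]` corners. A small conversion sketch; the helper name is ours:

```python
def box_xywh_to_corners(x: int, y: int, w: int, h: int) -> list[list[int]]:
    """Convert a top-left/width/height box to SAM's [x1, y1, x2, y2] layout."""
    return [[x, y, x + w, y + h]]

# The script's defaults (--box_x 40 --box_y 235 --box_w 940 --box_h 490):
print(box_xywh_to_corners(40, 235, 940, 490))  # [[40, 235, 980, 725]]
```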

sam-vit-base/aitk/.gitignore

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+__pycache__
+/cache
+/history/*/*
+!/history/*/history.config
+!/history/*/olive_config.json
+/quantization_dataset

sam-vit-base/aitk/README.md

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+# SAM Model Conversion
+
+This repository demonstrates the optimization of the [facebook/sam-vit-base](https://huggingface.co/facebook/sam-vit-base) model using **post-training quantization (PTQ)** techniques.
+
+
+### Run the Quantization + Compilation Config
+Activate the **Quantization Python Environment** and run the workflow:
+
+For the encoder model:
+```bash
+olive run --config sam_vision_encoder_qnn.json
+```
+
+For the point- and box-based decoder model:
+```bash
+olive run --config sam_mask_decoder_qnn_fp16_ctx.json
+```
+
+### Model ORT Execution
+
+Execute the SAM model in the **AOT Compilation Python Environment** using the following command:
+
+```bash
+python sam_mask_generator.py --model_ve path/to/encoder_model.onnx --model_md path/to/decoder_model.onnx --image_path car.png --box_x 40 --box_y 235 --box_w 940 --box_h 490 --output_path car_mask.png
+```
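
Before running the generator, it can be useful to confirm that both compiled models load and expose the expected inputs. A minimal sanity-check sketch, assuming onnxruntime falls back to CPU when the QNN execution provider is absent from the local build; the model paths are placeholders:

```python
import onnxruntime as ort

# Prefer QNN but keep CPU as a fallback so the check also runs off-device.
wanted = ("QNNExecutionProvider", "CPUExecutionProvider")
providers = [p for p in wanted if p in ort.get_available_providers()]

# Placeholder paths; substitute the Olive output models.
for path in ("path/to/encoder_model.onnx", "path/to/decoder_model.onnx"):
    sess = ort.InferenceSession(path, providers=providers)
    print(path, [(i.name, i.shape, i.type) for i in sess.get_inputs()])
```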
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+{
+    "copies": [
+        {
+            "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+            "dst": "winml.py"
+        }
+    ]
+}
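
This copy spec declares files to mirror into the project, with `src` resolved relative to the config's own directory. A plausible minimal interpreter for the format; the `apply_copies` name and the resolution rule are assumptions, not code from this commit:

```python
import json
import shutil
from pathlib import Path

def apply_copies(config_path: str) -> None:
    """Copy each declared src file to dst, both resolved against the config's directory."""
    base = Path(config_path).parent
    spec = json.loads(Path(config_path).read_text(encoding="utf-8"))
    for entry in spec["copies"]:
        src = (base / entry["src"]).resolve()
        shutil.copyfile(src, base / entry["dst"])
```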

sam-vit-base/aitk/config.py

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+class ModelConfig:
+    model_name = "facebook/sam-vit-base"
+    data_dir = "quantization_dataset"
+    image_dataset = "nielsr/coco-panoptic-val2017"
+    image_dataset_split = "train"
+    ve_input_name = "pixel_values"
+    ve_sample_size = 1024
+    ve_channel_size = 3
+    mask_point_input_names = ("input_points", "image_embeddings")
+    mask_point_input_shapes = ((1, 1, 2), (256, 64, 64))
+    mask_box_input_names = ("input_boxes", "image_embeddings")
+    mask_box_input_shapes = ((1, 4), (256, 64, 64))
+    mask_input_names = ("input_points", "input_labels", "image_embeddings")
+    mask_input_shapes = ((1, 2, 2), (1, 2), (256, 64, 64))
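
The name/shape tuples in `ModelConfig` are enough to fabricate random inputs for smoke-testing an exported decoder. A sketch assuming float32 tensors (the actual quantized models may expect float16, as the notebook below checks at runtime):

```python
import numpy as np

def dummy_inputs(names, shapes, dtype=np.float32):
    """Build an ORT feed dict of random tensors from ModelConfig's tuples."""
    return {n: np.random.rand(*s).astype(dtype) for n, s in zip(names, shapes)}

# Box-based decoder inputs, per ModelConfig above:
feeds = dummy_inputs(("input_boxes", "image_embeddings"), ((1, 4), (256, 64, 64)))
# e.g. outputs = session.run(None, feeds) against the exported decoder
```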
Lines changed: 126 additions & 0 deletions
@@ -0,0 +1,126 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "547a25de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "encoder_path = \"./model/encoder/model.onnx\"\n",
+    "decoder_path = \"./model/decoder/model.onnx\"\n",
+    "\n",
+    "ExecutionProvider=\"QNNExecutionProvider\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eed9c231",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    print(worker_script)\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        try:\n",
+    "            ort.register_execution_provider_library(item[0], item[1])\n",
+    "        except Exception as e:\n",
+    "            print(f\"Failed to register execution provider {item[0]}: {e}\")\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9e9d8984",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from urllib import request\n",
+    "\n",
+    "test_image_url = \"https://github.com/facebookresearch/segment-anything/blob/main/notebooks/images/truck.jpg?raw=true\"\n",
+    "test_image_name = \"truck.jpg\"\n",
+    "\n",
+    "request.urlretrieve(test_image_url, test_image_name)\n",
+    "\n",
+    "from IPython.display import Image, display\n",
+    "\n",
+    "display(Image(filename=test_image_name))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ffcd22ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "from pathlib import Path\n",
+    "\n",
+    "NOTEBOOK_DIR = Path(__file__).parent if \"__file__\" in globals() else Path.cwd()\n",
+    "PROJECT_ROOT = NOTEBOOK_DIR.parents[1]\n",
+    "sys.path.insert(0, str(PROJECT_ROOT))\n",
+    "\n",
+    "import numpy as np\n",
+    "from PIL import Image\n",
+    "from sam_mask_generator import get_mask_ort\n",
+    "\n",
+    "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n",
+    "    ep_devices = ort.get_ep_devices()\n",
+    "    for ep_device in ep_devices:\n",
+    "        if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
+    "            print(f\"Adding {ep_name} for {device_type}\")\n",
+    "            session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+    "            break\n",
+    "\n",
+    "\n",
+    "sess_options = ort.SessionOptions()\n",
+    "\n",
+    "add_ep_for_device(sess_options, ExecutionProvider, ort.OrtHardwareDeviceType.CPU)\n",
+    "\n",
+    "# Load image\n",
+    "raw_image = Image.open(test_image_name).convert(\"RGB\")\n",
+    "input_box = [[[100, 300], [1750, 900]]]\n",
+    "\n",
+    "# Load models\n",
+    "sess_ve = ort.InferenceSession(encoder_path, sess_options=sess_options)\n",
+    "sess_md = ort.InferenceSession(decoder_path, sess_options=sess_options)\n",
+    "\n",
+    "sess_ve_inputs = sess_ve.get_inputs()\n",
+    "sess_md_inputs = sess_md.get_inputs()\n",
+    "\n",
+    "ve_dtype = np.float32 if sess_ve_inputs[0].type == 'tensor(float)' else np.float16\n",
+    "md_dtype = np.float32 if sess_md_inputs[0].type == 'tensor(float)' else np.float16\n",
+    "\n",
+    "# Get mask\n",
+    "mask = get_mask_ort(sess_ve, sess_md, raw_image, input_box, ve_dtype, md_dtype, sess_ve_inputs, sess_md_inputs)\n",
+    "\n",
+    "# Save mask using PIL\n",
+    "mask_img = Image.fromarray(mask * 255) # Convert binary mask to 0-255\n",
+    "\n",
+    "from IPython.display import display\n",
+    "display(mask_img)"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
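
The notebook displays the raw binary mask by itself; a common follow-up is compositing the mask over the source image. A short PIL-only sketch; the red tint and 40% alpha are arbitrary choices, and the mask is assumed to match the image's height and width:

```python
import numpy as np
from PIL import Image

def overlay_mask(image: Image.Image, mask: np.ndarray, alpha: int = 102) -> Image.Image:
    """Blend a binary (H, W) mask over an RGB image as a translucent red layer."""
    overlay = np.zeros((*mask.shape, 4), dtype=np.uint8)
    overlay[mask.astype(bool)] = (255, 0, 0, alpha)  # red at ~40% opacity
    return Image.alpha_composite(image.convert("RGBA"), Image.fromarray(overlay, "RGBA"))

# overlay_mask(raw_image, mask).save("truck_overlay.png")
```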
