diff --git a/apps/computer-vision/app/classification/index.tsx b/apps/computer-vision/app/classification/index.tsx
index c40bc5ec93..97dde1727d 100644
--- a/apps/computer-vision/app/classification/index.tsx
+++ b/apps/computer-vision/app/classification/index.tsx
@@ -1,6 +1,9 @@
 import Spinner from '../../components/Spinner';
 import { getImage } from '../../utils';
-import { useClassification, EFFICIENTNET_V2_S } from 'react-native-executorch';
+import {
+  useClassification,
+  EFFICIENTNET_V2_S_QUANTIZED,
+} from 'react-native-executorch';
 import { View, StyleSheet, Image, Text, ScrollView } from 'react-native';
 import { BottomBar } from '../../components/BottomBar';
 import React, { useContext, useEffect, useState } from 'react';
@@ -13,7 +16,7 @@ export default function ClassificationScreen() {
   );
   const [imageUri, setImageUri] = useState('');
 
-  const model = useClassification({ model: EFFICIENTNET_V2_S });
+  const model = useClassification({ model: EFFICIENTNET_V2_S_QUANTIZED });
   const { setGlobalGenerating } = useContext(GeneratingContext);
   useEffect(() => {
     setGlobalGenerating(model.isGenerating);
diff --git a/apps/computer-vision/app/semantic_segmentation/index.tsx b/apps/computer-vision/app/semantic_segmentation/index.tsx
index f0b3f0688c..5ecb22ea5f 100644
--- a/apps/computer-vision/app/semantic_segmentation/index.tsx
+++ b/apps/computer-vision/app/semantic_segmentation/index.tsx
@@ -2,7 +2,7 @@ import Spinner from '../../components/Spinner';
 import { BottomBar } from '../../components/BottomBar';
 import { getImage } from '../../utils';
 import {
-  DEEPLAB_V3_RESNET50,
+  DEEPLAB_V3_MOBILENET_V3_LARGE_QUANTIZED,
   useSemanticSegmentation,
 } from 'react-native-executorch';
 import {
@@ -46,7 +46,7 @@ export default function SemanticSegmentationScreen() {
   const { setGlobalGenerating } = useContext(GeneratingContext);
   const { isReady, isGenerating, downloadProgress, forward } =
     useSemanticSegmentation({
-      model: DEEPLAB_V3_RESNET50,
+      model: DEEPLAB_V3_MOBILENET_V3_LARGE_QUANTIZED,
     });
   const [imageUri, setImageUri] = useState('');
   const [imageSize, setImageSize] = useState({ width: 0, height: 0 });
diff --git a/apps/computer-vision/app/style_transfer/index.tsx b/apps/computer-vision/app/style_transfer/index.tsx
index a1b3a7834d..dc6a0d4963 100644
--- a/apps/computer-vision/app/style_transfer/index.tsx
+++ b/apps/computer-vision/app/style_transfer/index.tsx
@@ -3,7 +3,7 @@ import { BottomBar } from '../../components/BottomBar';
 import { getImage } from '../../utils';
 import {
   useStyleTransfer,
-  STYLE_TRANSFER_CANDY,
+  STYLE_TRANSFER_CANDY_QUANTIZED,
 } from 'react-native-executorch';
 import { View, StyleSheet, Image } from 'react-native';
 import React, { useContext, useEffect, useState } from 'react';
@@ -11,7 +11,7 @@ import { GeneratingContext } from '../../context';
 import ScreenWrapper from '../../ScreenWrapper';
 
 export default function StyleTransferScreen() {
-  const model = useStyleTransfer({ model: STYLE_TRANSFER_CANDY });
+  const model = useStyleTransfer({ model: STYLE_TRANSFER_CANDY_QUANTIZED });
   const { setGlobalGenerating } = useContext(GeneratingContext);
   useEffect(() => {
     setGlobalGenerating(model.isGenerating);
diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx
index 66ca348757..4ff3c895de 100644
--- a/apps/text-embeddings/app/clip-embeddings/index.tsx
+++ b/apps/text-embeddings/app/clip-embeddings/index.tsx
@@ -15,7 +15,7 @@ import {
   useTextEmbeddings,
   useImageEmbeddings,
   CLIP_VIT_BASE_PATCH32_TEXT,
-  CLIP_VIT_BASE_PATCH32_IMAGE,
+  CLIP_VIT_BASE_PATCH32_IMAGE_QUANTIZED,
 } from 'react-native-executorch';
 import { launchImageLibrary } from 'react-native-image-picker';
 import { useIsFocused } from '@react-navigation/native';
@@ -29,7 +29,9 @@ export default function ClipEmbeddingsScreenWrapper() {
 
 function ClipEmbeddingsScreen() {
   const textModel = useTextEmbeddings({ model: CLIP_VIT_BASE_PATCH32_TEXT });
-  const imageModel = useImageEmbeddings({ model: CLIP_VIT_BASE_PATCH32_IMAGE });
+  const imageModel = useImageEmbeddings({
+    model: CLIP_VIT_BASE_PATCH32_IMAGE_QUANTIZED,
+  });
 
   const [inputSentence, setInputSentence] = useState('');
   const [sentencesWithEmbeddings, setSentencesWithEmbeddings] = useState<
diff --git a/docs/docs/02-benchmarks/inference-time.md b/docs/docs/02-benchmarks/inference-time.md
index a1580169ac..7e43d7d8a6 100644
--- a/docs/docs/02-benchmarks/inference-time.md
+++ b/docs/docs/02-benchmarks/inference-time.md
@@ -3,29 +3,84 @@ title: Inference Time
 ---
 
 :::warning
-Times presented in the tables are measured as consecutive runs of the model. Initial run times may be up to 2x longer due to model loading and initialization.
+Times presented in the tables are measured as consecutive runs of the model.
+Initial run times may be up to 2x longer due to model loading and
+initialization.
 :::
 
 ## Classification
 
-| Model             | iPhone 17 Pro (Core ML) [ms] | iPhone 16 Pro (Core ML) [ms] | iPhone SE 3 (Core ML) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| ----------------- | :--------------------------: | :--------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
-| EFFICIENTNET_V2_S |              64              |              68              |            217             |                205                |            198            |
+:::info
+Inference times are measured directly from native C++ code, wrapping only the
+model's forward pass, excluding input-dependent pre- and post-processing (e.g.
+image resizing, normalization) and any overhead from the React Native runtime.
+:::
+
+:::info
+For this model, every input image is resized to the input size the model
+expects before processing. Resizing is typically fast for small images but may
+be noticeably slower for very large ones, which can increase total processing time.
+:::
+
+| Model / Device                   | iPhone 17 Pro [ms] | Google Pixel 10 [ms] |
+| :------------------------------- | :----------------: | :------------------: |
+| EFFICIENTNET_V2_S (XNNPACK FP32) |         70         |         100          |
+| EFFICIENTNET_V2_S (XNNPACK INT8) |         22         |          38          |
+| EFFICIENTNET_V2_S (Core ML FP32) |         12         |          -           |
+| EFFICIENTNET_V2_S (Core ML FP16) |         5          |          -           |
 
 ## Object Detection
 
-| Model                          | iPhone 17 Pro (XNNPACK) [ms] | iPhone 16 Pro (XNNPACK) [ms] | iPhone SE 3 (XNNPACK) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| ------------------------------ | :--------------------------: | :--------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
-| SSDLITE_320_MOBILENET_V3_LARGE |              71              |              74              |            257             |                115                |            109            |
+:::info
+Inference times are measured directly from native C++ code, wrapping only the
+model's forward pass, excluding input-dependent pre- and post-processing (e.g.
+image resizing, normalization) and any overhead from the React Native runtime.
+:::
+
+:::info
+For this model, every input image is resized to the input size the model
+expects before processing. Resizing is typically fast for small images but may
+be noticeably slower for very large ones, which can increase total processing time.
+:::
+
+| Model / Device                                | iPhone 17 Pro [ms] | Google Pixel 10 [ms] |
+| :-------------------------------------------- | :----------------: | :------------------: |
+| SSDLITE_320_MOBILENET_V3_LARGE (XNNPACK FP32) |         20         |          18          |
+| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP32) |         18         |          -           |
+| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP16) |         8          |          -           |
 
 ## Style Transfer
 
-| Model                        | iPhone 17 Pro (Core ML) [ms] | iPhone 16 Pro (Core ML) [ms] | iPhone SE 3 (Core ML) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| ---------------------------- | :--------------------------: | :--------------------------: | :------------------------: | :-------------------------------: | :-----------------------: |
-| STYLE_TRANSFER_CANDY         |             1400             |             1485             |            4255            |               2510                |           2355            |
-| STYLE_TRANSFER_MOSAIC        |             1400             |             1485             |            4255            |               2510                |           2355            |
-| STYLE_TRANSFER_UDNIE         |             1400             |             1485             |            4255            |               2510                |           2355            |
-| STYLE_TRANSFER_RAIN_PRINCESS |             1400             |             1485             |            4255            |               2510                |           2355            |
+:::info
+Inference times are measured directly from native C++ code, wrapping only the
+model's forward pass, excluding input-dependent pre- and post-processing (e.g.
+image resizing, normalization) and any overhead from the React Native runtime.
+:::
+
+:::info
+For these models, every input image is resized to the input size the model
+expects before processing. Resizing is typically fast for small images but may
+be noticeably slower for very large ones, which can increase total processing time.
+:::
+
+| Model / Device                              | iPhone 17 Pro [ms] | Google Pixel 10 [ms] |
+| :------------------------------------------ | :----------------: | :------------------: |
+| STYLE_TRANSFER_CANDY (XNNPACK FP32)         |        1192        |         1025         |
+| STYLE_TRANSFER_CANDY (XNNPACK INT8)         |        272         |         430          |
+| STYLE_TRANSFER_CANDY (Core ML FP32)         |        100         |          -           |
+| STYLE_TRANSFER_CANDY (Core ML FP16)         |        150         |          -           |
+| STYLE_TRANSFER_MOSAIC (XNNPACK FP32)        |        1192        |         1025         |
+| STYLE_TRANSFER_MOSAIC (XNNPACK INT8)        |        272         |         430          |
+| STYLE_TRANSFER_MOSAIC (Core ML FP32)        |        100         |          -           |
+| STYLE_TRANSFER_MOSAIC (Core ML FP16)        |        150         |          -           |
+| STYLE_TRANSFER_UDNIE (XNNPACK FP32)         |        1192        |         1025         |
+| STYLE_TRANSFER_UDNIE (XNNPACK INT8)         |        272         |         430          |
+| STYLE_TRANSFER_UDNIE (Core ML FP32)         |        100         |          -           |
+| STYLE_TRANSFER_UDNIE (Core ML FP16)         |        150         |          -           |
+| STYLE_TRANSFER_RAIN_PRINCESS (XNNPACK FP32) |        1192        |         1025         |
+| STYLE_TRANSFER_RAIN_PRINCESS (XNNPACK INT8) |        272         |         430          |
+| STYLE_TRANSFER_RAIN_PRINCESS (Core ML FP32) |        100         |          -           |
+| STYLE_TRANSFER_RAIN_PRINCESS (Core ML FP16) |        150         |          -           |
 
 ## OCR
 
@@ -109,23 +164,51 @@ Benchmark times for text embeddings are highly dependent on the sentence length.
 
 ## Image Embeddings
 
-| Model                       | iPhone 17 Pro (XNNPACK) [ms] | OnePlus 12 (XNNPACK) [ms] |
-| --------------------------- | :--------------------------: | :-----------------------: |
-| CLIP_VIT_BASE_PATCH32_IMAGE |              18              |            55             |
+:::info
+Inference times are measured directly from native C++ code, wrapping only the
+model's forward pass, excluding input-dependent pre- and post-processing (e.g.
+image resizing, normalization) and any overhead from the React Native runtime.
+:::
 
 :::info
-Image embedding benchmark times are measured using 224×224 pixel images, as required by the model. All input images, whether larger or smaller, are resized to 224×224 before processing. Resizing is typically fast for small images but may be noticeably slower for very large images, which can increase total inference time.
+For this model, every input image is resized to the input size the model
+expects before processing. Resizing is typically fast for small images but may
+be noticeably slower for very large ones, which can increase total processing time.
 :::
 
+| Model / Device                             | iPhone 17 Pro [ms] | Google Pixel 10 [ms] |
+| :----------------------------------------- | :----------------: | :------------------: |
+| CLIP_VIT_BASE_PATCH32_IMAGE (XNNPACK FP32) |         14         |          68          |
+| CLIP_VIT_BASE_PATCH32_IMAGE (XNNPACK INT8) |         11         |          31          |
+
 ## Semantic Segmentation
 
-:::warning
-Times presented in the tables are measured as consecutive runs of the model. Initial run times may be up to 2x longer due to model loading and initialization.
+:::info
+Inference times are measured directly from native C++ code, wrapping only the
+model's forward pass, excluding input-dependent pre- and post-processing (e.g.
+image resizing, normalization) and any overhead from the React Native runtime.
+:::
+
+:::info
+For these models, every input image is resized to the input size the model
+expects before processing. Resizing is typically fast for small images but may
+be noticeably slower for very large ones, which can increase total processing time.
 :::
 
-| Model             | iPhone 16 Pro (Core ML) [ms] | iPhone 14 Pro Max (Core ML) [ms] | Samsung Galaxy S24 (XNNPACK) [ms] |
-| ----------------- | ---------------------------- | -------------------------------- | --------------------------------- |
-| DEELABV3_RESNET50 | 1000                         | 670                              | 700                               |
+| Model / Device                               | iPhone 17 Pro [ms] | Google Pixel 10 [ms] |
+| :------------------------------------------- | :----------------: | :------------------: |
+| DEEPLAB_V3_RESNET50 (XNNPACK FP32)           |        2000        |         2200         |
+| DEEPLAB_V3_RESNET50 (XNNPACK INT8)           |        118         |         380          |
+| DEEPLAB_V3_RESNET101 (XNNPACK FP32)          |        2900        |         3300         |
+| DEEPLAB_V3_RESNET101 (XNNPACK INT8)          |        174         |         660          |
+| DEEPLAB_V3_MOBILENET_V3_LARGE (XNNPACK FP32) |        131         |         153          |
+| DEEPLAB_V3_MOBILENET_V3_LARGE (XNNPACK INT8) |         17         |          40          |
+| LRASPP_MOBILENET_V3_LARGE (XNNPACK FP32)     |         13         |          36          |
+| LRASPP_MOBILENET_V3_LARGE (XNNPACK INT8)     |         12         |          20          |
+| FCN_RESNET50 (XNNPACK FP32)                  |        1800        |         2160         |
+| FCN_RESNET50 (XNNPACK INT8)                  |        100         |         320          |
+| FCN_RESNET101 (XNNPACK FP32)                 |        2600        |         3160         |
+| FCN_RESNET101 (XNNPACK INT8)                 |        160         |         620          |
 
 ## Text to image
 
diff --git a/docs/docs/02-benchmarks/memory-usage.md b/docs/docs/02-benchmarks/memory-usage.md
index 2f921cb48e..0dc59f0fc3 100644
--- a/docs/docs/02-benchmarks/memory-usage.md
+++ b/docs/docs/02-benchmarks/memory-usage.md
@@ -2,45 +2,91 @@
 title: Memory Usage
 ---
 
+## Classification
+
 :::info
-All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12 (Android).
+Memory usage values represent the peak memory increase observed while the model was
+loaded and actively running inference, relative to the baseline app memory
+before model initialization.
 :::
 
-## Classification
-
-| Model             | Android (XNNPACK) [MB] | iOS (Core ML) [MB] |
-| ----------------- | :--------------------: | :----------------: |
-| EFFICIENTNET_V2_S |          230           |         87         |
+| Model / Device                   | iPhone 17 Pro [MB] | Google Pixel 10 [MB] |
+| -------------------------------- | :----------------: | :------------------: |
+| EFFICIENTNET_V2_S (XNNPACK FP32) |        101         |         122          |
+| EFFICIENTNET_V2_S (XNNPACK INT8) |         62         |          78          |
+| EFFICIENTNET_V2_S (Core ML FP32) |        101         |          -           |
+| EFFICIENTNET_V2_S (Core ML FP16) |         87         |          -           |
 
 ## Object Detection
 
-| Model                          | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| ------------------------------ | :--------------------: | :----------------: |
-| SSDLITE_320_MOBILENET_V3_LARGE |          164           |        132         |
+:::info
+Memory usage values represent the peak memory increase observed while the model was
+loaded and actively running inference, relative to the baseline app memory
+before model initialization.
+:::
+
+| Model / Device                                | iPhone 17 Pro [MB] | Google Pixel 10 [MB] |
+| --------------------------------------------- | :----------------: | :------------------: |
+| SSDLITE_320_MOBILENET_V3_LARGE (XNNPACK FP32) |         94         |         104          |
+| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP32) |         83         |          -           |
+| SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP16) |         62         |          -           |
 
 ## Style Transfer
 
-| Model                        | Android (XNNPACK) [MB] | iOS (Core ML) [MB] |
-| ---------------------------- | :--------------------: | :----------------: |
-| STYLE_TRANSFER_CANDY         |          1200          |        380         |
-| STYLE_TRANSFER_MOSAIC        |          1200          |        380         |
-| STYLE_TRANSFER_UDNIE         |          1200          |        380         |
-| STYLE_TRANSFER_RAIN_PRINCESS |          1200          |        380         |
+:::info
+Memory usage values represent the peak memory increase observed while the model was
+loaded and actively running inference, relative to the baseline app memory
+before model initialization.
+:::
+
+| Model / Device                              | iPhone 17 Pro [MB] | Google Pixel 10 [MB] |
+| ------------------------------------------- | :----------------: | :------------------: |
+| STYLE_TRANSFER_CANDY (XNNPACK FP32)         |        1200        |         1200         |
+| STYLE_TRANSFER_CANDY (XNNPACK INT8)         |        800         |         800          |
+| STYLE_TRANSFER_CANDY (Core ML FP32)         |        400         |          -           |
+| STYLE_TRANSFER_CANDY (Core ML FP16)         |        380         |          -           |
+| STYLE_TRANSFER_MOSAIC (XNNPACK FP32)        |        1200        |         1200         |
+| STYLE_TRANSFER_MOSAIC (XNNPACK INT8)        |        800         |         800          |
+| STYLE_TRANSFER_MOSAIC (Core ML FP32)        |        400         |          -           |
+| STYLE_TRANSFER_MOSAIC (Core ML FP16)        |        380         |          -           |
+| STYLE_TRANSFER_UDNIE (XNNPACK FP32)         |        1200        |         1200         |
+| STYLE_TRANSFER_UDNIE (XNNPACK INT8)         |        800         |         800          |
+| STYLE_TRANSFER_UDNIE (Core ML FP32)         |        400         |          -           |
+| STYLE_TRANSFER_UDNIE (Core ML FP16)         |        380         |          -           |
+| STYLE_TRANSFER_RAIN_PRINCESS (XNNPACK FP32) |        1200        |         1200         |
+| STYLE_TRANSFER_RAIN_PRINCESS (XNNPACK INT8) |        800         |         800          |
+| STYLE_TRANSFER_RAIN_PRINCESS (Core ML FP32) |        400         |          -           |
+| STYLE_TRANSFER_RAIN_PRINCESS (Core ML FP16) |        380         |          -           |
 
 ## OCR
 
+:::info
+All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12
+(Android).
+:::
+
 | Model                                | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
 | ------------------------------------ | :--------------------: | :----------------: |
 | Detector (CRAFT) + Recognizer (CRNN) |          1400          |        1320        |
 
 ## Vertical OCR
 
+:::info
+All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12
+(Android).
+:::
+
 | Model                                | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
 | ------------------------------------ | :--------------------: | :----------------: |
 | Detector (CRAFT) + Recognizer (CRNN) |       1000-1600        |     1000-1500      |
 
 ## LLMs
 
+:::info
+All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12
+(Android).
+:::
+
 | Model                 | Android (XNNPACK) [GB] | iOS (XNNPACK) [GB] |
 | --------------------- | :--------------------: | :----------------: |
 | LLAMA3_2_1B           |          3.3           |        3.1         |
@@ -52,12 +98,22 @@ All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12 (A
 
 ## Speech to text
 
+:::info
+All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12
+(Android).
+:::
+
 | Model        | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
 | ------------ | :--------------------: | :----------------: |
 | WHISPER_TINY |          410           |        375         |
 
 ## Text to speech
 
+:::info
+All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12
+(Android).
+:::
+
 | Model         | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
 | ------------- | :--------------------: | :----------------: |
 | KOKORO_SMALL  |          820           |        820         |
@@ -69,6 +125,11 @@ The reported memory usage values include the memory footprint of the Phonemis pa
 
 ## Text Embeddings
 
+:::info
+All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12
+(Android).
+:::
+
 | Model                      | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
 | -------------------------- | :--------------------: | :----------------: |
 | ALL_MINILM_L6_V2           |           95           |        110         |
@@ -79,14 +140,27 @@ The reported memory usage values include the memory footprint of the Phonemis pa
 
 ## Image Embeddings
 
-| Model                       | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
-| --------------------------- | :--------------------: | :----------------: |
-| CLIP_VIT_BASE_PATCH32_IMAGE |          345           |        340         |
+:::info
+Memory usage values represent the peak memory increase observed while the model was
+loaded and actively running inference, relative to the baseline app memory
+before model initialization.
+:::
+
+| Model / Device                             | iPhone 17 Pro [MB] | Google Pixel 10 [MB] |
+| ------------------------------------------ | :----------------: | :------------------: |
+| CLIP_VIT_BASE_PATCH32_IMAGE (XNNPACK FP32) |        340         |         345          |
 
 ## Semantic Segmentation
 
+:::info
+All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12
+(Android).
+:::
+
 :::warning
-Data presented in the following sections is based on inference with non-resized output. When resize is enabled, expect higher memory usage and inference time with higher resolutions.
+Data presented in the following sections is based on inference with non-resized
+output. When resize is enabled, expect higher memory usage and inference time
+with higher resolutions.
 :::
 
 | Model             | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
@@ -95,6 +169,11 @@ Data presented in the following sections is based on inference with non-resized
 
 ## Text to image
 
+:::info
+All the below benchmarks were performed on iPhone 17 Pro (iOS) and OnePlus 12
+(Android).
+:::
+
 | Model                 | Android (XNNPACK) [MB] | iOS (XNNPACK) [MB] |
 | --------------------- | ---------------------- | ------------------ |
 | BK_SDM_TINY_VPRED_256 | 2400                   | 2400               |
diff --git a/docs/docs/02-benchmarks/model-size.md b/docs/docs/02-benchmarks/model-size.md
index 7fa1d7f384..8dcfbbf45a 100644
--- a/docs/docs/02-benchmarks/model-size.md
+++ b/docs/docs/02-benchmarks/model-size.md
@@ -4,24 +4,24 @@ title: Model Size
 
 ## Classification
 
-| Model             | XNNPACK [MB] | Core ML [MB] |
-| ----------------- | :----------: | :----------: |
-| EFFICIENTNET_V2_S |     85.6     |     43.9     |
+| Model             | XNNPACK FP32 [MB] | XNNPACK INT8 [MB] | Core ML FP32 [MB] | Core ML FP16 [MB] |
+| :---------------- | :---------------: | :---------------: | :---------------: | :---------------: |
+| EFFICIENTNET_V2_S |       85.7        |       22.9        |       86.5        |       43.9        |
 
 ## Object Detection
 
-| Model                          | XNNPACK [MB] |
-| ------------------------------ | :----------: |
-| SSDLITE_320_MOBILENET_V3_LARGE |     13.9     |
+| Model                          | XNNPACK FP32 [MB] | Core ML FP32 [MB] | Core ML FP16 [MB] |
+| ------------------------------ | :---------------: | :---------------: | :---------------: |
+| SSDLITE_320_MOBILENET_V3_LARGE |       13.9        |       15.6        |       8.46        |
 
 ## Style Transfer
 
-| Model                        | XNNPACK [MB] | Core ML [MB] |
-| ---------------------------- | :----------: | :----------: |
-| STYLE_TRANSFER_CANDY         |     6.78     |     5.22     |
-| STYLE_TRANSFER_MOSAIC        |     6.78     |     5.22     |
-| STYLE_TRANSFER_UDNIE         |     6.78     |     5.22     |
-| STYLE_TRANSFER_RAIN_PRINCESS |     6.78     |     5.22     |
+| Model                        | XNNPACK FP32 [MB] | XNNPACK INT8 [MB] | Core ML FP32 [MB] | Core ML FP16 [MB] |
+| ---------------------------- | :---------------: | :---------------: | :---------------: | :---------------: |
+| STYLE_TRANSFER_CANDY         |       6.82        |       1.84        |       7.12        |       3.79        |
+| STYLE_TRANSFER_MOSAIC        |       6.82        |       1.84        |       7.12        |       3.79        |
+| STYLE_TRANSFER_UDNIE         |       6.82        |       1.84        |       7.12        |       3.79        |
+| STYLE_TRANSFER_RAIN_PRINCESS |       6.82        |       1.84        |       7.12        |       3.79        |
 
 ## OCR
 
@@ -82,15 +82,20 @@ title: Model Size
 
 ## Image Embeddings
 
-| Model                       | XNNPACK [MB] |
-| --------------------------- | :----------: |
-| CLIP_VIT_BASE_PATCH32_IMAGE |     352      |
+| Model                       | XNNPACK FP32 [MB] | XNNPACK INT8 [MB] |
+| --------------------------- | :---------------: | :---------------: |
+| CLIP_VIT_BASE_PATCH32_IMAGE |        352        |       96.4        |
 
 ## Semantic Segmentation
 
-| Model             | XNNPACK [MB] |
-| ----------------- | ------------ |
-| DEELABV3_RESNET50 | 168          |
+| Model                         | XNNPACK FP32 [MB] | XNNPACK INT8 [MB] |
+| ----------------------------- | :---------------: | :---------------: |
+| DEEPLAB_V3_RESNET50           |        168        |       42.4        |
+| DEEPLAB_V3_RESNET101          |        244        |       61.7        |
+| DEEPLAB_V3_MOBILENET_V3_LARGE |       44.1        |       11.4        |
+| LRASPP_MOBILENET_V3_LARGE     |       12.9        |       3.53        |
+| FCN_RESNET50                  |        141        |       35.7        |
+| FCN_RESNET101                 |        217        |        55         |
 
 ## Text to image
 
diff --git a/docs/docs/03-hooks/02-computer-vision/useClassification.md b/docs/docs/03-hooks/02-computer-vision/useClassification.md
index f57b7b254e..e9c2eebfab 100644
--- a/docs/docs/03-hooks/02-computer-vision/useClassification.md
+++ b/docs/docs/03-hooks/02-computer-vision/useClassification.md
@@ -86,6 +86,6 @@ function App() {
 
 ## Supported models
 
-| Model                                                                                                  | Number of classes | Class list                                                                                                                                                                    |
-| ------------------------------------------------------------------------------------------------------ | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [efficientnet_v2_s](https://huggingface.co/software-mansion/react-native-executorch-efficientnet-v2-s) | 1000              | [ImageNet1k_v1](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/common/rnexecutorch/models/classification/Constants.h) |
+| Model                                                                                                  | Number of classes | Class list                                                                                                                                                                    | Quantized |
+| ------------------------------------------------------------------------------------------------------ | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :-------: |
+| [efficientnet_v2_s](https://huggingface.co/software-mansion/react-native-executorch-efficientnet-v2-s) | 1000              | [ImageNet1k_v1](https://github.com/software-mansion/react-native-executorch/blob/main/packages/react-native-executorch/common/rnexecutorch/models/classification/Constants.h) |    Yes    |
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
index e19801cfd3..9c6cb6c016 100644
--- a/packages/react-native-executorch/src/constants/modelUrls.ts
+++ b/packages/react-native-executorch/src/constants/modelUrls.ts
@@ -374,18 +374,34 @@ export const LFM2_5_1_2B_INSTRUCT_QUANTIZED = {
 // Classification
 const EFFICIENTNET_V2_S_MODEL =
   Platform.OS === `ios`
-    ? `${URL_PREFIX}-efficientnet-v2-s/${VERSION_TAG}/coreml/efficientnet_v2_s_coreml_all.pte`
-    : `${URL_PREFIX}-efficientnet-v2-s/${VERSION_TAG}/xnnpack/efficientnet_v2_s_xnnpack.pte`;
+    ? `${URL_PREFIX}-efficientnet-v2-s/${NEXT_VERSION_TAG}/coreml/efficientnet_v2_s_coreml_fp32.pte`
+    : `${URL_PREFIX}-efficientnet-v2-s/${NEXT_VERSION_TAG}/xnnpack/efficientnet_v2_s_xnnpack_fp32.pte`;
+const EFFICIENTNET_V2_S_QUANTIZED_MODEL =
+  Platform.OS === `ios`
+    ? `${URL_PREFIX}-efficientnet-v2-s/${NEXT_VERSION_TAG}/coreml/efficientnet_v2_s_coreml_fp16.pte`
+    : `${URL_PREFIX}-efficientnet-v2-s/${NEXT_VERSION_TAG}/xnnpack/efficientnet_v2_s_xnnpack_int8.pte`;
 
 /**
  * @category Models - Classification
  */
 export const EFFICIENTNET_V2_S = {
+  modelName: 'efficientnet-v2-s',
   modelSource: EFFICIENTNET_V2_S_MODEL,
 };
 
+/**
+ * @category Models - Classification
+ */
+export const EFFICIENTNET_V2_S_QUANTIZED = {
+  modelName: 'efficientnet-v2-s-quantized',
+  modelSource: EFFICIENTNET_V2_S_QUANTIZED_MODEL,
+};
+
 // Object detection
-const SSDLITE_320_MOBILENET_V3_LARGE_MODEL = `${URL_PREFIX}-ssdlite320-mobilenet-v3-large/${VERSION_TAG}/ssdlite320-mobilenetv3-large.pte`;
+const SSDLITE_320_MOBILENET_V3_LARGE_MODEL =
+  Platform.OS === 'ios'
+    ? `${URL_PREFIX}-ssdlite320-mobilenet-v3-large/${NEXT_VERSION_TAG}/coreml/ssdlite320_mobilenet_v3_large_coreml_fp16.pte`
+    : `${URL_PREFIX}-ssdlite320-mobilenet-v3-large/${NEXT_VERSION_TAG}/xnnpack/ssdlite320_mobilenet_v3_large_xnnpack_fp32.pte`;
 const RF_DETR_NANO_MODEL = `${URL_PREFIX}-rfdetr-nano-detector/${NEXT_VERSION_TAG}/rfdetr_detector.pte`;
 
 /**
@@ -407,49 +423,113 @@ export const RF_DETR_NANO = {
 // Style transfer
 const STYLE_TRANSFER_CANDY_MODEL =
   Platform.OS === `ios`
-    ? `${URL_PREFIX}-style-transfer-candy/${VERSION_TAG}/coreml/style_transfer_candy_coreml.pte`
-    : `${URL_PREFIX}-style-transfer-candy/${VERSION_TAG}/xnnpack/style_transfer_candy_xnnpack.pte`;
+    ? `${URL_PREFIX}-style-transfer-candy/${NEXT_VERSION_TAG}/coreml/style_transfer_candy_coreml_fp32.pte`
+    : `${URL_PREFIX}-style-transfer-candy/${NEXT_VERSION_TAG}/xnnpack/style_transfer_candy_xnnpack_fp32.pte`;
+const STYLE_TRANSFER_CANDY_QUANTIZED_MODEL =
+  Platform.OS === `ios`
+    ? `${URL_PREFIX}-style-transfer-candy/${NEXT_VERSION_TAG}/coreml/style_transfer_candy_coreml_fp16.pte`
+    : `${URL_PREFIX}-style-transfer-candy/${NEXT_VERSION_TAG}/xnnpack/style_transfer_candy_xnnpack_int8.pte`;
 const STYLE_TRANSFER_MOSAIC_MODEL =
   Platform.OS === `ios`
-    ? `${URL_PREFIX}-style-transfer-mosaic/${VERSION_TAG}/coreml/style_transfer_mosaic_coreml.pte`
-    : `${URL_PREFIX}-style-transfer-mosaic/${VERSION_TAG}/xnnpack/style_transfer_mosaic_xnnpack.pte`;
+    ? `${URL_PREFIX}-style-transfer-mosaic/${NEXT_VERSION_TAG}/coreml/style_transfer_mosaic_coreml_fp32.pte`
+    : `${URL_PREFIX}-style-transfer-mosaic/${NEXT_VERSION_TAG}/xnnpack/style_transfer_mosaic_xnnpack_fp32.pte`;
+const STYLE_TRANSFER_MOSAIC_QUANTIZED_MODEL =
+  Platform.OS === `ios`
+    ? `${URL_PREFIX}-style-transfer-mosaic/${NEXT_VERSION_TAG}/coreml/style_transfer_mosaic_coreml_fp16.pte`
+    : `${URL_PREFIX}-style-transfer-mosaic/${NEXT_VERSION_TAG}/xnnpack/style_transfer_mosaic_xnnpack_int8.pte`;
 const STYLE_TRANSFER_RAIN_PRINCESS_MODEL =
   Platform.OS === `ios`
-    ? `${URL_PREFIX}-style-transfer-rain-princess/${VERSION_TAG}/coreml/style_transfer_rain_princess_coreml.pte`
-    : `${URL_PREFIX}-style-transfer-rain-princess/${VERSION_TAG}/xnnpack/style_transfer_rain_princess_xnnpack.pte`;
+    ? `${URL_PREFIX}-style-transfer-rain-princess/${NEXT_VERSION_TAG}/coreml/style_transfer_rain_princess_coreml_fp32.pte`
+    : `${URL_PREFIX}-style-transfer-rain-princess/${NEXT_VERSION_TAG}/xnnpack/style_transfer_rain_princess_xnnpack_fp32.pte`;
+const STYLE_TRANSFER_RAIN_PRINCESS_QUANTIZED_MODEL =
+  Platform.OS === `ios`
+    ? `${URL_PREFIX}-style-transfer-rain-princess/${NEXT_VERSION_TAG}/coreml/style_transfer_rain_princess_coreml_fp16.pte`
+    : `${URL_PREFIX}-style-transfer-rain-princess/${NEXT_VERSION_TAG}/xnnpack/style_transfer_rain_princess_xnnpack_int8.pte`;
 const STYLE_TRANSFER_UDNIE_MODEL =
   Platform.OS === `ios`
-    ? `${URL_PREFIX}-style-transfer-udnie/${VERSION_TAG}/coreml/style_transfer_udnie_coreml.pte`
-    : `${URL_PREFIX}-style-transfer-udnie/${VERSION_TAG}/xnnpack/style_transfer_udnie_xnnpack.pte`;
+    ? `${URL_PREFIX}-style-transfer-udnie/${NEXT_VERSION_TAG}/coreml/style_transfer_udnie_coreml_fp32.pte`
+    : `${URL_PREFIX}-style-transfer-udnie/${NEXT_VERSION_TAG}/xnnpack/style_transfer_udnie_xnnpack_fp32.pte`;
+const STYLE_TRANSFER_UDNIE_QUANTIZED_MODEL =
+  Platform.OS === `ios`
+    ? `${URL_PREFIX}-style-transfer-udnie/${NEXT_VERSION_TAG}/coreml/style_transfer_udnie_coreml_fp16.pte`
+    : `${URL_PREFIX}-style-transfer-udnie/${NEXT_VERSION_TAG}/xnnpack/style_transfer_udnie_xnnpack_int8.pte`;
 
 /**
  * @category Models - Style Transfer
  */
 export const STYLE_TRANSFER_CANDY = {
+  modelName: 'style-transfer-candy',
   modelSource: STYLE_TRANSFER_CANDY_MODEL,
 };
 
+/**
+ * Reduced-precision Candy style: resolves to the `coreml_fp16` artifact on
+ * iOS and to the `xnnpack_int8` artifact on every other platform.
+ *
+ * @category Models - Style Transfer
+ */
+export const STYLE_TRANSFER_CANDY_QUANTIZED = {
+  modelName: 'style-transfer-candy-quantized',
+  modelSource: STYLE_TRANSFER_CANDY_QUANTIZED_MODEL,
+};
+
 /**
  * @category Models - Style Transfer
  */
 export const STYLE_TRANSFER_MOSAIC = {
+  modelName: 'style-transfer-mosaic',
   modelSource: STYLE_TRANSFER_MOSAIC_MODEL,
 };
 
+/**
+ * Reduced-precision Mosaic style: resolves to the `coreml_fp16` artifact on
+ * iOS and to the `xnnpack_int8` artifact on every other platform.
+ *
+ * @category Models - Style Transfer
+ */
+export const STYLE_TRANSFER_MOSAIC_QUANTIZED = {
+  modelName: 'style-transfer-mosaic-quantized',
+  modelSource: STYLE_TRANSFER_MOSAIC_QUANTIZED_MODEL,
+};
+
 /**
  * @category Models - Style Transfer
  */
 export const STYLE_TRANSFER_RAIN_PRINCESS = {
+  modelName: 'style-transfer-rain-princess',
   modelSource: STYLE_TRANSFER_RAIN_PRINCESS_MODEL,
 };
 
+/**
+ * Reduced-precision Rain Princess style: resolves to the `coreml_fp16`
+ * artifact on iOS and to the `xnnpack_int8` artifact on other platforms.
+ *
+ * @category Models - Style Transfer
+ */
+export const STYLE_TRANSFER_RAIN_PRINCESS_QUANTIZED = {
+  modelName: 'style-transfer-rain-princess-quantized',
+  modelSource: STYLE_TRANSFER_RAIN_PRINCESS_QUANTIZED_MODEL,
+};
+
 /**
  * @category Models - Style Transfer
  */
 export const STYLE_TRANSFER_UDNIE = {
+  modelName: 'style-transfer-udnie',
   modelSource: STYLE_TRANSFER_UDNIE_MODEL,
 };
 
+/**
+ * Reduced-precision Udnie style: resolves to the `coreml_fp16` artifact on
+ * iOS and to the `xnnpack_int8` artifact on every other platform.
+ *
+ * @category Models - Style Transfer
+ */
+export const STYLE_TRANSFER_UDNIE_QUANTIZED = {
+  modelName: 'style-transfer-udnie-quantized',
+  modelSource: STYLE_TRANSFER_UDNIE_QUANTIZED_MODEL,
+};
+
 // S2T
 const WHISPER_TINY_EN_TOKENIZER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/tokenizer.json`;
 const WHISPER_TINY_EN_ENCODER = `${URL_PREFIX}-whisper-tiny.en/${VERSION_TAG}/xnnpack/whisper_tiny_en_encoder_xnnpack.pte`;
@@ -668,15 +736,28 @@ export const SELFIE_SEGMENTATION = {
 } as const;
 
 // Image Embeddings
-const CLIP_VIT_BASE_PATCH32_IMAGE_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/clip-vit-base-patch32-vision_xnnpack.pte`;
+const CLIP_VIT_BASE_PATCH32_IMAGE_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${NEXT_VERSION_TAG}/xnnpack/clip_vit_base_patch32_vision_xnnpack_fp32.pte`;
+const CLIP_VIT_BASE_PATCH32_IMAGE_QUANTIZED_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${NEXT_VERSION_TAG}/xnnpack/clip_vit_base_patch32_vision_xnnpack_int8.pte`;
 
 /**
  * @category Models - Image Embeddings
  */
 export const CLIP_VIT_BASE_PATCH32_IMAGE = {
+  modelName: 'clip-vit-base-patch32-image',
   modelSource: CLIP_VIT_BASE_PATCH32_IMAGE_MODEL,
 };
 
+/**
+ * Int8 build of the clip-vit-base-patch32 vision model: always resolves to
+ * the `xnnpack_int8` artifact, with no per-platform split.
+ *
+ * @category Models - Image Embeddings
+ */
+export const CLIP_VIT_BASE_PATCH32_IMAGE_QUANTIZED = {
+  modelName: 'clip-vit-base-patch32-image-quantized',
+  modelSource: CLIP_VIT_BASE_PATCH32_IMAGE_QUANTIZED_MODEL,
+};
+
 // Text Embeddings
 const ALL_MINILM_L6_V2_MODEL = `${URL_PREFIX}-all-MiniLM-L6-v2/${VERSION_TAG}/all-MiniLM-L6-v2_xnnpack.pte`;
 const ALL_MINILM_L6_V2_TOKENIZER = `${URL_PREFIX}-all-MiniLM-L6-v2/${VERSION_TAG}/tokenizer.json`;
@@ -686,8 +764,8 @@ const MULTI_QA_MINILM_L6_COS_V1_MODEL = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1
 const MULTI_QA_MINILM_L6_COS_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-MiniLM-L6-cos-v1/${VERSION_TAG}/tokenizer.json`;
 const MULTI_QA_MPNET_BASE_DOT_V1_MODEL = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/multi-qa-mpnet-base-dot-v1_xnnpack.pte`;
 const MULTI_QA_MPNET_BASE_DOT_V1_TOKENIZER = `${URL_PREFIX}-multi-qa-mpnet-base-dot-v1/${VERSION_TAG}/tokenizer.json`;
-const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/clip-vit-base-patch32-text_xnnpack.pte`;
-const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${VERSION_TAG}/tokenizer.json`;
+const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${NEXT_VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
+const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${NEXT_VERSION_TAG}/tokenizer.json`;
 
 /**
  * @category Models - Text Embeddings
@@ -725,6 +803,7 @@ export const MULTI_QA_MPNET_BASE_DOT_V1 = {
  * @category Models - Text Embeddings
  */
 export const CLIP_VIT_BASE_PATCH32_TEXT = {
+  modelName: 'clip-vit-base-patch32-text',
   modelSource: CLIP_VIT_BASE_PATCH32_TEXT_MODEL,
   tokenizerSource: CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER,
 };