luxonis · kozlov721 · Mar 23, 2026 · Mar 29, 2026 · Mar 29, 2026 · Mar 29, 2026
@@ -68,7 +68,7 @@ jobs:
           cache: pip
 
       - name: Install dependencies
-        run: pip install -e .[dev]
+        run: pip install -e .[dev,aimet] --extra-index-url https://download.pytorch.org/whl/cu130
 
       - name: Install dev version of LuxonisML
         if: startsWith(github.head_ref, 'release/') == false
@@ -147,7 +147,7 @@ jobs:
           cache: pip
 
       - name: Install dependencies
-        run: pip install -e .[dev]
+        run: pip install -e .[dev,aimet] --extra-index-url https://download.pytorch.org/whl/cu130
 
       - name: Install dev version of LuxonisML
         if: startsWith(github.head_ref, 'release/') == false

@@ -111,6 +111,15 @@ pip install luxonis-train
 
 This will also install the `luxonis_train` CLI. For more information on how to use it, see [CLI Usage](#cli).
 
+### AIMET Quantization Support
+
+To enable support for AIMET quantization, install the `luxonis-train[aimet]` extra:
+
+```bash
+pip install "luxonis-train[aimet]" --extra-index-url https://download.pytorch.org/whl/cu130
+
+```
+
 <a name="usage"></a>
 
 ## 📝 Usage
@@ -135,6 +144,7 @@ The CLI is the most straightforward way how to use `LuxonisTrain`. The CLI provi
 - `tune` - Tune the hyperparameters of the model for better performance
 - `inspect` - Inspect the dataset you are using and visualize the annotations
 - `annotate` - Annotate a directory using the model’s predictions and generate a new LDF.
+- `quantize` - Quantize the model using `AIMET` quantization techniques
 
 **To get help on any command:**
 

@@ -510,6 +510,7 @@ Here you can define configuration for exporting.
 | `onnx`                   | `dict`                | `{}`              | Options specific for ONNX export. See [ONNX](#onnx) section for details                        |
 | `hubai`                  | `dict`                | `{}`              | Options for HubAI SDK conversion. See [HubAI](#hubai) section for details                      |
 | `blobconverter`          | `dict`                | `{}`              | Options for converting to BLOB format (deprecated). See [Blob](#blob-deprecated) section       |
+| `aimet`                  | `dict`                | `{}`              | Options for AIMET quantization. See [AIMET](#aimet)                                            |
 
 ### `ONNX`
 
@@ -571,6 +572,41 @@ exporter:
     shaves: 8
 ```
 
+### `AIMET`
+
+The [AIMET](https://quic.github.io/aimet-pages/releases/latest/index.html) (AI Model Efficiency Toolkit) provides quantization and model export tools.
+
+| Key                        | Type                                              | Default value                                                  | Description                                                                                                                                                                                                                                          |
+| -------------------------- | ------------------------------------------------- | -------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `active`                   | `bool`                                            | `False`                                                        | Whether to use AIMET for quantization and export                                                                                                                                                                                                     |
+| `epochs`                   | `int`                                             | `20`                                                           | Number of epochs to use for quantization-aware training                                                                                                                                                                                              |
+| `default_output_bw`        | `int`                                             | `8`                                                            | Default bitwidth for quantized activations and weights                                                                                                                                                                                               |
+| `default_param_bw`         | `int`                                             | `8`                                                            | Default bitwidth for quantized parameters                                                                                                                                                                                                            |
+| `default_data_type`        | `Literal["int", "float"]`                         | `int`                                                          | Default data type for quantized values                                                                                                                                                                                                               |
+| `quant_scheme`             | `Literal["min_max", "post_training_tf_enhanced"]` | `min_max`                                                      | Quantization scheme to use                                                                                                                                                                                                                           |
+| `config`                   | `dict \| str`                                     | `{}`                                                           | Additional configuration for AIMET. Can be a dictionary or a path to a JSON config file. Refer to the [AIMET documentation](https://quic.github.io/aimet-pages/releases/latest/techniques/runtime_config.html) for details on the available options. |
+| `fold_batch_norms`         | `bool`                                            | `False`                                                        | Whether to fold batch normalization layers before quantization                                                                                                                                                                                       |
+| `cross_layer_equalization` | `bool`                                            | `False`                                                        | Whether to perform cross-layer equalization before quantization                                                                                                                                                                                      |
+| `batch_norm_reestimation`  | `bool`                                            | `False`                                                        | Whether to perform batch norm re-estimation after quantization                                                                                                                                                                                       |
+| `sequential_mse`           | `bool`                                            | `False`                                                        | Whether to perform sequential MSE optimization.                                                                                                                                                                                                      |
+| `optimizer`                | `dict`                                            | `{"name": "SGD", "params": {"lr": 1e-5}}`                      | Optimizer configuration for quantization-aware training. See [Optimizer](#optimizer) section for details and examples.                                                                                                                               |
+| `scheduler`                | `dict`                                            | `{"name": "StepLR", "params": {"step_size": 5, "gamma": 0.1}}` | Scheduler configuration for quantization-aware training. See [Scheduler](#scheduler) section for details and examples.                                                                                                                               |
+| `adaround`                 | `dict`                                            | `{}`                                                           | Configuration for Adaround weight rounding. See [Adaround](#adaround) for more details.                                                                                                                                                              |
+
+#### AdaRound
+
+Adaptive rounding (AdaRound) is a rounding mechanism for model weights designed to adapt to the data to improve the accuracy of the quantized model.
+
+By default, AIMET uses nearest rounding for quantization, in which weight values are quantized to the nearest integer value. AdaRound, however, uses training data to determine how to round quantized weights. This technique often improves the accuracy of the quantized model.
+
+| Key                      | Type              | Default value | Description                                                                                                                                                              |
+| ------------------------ | ----------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `active`                 | `bool`            | `False`       | Whether to use AdaRound for weight rounding during quantization                                                                                                          |
+| `default_num_iterations` | `int \| None`     | `None`        | Number of iterations for the AdaRound optimization. The default value is 10K for models with 8- or higher bit weights, and 15K for models with lower than 8 bit weights. |
+| `default_reg_param`      | `float`           | `0.01`        | Regularization parameter, trading off between rounding loss vs reconstruction loss.                                                                                      |
+| `default_beta_range`     | `tuple[int, int]` | `(20, 2)`     | Start and stop beta parameter for annealing of rounding loss (start_beta, end_beta).                                                                                     |
+| `default_warm_start`     | `float`           | `0.2`         | The warm up period, during which rounding loss has zero effect.                                                                                                          |
+
 ## Tuner
 
 Here you can specify options for tuning.

@@ -154,6 +154,8 @@ def get_visualization_item(
             return np_images, np_labels
 
         images, labels = loader[idx]
+        if not isinstance(images, dict):
+            images = {loader.image_source: images}
         return (
             {
                 name: image.numpy().transpose(1, 2, 0)
@@ -480,6 +482,29 @@ def convert(
     ).convert(save_dir=save_dir, weights=weights)
 
 
+@app.command(group=export_group, sort_key=1)
+def quantize(
+    opts: list[str] | None = None,
+    /,
+    *,
+    config: str | None = None,
+    weights: str | None = None,
+):
+    """Quantize the model using AIMET.
+
+    @type config: str | None
+    @param config: Path to the configuration file.
+    @type weights: str | None
+    @param weights: Path to the model weights.
+    @type opts: list[str] | None
+    @param opts: A list of optional CLI overrides of the config file.
+    """
+    model = create_model(
+        config, opts, weights=weights, allow_empty_dataset=True
+    )
+    model.quantize()
+
+
 @upgrade_app.command()
 def config(
     config: Annotated[

@@ -29,6 +29,7 @@ class AdaptiveDetectionLoss(BaseLoss):
     n_anchors_list: list[int]
     stride_tensor: Tensor
     gt_bboxes_scale: Tensor
+    anchor_points_strided: Tensor
 
     def __init__(
         self,
@@ -102,6 +103,19 @@ def __init__(
         self.class_loss_weight = class_loss_weight
         self.iou_loss_weight = iou_loss_weight
 
+        self.register_buffer(
+            "gt_bboxes_scale",
+            torch.tensor(
+                [
+                    self.original_img_size[1],
+                    self.original_img_size[0],
+                    self.original_img_size[1],
+                    self.original_img_size[0],
+                ],
+            ),
+            persistent=False,
+        )
+
         self._logged_assigner_change = False
 
     def forward(
@@ -163,30 +177,35 @@ def forward(
         return loss, sub_losses
 
     def _init_parameters(self, features: list[Tensor]) -> None:
-        if not hasattr(self, "gt_bboxes_scale"):
-            self.gt_bboxes_scale = torch.tensor(
-                [
-                    self.original_img_size[1],
-                    self.original_img_size[0],
-                    self.original_img_size[1],
-                    self.original_img_size[0],
-                ],
-                device=features[0].device,
-            )
+        if not hasattr(self, "anchors"):
             (
-                self.anchors,
-                self.anchor_points,
-                self.n_anchors_list,
-                self.stride_tensor,
+                anchors,
+                anchor_points,
+                n_anchors_list,
+                stride_tensor,
             ) = anchors_for_fpn_features(
                 features,
                 self.stride,
                 self.grid_cell_size,
                 self.grid_cell_offset,
                 multiply_with_stride=True,
             )
-            self.anchor_points_strided = (
-                self.anchor_points / self.stride_tensor
+            self.register_buffer("anchors", anchors, persistent=False)
+            self.register_buffer(
+                "anchor_points", anchor_points, persistent=False
+            )
+            self.register_buffer(
+                "n_anchors_list",
+                torch.tensor(n_anchors_list),
+                persistent=False,
+            )
+            self.register_buffer(
+                "stride_tensor", stride_tensor, persistent=False
+            )
+            self.register_buffer(
+                "anchor_points_strided",
+                anchor_points / stride_tensor,
+                persistent=False,
             )
 
     def _run_assigner(

@@ -17,8 +17,6 @@
 from luxonis_train.utils.boundingbox import IoUType
 from luxonis_train.utils.keypoints import insert_class
 
-from .bce_with_logits import BCEWithLogitsLoss
-
 
 class EfficientKeypointBBoxLoss(AdaptiveDetectionLoss):
     node: EfficientKeypointBBoxHead
@@ -74,9 +72,7 @@ def __init__(
             **kwargs,
         )
 
-        self.b_cross_entropy = BCEWithLogitsLoss(
-            pos_weight=torch.tensor([viz_pw])
-        )
+        self.pos_weight = torch.tensor([viz_pw])
         self.sigmas = get_sigmas(
             sigmas=sigmas, n_keypoints=self.n_keypoints, caller_name=self.name
         )
@@ -85,6 +81,13 @@ def __init__(
         )
         self.regr_kpts_loss_weight = regr_kpts_loss_weight
         self.vis_kpts_loss_weight = vis_kpts_loss_weight
+        self.register_buffer(
+            "gt_kpts_scale",
+            torch.tensor(
+                [self.original_img_size[1], self.original_img_size[0]],
+            ),
+            persistent=False,
+        )
 
     def forward(
         self,
@@ -95,14 +98,14 @@ def forward(
         target_boundingbox: Tensor,
         target_keypoints: Tensor,
     ) -> tuple[Tensor, dict[str, Tensor]]:
+        self._init_parameters(features)
+
         device = keypoints_raw.device
         target_keypoints = insert_class(target_keypoints, target_boundingbox)
 
         batch_size = class_scores.shape[0]
         n_kpts = (target_keypoints.shape[1] - 2) // 3
 
-        self._init_parameters(features)
-
         pred_bboxes = dist2bbox(distributions, self.anchor_points_strided)
         keypoints_raw = self.dist2kpts_noscale(
             self.anchor_points_strided,
@@ -124,7 +127,7 @@ def forward(
         scaled_raw_keypoints = keypoints_raw.clone()
         scaled_raw_keypoints[..., :2] = scaled_raw_keypoints[
             ..., :2
-        ] * self.stride_tensor.view(1, -1, 1, 1)
+        ] * self.stride_tensor.clone().view(1, -1, 1, 1)
 
         sigmas = self.sigmas.to(device)
 
@@ -190,8 +193,11 @@ def forward(
         regression_loss = (
             ((1 - torch.exp(-e)) * mask).sum(dim=1) / (mask.sum(dim=1) + 1e-9)
         ).mean()
-        visibility_loss = self.b_cross_entropy.forward(
-            keypoints_raw[..., 2], mask
+
+        visibility_loss = F.binary_cross_entropy_with_logits(
+            keypoints_raw[..., 2],
+            mask,
+            pos_weight=self.pos_weight.clone().to(device),
         )
 
         one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[
@@ -264,12 +270,3 @@ def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor:
         adj_kpts[..., 0] += x_adj
         adj_kpts[..., 1] += y_adj
         return adj_kpts
-
-    def _init_parameters(self, features: list[Tensor]) -> None:
-        if hasattr(self, "gt_kpts_scale"):
-            return
-        super()._init_parameters(features)
-        self.gt_kpts_scale = torch.tensor(
-            [self.original_img_size[1], self.original_img_size[0]],
-            device=features[0].device,
-        )
@@ -92,7 +92,7 @@ def forward(self, img1: Tensor, img2: Tensor) -> Tensor:
 
             (_, channel, _, _) = img1.size()
             if channel == self.channel and self.window.dtype == img1.dtype:
-                window = self.window.to(device)
+                window = self.window.to(device).clone()
             else:
                 window = (
                     create_window(self.window_size, channel)

@@ -164,6 +164,12 @@ def compute(
         """
         return super().compute()
 
+    def __eq__(self, other: object) -> bool:
+        return self is other
+
+    def __hash__(self) -> int:
+        return id(self)
+
     @cached_property
     def _signature(self) -> dict[str, Parameter]:
         return get_signature(self.update)

@@ -51,6 +51,9 @@ def compute(self) -> dict[str, Tensor]:
         }
 
     def _update(self, predictions: list[Tensor], targets: Tensor) -> None:
+        if self.confusion_matrix.is_inference():
+            self.confusion_matrix = self.confusion_matrix.clone()
+
         for pred, target in zip(
             predictions,
             instances_from_batch(targets, batch_size=len(predictions)),

@@ -3,8 +3,9 @@
 from inspect import Parameter
 
 import torch.nn.functional as F
+from luxonis_ml.data.utils import ColorMap
 from torch import Tensor
-from typing_extensions import TypeVarTuple, Unpack
+from typing_extensions import TypeVarTuple, Unpack, override
 
 from luxonis_train.attached_modules import BaseAttachedModule
 from luxonis_train.registry import VISUALIZERS
@@ -25,6 +26,13 @@ def __init__(self, *args, scale: float = 1.0, **kwargs) -> None:
         super().__init__(*args, **kwargs)
         self.scale = scale
 
+    @override
+    def __getstate__(self) -> dict:
+        state = super().__getstate__()
+        if "colormap" in state:
+            del state["colormap"]
+        return state
+
     @staticmethod
     def scale_canvas(canvas: Tensor, scale: float = 1.0) -> Tensor:
         return F.interpolate(
@@ -34,6 +42,10 @@ def scale_canvas(canvas: Tensor, scale: float = 1.0) -> Tensor:
             align_corners=False,
         )
 
+    @cached_property
+    def colormap(self) -> ColorMap:
+        return ColorMap()
+
     @abstractmethod
     def forward(
         self,